From nobody Fri Sep 20 18:50:15 2024 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id D6E1EC77B7A for ; Tue, 16 May 2023 12:50:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233075AbjEPMuM (ORCPT ); Tue, 16 May 2023 08:50:12 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59356 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232784AbjEPMuI (ORCPT ); Tue, 16 May 2023 08:50:08 -0400 Received: from szxga08-in.huawei.com (szxga08-in.huawei.com [45.249.212.255]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 98A3D5FD3; Tue, 16 May 2023 05:50:04 -0700 (PDT) Received: from dggpemm500005.china.huawei.com (unknown [172.30.72.56]) by szxga08-in.huawei.com (SkyGuard) with ESMTP id 4QLGDk6d78z18LWL; Tue, 16 May 2023 20:45:42 +0800 (CST) Received: from localhost.localdomain (10.69.192.56) by dggpemm500005.china.huawei.com (7.185.36.74) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2507.23; Tue, 16 May 2023 20:50:01 +0800 From: Yunsheng Lin To: , , CC: , , Yunsheng Lin , Lorenzo Bianconi , Alexander Duyck , Jesper Dangaard Brouer , Ilias Apalodimas , Eric Dumazet Subject: [RFC 1/3] page_pool: unify frag page and non-frag page handling Date: Tue, 16 May 2023 20:47:59 +0800 Message-ID: <20230516124801.2465-2-linyunsheng@huawei.com> X-Mailer: git-send-email 2.33.0 In-Reply-To: <20230516124801.2465-1-linyunsheng@huawei.com> References: <20230516124801.2465-1-linyunsheng@huawei.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Originating-IP: [10.69.192.56] X-ClientProxiedBy: dggems701-chm.china.huawei.com (10.3.19.178) To dggpemm500005.china.huawei.com (7.185.36.74) X-CFilter-Loop: Reflected Precedence: bulk List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Currently page_pool_dev_alloc_pages() cannot be called when PP_FLAG_PAGE_FRAG is set, because it does not use the frag reference counting. As we are already doing an optimization by not updating page->pp_frag_count in page_pool_defrag_page() for the last frag user, and a non-frag page only has one user, we utilize that to unify frag page and non-frag page handling, so that page_pool_dev_alloc_pages() can also be called with PP_FLAG_PAGE_FRAG set. Signed-off-by: Yunsheng Lin CC: Lorenzo Bianconi CC: Alexander Duyck --- include/net/page_pool.h | 24 ++++++++++++++++++------ net/core/page_pool.c | 3 ++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index c8ec2f34722b..14ac46297ae4 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -50,6 +50,9 @@ PP_FLAG_DMA_SYNC_DEV |\ PP_FLAG_PAGE_FRAG) =20 +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ + (sizeof(dma_addr_t) > sizeof(unsigned long)) + /* * Fast allocation side cache array/stack * @@ -295,7 +298,8 @@ void page_pool_put_defragged_page(struct page_pool *poo= l, struct page *page, */ static inline void page_pool_fragment_page(struct page *page, long nr) { - atomic_long_set(&page->pp_frag_count, nr); + if (!PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + atomic_long_set(&page->pp_frag_count, nr); } =20 static inline long page_pool_defrag_page(struct page *page, long nr) @@ -316,14 +320,25 @@ static inline long page_pool_defrag_page(struct page = *page, long nr) =20 ret =3D atomic_long_sub_return(nr, &page->pp_frag_count); WARN_ON(ret < 0); + + /* Reset it to 1 to allow only one user in case the page is + * recycled and allocated as non-frag page if it is the last + * user, this should be the rare case as it only happen when + * two users call page_pool_defrag_page() currently.
+ */ + if (!ret) + atomic_long_set(&page->pp_frag_count, 1); + return ret; } =20 static inline bool page_pool_is_last_frag(struct page_pool *pool, struct page *page) { - /* If fragments aren't enabled or count is 0 we were the last user */ - return !(pool->p.flags & PP_FLAG_PAGE_FRAG) || + /* When dma_addr_upper is overlapped with pp_frag_count + * or we were the last page frag user. + */ + return PAGE_POOL_DMA_USE_PP_FRAG_COUNT || (page_pool_defrag_page(page, 1) =3D=3D 0); } =20 @@ -357,9 +372,6 @@ static inline void page_pool_recycle_direct(struct page= _pool *pool, page_pool_put_full_page(pool, page, true); } =20 -#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ - (sizeof(dma_addr_t) > sizeof(unsigned long)) - static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { dma_addr_t ret =3D page->dma_addr; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index e212e9d7edcb..5d93c5dc0549 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -334,6 +334,7 @@ static void page_pool_set_pp_info(struct page_pool *poo= l, { page->pp =3D pool; page->pp_magic |=3D PP_SIGNATURE; + page_pool_fragment_page(page, 1); if (pool->p.init_callback) pool->p.init_callback(page, pool->p.init_arg); } @@ -698,7 +699,7 @@ struct page *page_pool_alloc_frag(struct page_pool *poo= l, unsigned int max_size =3D PAGE_SIZE << pool->p.order; struct page *page =3D pool->frag_page; =20 - if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) || + if (WARN_ON(PAGE_POOL_DMA_USE_PP_FRAG_COUNT || size > max_size)) return NULL; =20 --=20 2.33.0 From nobody Fri Sep 20 18:50:15 2024 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 39F16C77B75 for ; Tue, 16 May 2023 12:50:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233255AbjEPMuV (ORCPT ); Tue, 16 May 2023 08:50:21 
-0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59770 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232999AbjEPMuP (ORCPT ); Tue, 16 May 2023 08:50:15 -0400 Received: from szxga01-in.huawei.com (szxga01-in.huawei.com [45.249.212.187]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D6B1FA3; Tue, 16 May 2023 05:50:10 -0700 (PDT) Received: from dggpemm500005.china.huawei.com (unknown [172.30.72.57]) by szxga01-in.huawei.com (SkyGuard) with ESMTP id 4QLGHX326zzsR9F; Tue, 16 May 2023 20:48:08 +0800 (CST) Received: from localhost.localdomain (10.69.192.56) by dggpemm500005.china.huawei.com (7.185.36.74) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2507.23; Tue, 16 May 2023 20:50:07 +0800 From: Yunsheng Lin To: , , CC: , , Yunsheng Lin , Lorenzo Bianconi , Alexander Duyck , Jesper Dangaard Brouer , Ilias Apalodimas , Eric Dumazet Subject: [RFC 2/3] page_pool: support non-frag page for page_pool_alloc_frag() Date: Tue, 16 May 2023 20:48:00 +0800 Message-ID: <20230516124801.2465-3-linyunsheng@huawei.com> X-Mailer: git-send-email 2.33.0 In-Reply-To: <20230516124801.2465-1-linyunsheng@huawei.com> References: <20230516124801.2465-1-linyunsheng@huawei.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Originating-IP: [10.69.192.56] X-ClientProxiedBy: dggems701-chm.china.huawei.com (10.3.19.178) To dggpemm500005.china.huawei.com (7.185.36.74) X-CFilter-Loop: Reflected Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" There is a performance penalty when using page frag support if the user requests a larger frag size and a page only supports one frag user, see [1].
It seems that a user may request different frag sizes depending on the MTU and packet size. Provide an option to allocate a non-frag page when the user has requested a frag size larger than a specific size, so that the user has a unified interface for memory allocation with the least memory utilization and performance penalty. 1. https://lore.kernel.org/netdev/ZEU+vospFdm08IeE@localhost.localdomain/ Signed-off-by: Yunsheng Lin CC: Lorenzo Bianconi CC: Alexander Duyck --- include/net/page_pool.h | 9 +++++++++ net/core/page_pool.c | 10 ++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 14ac46297ae4..d1c57c0c8f49 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -163,6 +163,7 @@ struct page_pool { unsigned int frag_offset; struct page *frag_page; long frag_users; + unsigned int max_frag_size; =20 #ifdef CONFIG_PAGE_POOL_STATS /* these stats are incremented while in softirq context */ @@ -213,6 +214,14 @@ struct page_pool { u64 destroy_cnt; }; =20 +/* Called after page_pool_create() */ +static inline void page_pool_set_max_frag_size(struct page_pool *pool, + unsigned int max_frag_size) +{ + pool->max_frag_size =3D min_t(unsigned int, max_frag_size, + PAGE_SIZE << pool->p.order); +} + struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); =20 static inline struct page *page_pool_dev_alloc_pages(struct page_pool *poo= l) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5d93c5dc0549..aab6147f28af 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -198,6 +198,8 @@ static int page_pool_init(struct page_pool *pool, if (pool->p.flags & PP_FLAG_DMA_MAP) get_device(pool->p.dev); =20 + page_pool_set_max_frag_size(pool, PAGE_SIZE << pool->p.order); + return 0; } =20 @@ -699,10 +701,14 @@ struct page *page_pool_alloc_frag(struct page_pool *p= ool, unsigned int max_size =3D PAGE_SIZE << pool->p.order; struct page *page =3D pool->frag_page; =20 - if
(WARN_ON(PAGE_POOL_DMA_USE_PP_FRAG_COUNT || - size > max_size)) + if (WARN_ON(PAGE_POOL_DMA_USE_PP_FRAG_COUNT)) return NULL; =20 + if (unlikely(size > pool->max_frag_size)) { + *offset =3D 0; + return page_pool_alloc_pages(pool, gfp); + } + size =3D ALIGN(size, dma_get_cache_alignment()); *offset =3D pool->frag_offset; =20 --=20 2.33.0 From nobody Fri Sep 20 18:50:15 2024 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C17BFC77B7A for ; Tue, 16 May 2023 12:50:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232697AbjEPMuY (ORCPT ); Tue, 16 May 2023 08:50:24 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59826 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233147AbjEPMuQ (ORCPT ); Tue, 16 May 2023 08:50:16 -0400 Received: from szxga01-in.huawei.com (szxga01-in.huawei.com [45.249.212.187]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 33D0B18E; Tue, 16 May 2023 05:50:12 -0700 (PDT) Received: from dggpemm500005.china.huawei.com (unknown [172.30.72.55]) by szxga01-in.huawei.com (SkyGuard) with ESMTP id 4QLGDt3xBwzqSHC; Tue, 16 May 2023 20:45:50 +0800 (CST) Received: from localhost.localdomain (10.69.192.56) by dggpemm500005.china.huawei.com (7.185.36.74) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256) id 15.1.2507.23; Tue, 16 May 2023 20:50:09 +0800 From: Yunsheng Lin To: , , CC: , , Yunsheng Lin , Lorenzo Bianconi , Alexander Duyck , Yisen Zhuang , Salil Mehta , Eric Dumazet , Felix Fietkau , Ryder Lee , Shayne Chen , Sean Wang , Kalle Valo , Matthias Brugger , AngeloGioacchino Del Regno , Jesper Dangaard Brouer , Ilias Apalodimas , , , Subject: [RFC 3/3] page_pool: introduce 'struct page_pool_frag' Date: Tue, 16 May 2023 20:48:01 +0800 
Message-ID: <20230516124801.2465-4-linyunsheng@huawei.com> X-Mailer: git-send-email 2.33.0 In-Reply-To: <20230516124801.2465-1-linyunsheng@huawei.com> References: <20230516124801.2465-1-linyunsheng@huawei.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Originating-IP: [10.69.192.56] X-ClientProxiedBy: dggems701-chm.china.huawei.com (10.3.19.178) To dggpemm500005.china.huawei.com (7.185.36.74) X-CFilter-Loop: Reflected Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" As page_pool_alloc_frag() can now return both frag and non-frag pages, the true size may differ between them, so introduce 'struct page_pool_frag' to return the frag info instead of adding more function parameters, and adjust the interface accordingly. Signed-off-by: Yunsheng Lin CC: Lorenzo Bianconi CC: Alexander Duyck --- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 16 ++++---- drivers/net/wireless/mediatek/mt76/mt76.h | 9 +++-- include/net/page_pool.h | 18 ++++++--- net/core/page_pool.c | 38 +++++++++++++------ 4 files changed, 52 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/= ethernet/hisilicon/hns3/hns3_enet.c index b676496ec6d7..b7290c3bb26b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3355,15 +3355,17 @@ static int hns3_alloc_buffer(struct hns3_enet_ring = *ring, struct page *p; =20 if (ring->page_pool) { - p =3D page_pool_dev_alloc_frag(ring->page_pool, - &cb->page_offset, - hns3_buf_size(ring)); - if (unlikely(!p)) + struct page_pool_frag *pp_frag; + + pp_frag =3D page_pool_dev_alloc_frag(ring->page_pool, + hns3_buf_size(ring)); + if (unlikely(!pp_frag)) return -ENOMEM; =20 - cb->priv =3D p; - cb->buf =3D page_address(p); - cb->dma =3D page_pool_get_dma_addr(p); + cb->priv =3D pp_frag->page; + cb->page_offset =3D pp_frag->offset; + cb->buf =3D page_address(pp_frag->page); +
cb->dma =3D page_pool_get_dma_addr(pp_frag->page); cb->type =3D DESC_TYPE_PP_FRAG; cb->reuse_flag =3D 0; return 0; diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wirele= ss/mediatek/mt76/mt76.h index 6b07b8fafec2..5d42081f7ba8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1453,13 +1453,14 @@ static inline void mt76_put_page_pool_buf(void *buf= , bool allow_direct) static inline void * mt76_get_page_pool_buf(struct mt76_queue *q, u32 *offset, u32 size) { - struct page *page; + struct page_pool_frag *pp_frag; =20 - page =3D page_pool_dev_alloc_frag(q->page_pool, offset, size); - if (!page) + pp_frag =3D page_pool_dev_alloc_frag(q->page_pool, size); + if (!pp_frag) return NULL; =20 - return page_address(page) + *offset; + *offset =3D pp_frag->offset; + return page_address(pp_frag->page) + *offset; } =20 static inline void mt76_set_tx_blocked(struct mt76_dev *dev, bool blocked) diff --git a/include/net/page_pool.h b/include/net/page_pool.h index d1c57c0c8f49..86e79ea87732 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -151,6 +151,12 @@ static inline u64 *page_pool_ethtool_stats_get(u64 *da= ta, void *stats) =20 #endif =20 +struct page_pool_frag { + struct page *page; + unsigned int offset; + unsigned int truesize; +}; + struct page_pool { struct page_pool_params p; =20 @@ -231,16 +237,16 @@ static inline struct page *page_pool_dev_alloc_pages(= struct page_pool *pool) return page_pool_alloc_pages(pool, gfp); } =20 -struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *of= fset, - unsigned int size, gfp_t gfp); +struct page_pool_frag *page_pool_alloc_frag(struct page_pool *pool, + unsigned int size, gfp_t gfp); =20 -static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, - unsigned int *offset, - unsigned int size) +static inline +struct page_pool_frag *page_pool_dev_alloc_frag(struct page_pool *pool, + unsigned int size) { 
gfp_t gfp =3D (GFP_ATOMIC | __GFP_NOWARN); =20 - return page_pool_alloc_frag(pool, offset, size, gfp); + return page_pool_alloc_frag(pool, size, gfp); } =20 /* get the stored dma direction. A driver might decide to treat this local= ly and diff --git a/net/core/page_pool.c b/net/core/page_pool.c index aab6147f28af..83a2a85d21bf 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -28,6 +28,8 @@ =20 #define BIAS_MAX LONG_MAX =20 +static DEFINE_PER_CPU(struct page_pool_frag, pp_frag); + #ifdef CONFIG_PAGE_POOL_STATS /* alloc_stat_inc is intended to be used in softirq context */ #define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++) @@ -694,25 +696,31 @@ static void page_pool_free_frag(struct page_pool *poo= l) page_pool_return_page(pool, page); } =20 -struct page *page_pool_alloc_frag(struct page_pool *pool, - unsigned int *offset, - unsigned int size, gfp_t gfp) +struct page_pool_frag *page_pool_alloc_frag(struct page_pool *pool, + unsigned int size, + gfp_t gfp) { + struct page_pool_frag *frag =3D this_cpu_ptr(&pp_frag); unsigned int max_size =3D PAGE_SIZE << pool->p.order; - struct page *page =3D pool->frag_page; + struct page *page; =20 if (WARN_ON(PAGE_POOL_DMA_USE_PP_FRAG_COUNT)) return NULL; =20 if (unlikely(size > pool->max_frag_size)) { - *offset =3D 0; - return page_pool_alloc_pages(pool, gfp); + frag->page =3D page_pool_alloc_pages(pool, gfp); + if (unlikely(!frag->page)) + return NULL; + + frag->offset =3D 0; + frag->truesize =3D max_size; + return frag; } =20 + page =3D pool->frag_page; size =3D ALIGN(size, dma_get_cache_alignment()); - *offset =3D pool->frag_offset; =20 - if (page && *offset + size > max_size) { + if (page && pool->frag_offset + size > max_size) { page =3D page_pool_drain_frag(pool, page); if (page) { alloc_stat_inc(pool, fast); @@ -731,16 +739,22 @@ struct page *page_pool_alloc_frag(struct page_pool *p= ool, =20 frag_reset: pool->frag_users =3D 1; - *offset =3D 0; pool->frag_offset =3D size; 
page_pool_fragment_page(page, BIAS_MAX); - return page; + frag->page =3D page; + frag->offset =3D 0; + frag->truesize =3D size; + return frag; } =20 + frag->page =3D page; + frag->truesize =3D size; + frag->offset =3D pool->frag_offset; + pool->frag_users++; - pool->frag_offset =3D *offset + size; + pool->frag_offset +=3D size; alloc_stat_inc(pool, fast); - return page; + return frag; } EXPORT_SYMBOL(page_pool_alloc_frag); =20 --=20 2.33.0