Now that direct recycling is performed based on pool->cpuid when set,
memory leaks are possible:

1. A pool is destroyed.
2. Alloc cache is emptied (it's done only once).
3. pool->cpuid is still set.
4. napi_pp_put_page() does direct recycling based on pool->cpuid.
5. Now alloc cache is not empty, but it won't ever be freed.

In order to avoid that, rewrite pool->cpuid to -1 when unlinking NAPI to
make sure no direct recycling will be possible after emptying the cache.
This involves a bit of overhead as pool->cpuid now must be accessed
via READ_ONCE() to avoid partial reads.
Rename page_pool_unlink_napi() -> page_pool_disable_direct_recycling()
to reflect what it actually does and unexport it.
Fixes: 2b0cfa6e4956 ("net: add generic percpu page_pool allocator")
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
---
include/net/page_pool/types.h | 5 -----
net/core/page_pool.c | 10 +++++++---
net/core/skbuff.c | 2 +-
3 files changed, 8 insertions(+), 9 deletions(-)
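In short, the fix is one WRITE_ONCE()/READ_ONCE() pairing. A minimal
sketch of the two sides (helper and field names are the ones from this
patch; the NAPI unlinking and the rest of the destroy path are elided,
see the full diff below):

/* Slowpath: called once per pool from page_pool_destroy(). */
static void page_pool_disable_direct_recycling(struct page_pool *pool)
{
	/* After this store, no CPU can pass the cpuid check in the
	 * hotpath, so the alloc cache cannot be refilled once
	 * page_pool_destroy() has emptied it.
	 */
	WRITE_ONCE(pool->cpuid, -1);
}

/* Hotpath excerpt from napi_pp_put_page(), which may run concurrently
 * on the pinned CPU. READ_ONCE() pairs with the store above and avoids
 * a torn read while cpuid is being rewritten:
 */
	allow_direct |= READ_ONCE(pp->cpuid) == cpuid;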
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 3828396ae60c..3590fbe6e3f1 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -210,17 +210,12 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
struct xdp_mem_info;
#ifdef CONFIG_PAGE_POOL
-void page_pool_unlink_napi(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
struct xdp_mem_info *mem);
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count);
#else
-static inline void page_pool_unlink_napi(struct page_pool *pool)
-{
-}
-
static inline void page_pool_destroy(struct page_pool *pool)
{
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 89c835fcf094..e8b9399d8e32 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -949,8 +949,13 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
pool->xdp_mem_id = mem->id;
}
-void page_pool_unlink_napi(struct page_pool *pool)
+static void page_pool_disable_direct_recycling(struct page_pool *pool)
{
+ /* Disable direct recycling based on pool->cpuid.
+ * Paired with READ_ONCE() in napi_pp_put_page().
+ */
+ WRITE_ONCE(pool->cpuid, -1);
+
if (!pool->p.napi)
return;
@@ -962,7 +967,6 @@ void page_pool_unlink_napi(struct page_pool *pool)
WRITE_ONCE(pool->p.napi, NULL);
}
-EXPORT_SYMBOL(page_pool_unlink_napi);
void page_pool_destroy(struct page_pool *pool)
{
@@ -972,7 +976,7 @@ void page_pool_destroy(struct page_pool *pool)
if (!page_pool_put(pool))
return;
- page_pool_unlink_napi(pool);
+ page_pool_disable_direct_recycling(pool);
page_pool_free_frag(pool);
if (!page_pool_release(pool))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0d9a489e6ae1..b41856585c24 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1018,7 +1018,7 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
unsigned int cpuid = smp_processor_id();
allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid;
- allow_direct |= (pp->cpuid == cpuid);
+ allow_direct |= READ_ONCE(pp->cpuid) == cpuid;
}
/* Driver set this to memory recycling info. Reset it on recycle.
--
2.43.0
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Thu, 15 Feb 2024 12:39:05 +0100

> Now that direct recycling is performed based on pool->cpuid when set,
> memory leaks are possible:
>
> 1. A pool is destroyed.
> 2. Alloc cache is emptied (it's done only once).
> 3. pool->cpuid is still set.
> 4. napi_pp_put_page() does direct recycling based on pool->cpuid.
> 5. Now alloc cache is not empty, but it won't ever be freed.
>
> In order to avoid that, rewrite pool->cpuid to -1 when unlinking NAPI to
> make sure no direct recycling will be possible after emptying the cache.
> This involves a bit of overhead as pool->cpuid now must be accessed
> via READ_ONCE() to avoid partial reads.
> Rename page_pool_unlink_napi() -> page_pool_disable_direct_recycling()
> to reflect what it actually does and unexport it.

PW says "Changes requested", but I don't see any in the thread, did I
miss something? :D

Thanks,
Olek
Alexander Lobakin <aleksander.lobakin@intel.com> writes:

> Now that direct recycling is performed based on pool->cpuid when set,
> memory leaks are possible:
>
> 1. A pool is destroyed.
> 2. Alloc cache is emptied (it's done only once).
> 3. pool->cpuid is still set.
> 4. napi_pp_put_page() does direct recycling based on pool->cpuid.
> 5. Now alloc cache is not empty, but it won't ever be freed.

Did you actually manage to trigger this? pool->cpuid is only set for the
system page pool instance, which is never destroyed; so this seems a very
theoretical concern?

I guess we could still do this in case we find other uses for setting
the cpuid; I don't think the addition of the READ_ONCE() will have any
measurable overhead on the common arches?

-Toke
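[On the overhead point: for a scalar field like pool->cpuid, READ_ONCE()
boils down to a volatile access, i.e. a single ordinary load on the
common arches. A simplified model — the real kernel macro additionally
performs compile-time type checks, and READ_ONCE_SCALAR is a made-up
name for this sketch:

#define READ_ONCE_SCALAR(x)	(*(const volatile typeof(x) *)&(x))

/* e.g. in napi_pp_put_page(): compiles to one plain load */
allow_direct |= READ_ONCE_SCALAR(pp->cpuid) == cpuid;
]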
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Thu, 15 Feb 2024 13:05:30 +0100

> Alexander Lobakin <aleksander.lobakin@intel.com> writes:
>
>> Now that direct recycling is performed based on pool->cpuid when set,
>> memory leaks are possible:
>>
>> 1. A pool is destroyed.
>> 2. Alloc cache is emptied (it's done only once).
>> 3. pool->cpuid is still set.
>> 4. napi_pp_put_page() does direct recycling based on pool->cpuid.
>> 5. Now alloc cache is not empty, but it won't ever be freed.
>
> Did you actually manage to trigger this? pool->cpuid is only set for the
> system page pool instance, which is never destroyed; so this seems a very
> theoretical concern?

To both Lorenzo and Toke:

Yes, system page pools are never destroyed, but we might later use
cpuid in non-persistent PPs. Then there will be memory leaks.
I was able to trigger this by creating bpf/test_run page_pools with the
cpuid set to test direct recycling of live frames.

> I guess we could still do this in case we find other uses for setting
> the cpuid; I don't think the addition of the READ_ONCE() will have any
> measurable overhead on the common arches?

READ_ONCE() is cheap, but I thought it's worth mentioning in the
commitmsg anyway :)

> -Toke

Thanks,
Olek
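[To make the trigger scenario concrete: a short-lived pool pinned to one
CPU could look roughly like the sketch below. page_pool_create_percpu()
and its cpuid argument come from the Fixes commit; the wrapper function
and parameter values are made up for illustration, this is not the
actual bpf/test_run code:

static int pinned_pool_example(void)
{
	struct page_pool_params pp_params = {
		.order		= 0,
		.pool_size	= 256,
		.nid		= NUMA_NO_NODE,
	};
	struct page_pool *pool;

	/* The second argument pins direct recycling to one CPU. */
	pool = page_pool_create_percpu(&pp_params, raw_smp_processor_id());
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	/* ... allocate pages, let them be recycled directly ... */

	/* Without this fix: a page recycled via pp->cpuid after the
	 * one-time cache drain in page_pool_destroy() refills the alloc
	 * cache and is never freed.
	 */
	page_pool_destroy(pool);
	return 0;
}
]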
> From: Toke Høiland-Jørgensen <toke@redhat.com>
> Date: Thu, 15 Feb 2024 13:05:30 +0100
>
> > Alexander Lobakin <aleksander.lobakin@intel.com> writes:
> >
> >> Now that direct recycling is performed based on pool->cpuid when set,
> >> memory leaks are possible:
> >>
> >> 1. A pool is destroyed.
> >> 2. Alloc cache is emptied (it's done only once).
> >> 3. pool->cpuid is still set.
> >> 4. napi_pp_put_page() does direct recycling based on pool->cpuid.
> >> 5. Now alloc cache is not empty, but it won't ever be freed.
> >
> > Did you actually manage to trigger this? pool->cpuid is only set for the
> > system page pool instance, which is never destroyed; so this seems a very
> > theoretical concern?
>
> To both Lorenzo and Toke:
>
> Yes, system page pools are never destroyed, but we might later use
> cpuid in non-persistent PPs. Then there will be memory leaks.
> I was able to trigger this by creating bpf/test_run page_pools with the
> cpuid set to test direct recycling of live frames.

what about avoiding the pool being destroyed in this case? I do not like the
idea of overwriting the cpuid field for it.

Regards,
Lorenzo

>
> > I guess we could still do this in case we find other uses for setting
> > the cpuid; I don't think the addition of the READ_ONCE() will have any
> > measurable overhead on the common arches?
>
> READ_ONCE() is cheap, but I thought it's worth mentioning in the
> commitmsg anyway :)
>
> > -Toke
>
> Thanks,
> Olek
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 15 Feb 2024 14:37:10 +0100

>> From: Toke Høiland-Jørgensen <toke@redhat.com>
>> Date: Thu, 15 Feb 2024 13:05:30 +0100
>>
>>> Alexander Lobakin <aleksander.lobakin@intel.com> writes:
>>>
>>>> Now that direct recycling is performed based on pool->cpuid when set,
>>>> memory leaks are possible:
>>>>
>>>> 1. A pool is destroyed.
>>>> 2. Alloc cache is emptied (it's done only once).
>>>> 3. pool->cpuid is still set.
>>>> 4. napi_pp_put_page() does direct recycling based on pool->cpuid.
>>>> 5. Now alloc cache is not empty, but it won't ever be freed.
>>>
>>> Did you actually manage to trigger this? pool->cpuid is only set for the
>>> system page pool instance, which is never destroyed; so this seems a very
>>> theoretical concern?
>>
>> To both Lorenzo and Toke:
>>
>> Yes, system page pools are never destroyed, but we might later use
>> cpuid in non-persistent PPs. Then there will be memory leaks.
>> I was able to trigger this by creating bpf/test_run page_pools with the
>> cpuid set to test direct recycling of live frames.
>
> what about avoiding the pool being destroyed in this case? I do not like the

I think I didn't get what you wanted to say here :s

Rewriting cpuid doesn't introduce any new checks on the hotpath. Destroying
the pool is slowpath and we shouldn't hurt the hotpath to handle it.

> idea of overwriting the cpuid field for it.

We also overwrite the pp->p.napi field a couple of lines below. It happens
only when destroying the pool; we don't care about the fields at this point.

> Regards,
> Lorenzo
>
>>
>>> I guess we could still do this in case we find other uses for setting
>>> the cpuid; I don't think the addition of the READ_ONCE() will have any
>>> measurable overhead on the common arches?
>>
>> READ_ONCE() is cheap, but I thought it's worth mentioning in the
>> commitmsg anyway :)
>>
>>> -Toke

Thanks,
Olek
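[For the record, the ordering being discussed, condensed from the diff
above — all of this runs in the destroy slowpath, the hotpath only gains
the READ_ONCE():

/* page_pool_destroy(), condensed: */
if (!page_pool_put(pool))
	return;

/* cpuid -> -1 and p.napi -> NULL: both direct-recycling gates are
 * closed before the caches are torn down.
 */
page_pool_disable_direct_recycling(pool);
page_pool_free_frag(pool);

if (!page_pool_release(pool))
	return;
]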
> From: Lorenzo Bianconi <lorenzo@kernel.org>
> Date: Thu, 15 Feb 2024 14:37:10 +0100
>
> >> From: Toke Høiland-Jørgensen <toke@redhat.com>
> >> Date: Thu, 15 Feb 2024 13:05:30 +0100
> >>
> >>> Alexander Lobakin <aleksander.lobakin@intel.com> writes:
> >>>
> >>>> Now that direct recycling is performed based on pool->cpuid when set,
> >>>> memory leaks are possible:
> >>>>
> >>>> 1. A pool is destroyed.
> >>>> 2. Alloc cache is emptied (it's done only once).
> >>>> 3. pool->cpuid is still set.
> >>>> 4. napi_pp_put_page() does direct recycling based on pool->cpuid.
> >>>> 5. Now alloc cache is not empty, but it won't ever be freed.
> >>>
> >>> Did you actually manage to trigger this? pool->cpuid is only set for the
> >>> system page pool instance, which is never destroyed; so this seems a very
> >>> theoretical concern?
> >>
> >> To both Lorenzo and Toke:
> >>
> >> Yes, system page pools are never destroyed, but we might later use
> >> cpuid in non-persistent PPs. Then there will be memory leaks.
> >> I was able to trigger this by creating bpf/test_run page_pools with the
> >> cpuid set to test direct recycling of live frames.
> >
> > what about avoiding the pool being destroyed in this case? I do not like the
>
> I think I didn't get what you wanted to say here :s

My assumption here was that cpuid would be set just for the system
page_pool, so it is just a matter of not running page_pool_destroy() for
them. Anyway, in the future we could allow setting cpuid even for a
non-system page_pool if the pool is linked to a given rx-queue and the
queue is pinned to a given cpu.

Regards,
Lorenzo

>
> Rewriting cpuid doesn't introduce any new checks on the hotpath. Destroying
> the pool is slowpath and we shouldn't hurt the hotpath to handle it.
>
> > idea of overwriting the cpuid field for it.
>
> We also overwrite the pp->p.napi field a couple of lines below. It happens
> only when destroying the pool; we don't care about the fields at this point.
>
> > Regards,
> > Lorenzo
> >
> >>
> >>> I guess we could still do this in case we find other uses for setting
> >>> the cpuid; I don't think the addition of the READ_ONCE() will have any
> >>> measurable overhead on the common arches?
> >>
> >> READ_ONCE() is cheap, but I thought it's worth mentioning in the
> >> commitmsg anyway :)
> >>
> >>> -Toke
>
> Thanks,
> Olek
Alexander Lobakin <aleksander.lobakin@intel.com> writes:

> From: Toke Høiland-Jørgensen <toke@redhat.com>
> Date: Thu, 15 Feb 2024 13:05:30 +0100
>
>> Alexander Lobakin <aleksander.lobakin@intel.com> writes:
>>
>>> Now that direct recycling is performed based on pool->cpuid when set,
>>> memory leaks are possible:
>>>
>>> 1. A pool is destroyed.
>>> 2. Alloc cache is emptied (it's done only once).
>>> 3. pool->cpuid is still set.
>>> 4. napi_pp_put_page() does direct recycling based on pool->cpuid.
>>> 5. Now alloc cache is not empty, but it won't ever be freed.
>>
>> Did you actually manage to trigger this? pool->cpuid is only set for the
>> system page pool instance, which is never destroyed; so this seems a very
>> theoretical concern?
>
> To both Lorenzo and Toke:
>
> Yes, system page pools are never destroyed, but we might later use
> cpuid in non-persistent PPs. Then there will be memory leaks.
> I was able to trigger this by creating bpf/test_run page_pools with the
> cpuid set to test direct recycling of live frames.
>
>> I guess we could still do this in case we find other uses for setting
>> the cpuid; I don't think the addition of the READ_ONCE() will have any
>> measurable overhead on the common arches?
>
> READ_ONCE() is cheap, but I thought it's worth mentioning in the
> commitmsg anyway :)

Right. I'm OK with changing this as a form of future-proofing if we end
up finding other uses for setting the cpuid field, so:

Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
> Now that direct recycling is performed based on pool->cpuid when set,
> memory leaks are possible:
>
> 1. A pool is destroyed.
> 2. Alloc cache is emptied (it's done only once).
> 3. pool->cpuid is still set.
> 4. napi_pp_put_page() does direct recycling based on pool->cpuid.
> 5. Now alloc cache is not empty, but it won't ever be freed.
>
> In order to avoid that, rewrite pool->cpuid to -1 when unlinking NAPI to
> make sure no direct recycling will be possible after emptying the cache.
> This involves a bit of overhead as pool->cpuid now must be accessed
> via READ_ONCE() to avoid partial reads.
> Rename page_pool_unlink_napi() -> page_pool_disable_direct_recycling()
> to reflect what it actually does and unexport it.
Hi Alexander,

IIUC the reported issue, it requires that the page_pool is destroyed
(correct?), but the system page_pool (the only one with cpuid not set to
-1) will never be destroyed at runtime (or at least we should avoid
that). Am I missing something?

Regards,
Lorenzo
>
> Fixes: 2b0cfa6e4956 ("net: add generic percpu page_pool allocator")
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
> ---
> include/net/page_pool/types.h | 5 -----
> net/core/page_pool.c | 10 +++++++---
> net/core/skbuff.c | 2 +-
> 3 files changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index 3828396ae60c..3590fbe6e3f1 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -210,17 +210,12 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
> struct xdp_mem_info;
>
> #ifdef CONFIG_PAGE_POOL
> -void page_pool_unlink_napi(struct page_pool *pool);
> void page_pool_destroy(struct page_pool *pool);
> void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
> struct xdp_mem_info *mem);
> void page_pool_put_page_bulk(struct page_pool *pool, void **data,
> int count);
> #else
> -static inline void page_pool_unlink_napi(struct page_pool *pool)
> -{
> -}
> -
> static inline void page_pool_destroy(struct page_pool *pool)
> {
> }
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index 89c835fcf094..e8b9399d8e32 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -949,8 +949,13 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
> pool->xdp_mem_id = mem->id;
> }
>
> -void page_pool_unlink_napi(struct page_pool *pool)
> +static void page_pool_disable_direct_recycling(struct page_pool *pool)
> {
> + /* Disable direct recycling based on pool->cpuid.
> + * Paired with READ_ONCE() in napi_pp_put_page().
> + */
> + WRITE_ONCE(pool->cpuid, -1);
> +
> if (!pool->p.napi)
> return;
>
> @@ -962,7 +967,6 @@ void page_pool_unlink_napi(struct page_pool *pool)
>
> WRITE_ONCE(pool->p.napi, NULL);
> }
> -EXPORT_SYMBOL(page_pool_unlink_napi);
>
> void page_pool_destroy(struct page_pool *pool)
> {
> @@ -972,7 +976,7 @@ void page_pool_destroy(struct page_pool *pool)
> if (!page_pool_put(pool))
> return;
>
> - page_pool_unlink_napi(pool);
> + page_pool_disable_direct_recycling(pool);
> page_pool_free_frag(pool);
>
> if (!page_pool_release(pool))
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 0d9a489e6ae1..b41856585c24 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1018,7 +1018,7 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
> unsigned int cpuid = smp_processor_id();
>
> allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid;
> - allow_direct |= (pp->cpuid == cpuid);
> + allow_direct |= READ_ONCE(pp->cpuid) == cpuid;
> }
>
> /* Driver set this to memory recycling info. Reset it on recycle.
> --
> 2.43.0
>