From nobody Sat Oct 4 08:05:17 2025 Received: from lankhorst.se (lankhorst.se [141.105.120.124]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 03CA8335BB1; Tue, 19 Aug 2025 11:56:40 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=141.105.120.124 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1755604602; cv=none; b=GQAeQRxWq5+2qWEif4asrv8C8a/RGo9WPIpe2XvyOMHOcRfeY2CLd2TpOHPf9hYZ9KSMrckJnT4C40tiE+AUp5Gc+r5iaV1XIQ8u5kp52jB9HZrVU/8zLd4qV1iXqDQoLQS+R0MOp0lCsPV+Xjiu8ib6Ta5ZnMQEdDqCI4ELqXw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1755604602; c=relaxed/simple; bh=BBc4qktafzeeoB3et84zl/DrK/MKQK1W4gtygBAL5d8=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=N+bMJEyIg3vdfzA2cwETdyG8l8KluUdJX1dYjMa8acD/kWthmVdA2dJMoYAAxDmOhLGztfVGqmfI9Zw+4vAeL0lSwHX7sQwgn4gQXOd8PadiwP8bNtjTw+S5HaPfB9VuiRdhB64ZhVDX/D/cB9Z0T5jblkipLfHEE2uVIAkoYlI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=lankhorst.se; spf=pass smtp.mailfrom=lankhorst.se; arc=none smtp.client-ip=141.105.120.124 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=lankhorst.se Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=lankhorst.se From: Maarten Lankhorst To: Lucas De Marchi , =?UTF-8?q?=27Thomas=20Hellstr=C3=B6m=27?= , Rodrigo Vivi , David Airlie , Simona Vetter , Maarten Lankhorst , Maxime Ripard , Natalie Vock , Tejun Heo , Johannes Weiner , =?UTF-8?q?=27Michal=20Koutn=C3=BD=27?= , Michal Hocko , Roman Gushchin , Shakeel Butt , Muchun Song , Andrew Morton , David Hildenbrand , Lorenzo Stoakes , "'Liam R . Howlett'" , Vlastimil Babka , Mike Rapoport , Suren Baghdasaryan , Thomas Zimmermann Cc: Michal Hocko , intel-xe@lists.freedesktop.org, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, linux-mm@kvack.org Subject: [RFC 2/3] cgroup/dmem: Implement pinning device memory Date: Tue, 19 Aug 2025 13:49:35 +0200 Message-ID: <20250819114932.597600-7-dev@lankhorst.se> X-Mailer: git-send-email 2.50.0 In-Reply-To: <20250819114932.597600-5-dev@lankhorst.se> References: <20250819114932.597600-5-dev@lankhorst.se> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a function to pin, and to unipn memory and adjust the calculations in dmem_cgroup_state_evict_valuable(). Signed-off-by: Maarten Lankhorst --- include/linux/cgroup_dmem.h | 2 ++ kernel/cgroup/dmem.c | 57 +++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/include/linux/cgroup_dmem.h b/include/linux/cgroup_dmem.h index dd4869f1d736e..a981bb692ba22 100644 --- a/include/linux/cgroup_dmem.h +++ b/include/linux/cgroup_dmem.h @@ -21,6 +21,8 @@ int dmem_cgroup_try_charge(struct dmem_cgroup_region *reg= ion, u64 size, struct dmem_cgroup_pool_state **ret_pool, struct dmem_cgroup_pool_state **ret_limit_pool); void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size); +int dmem_cgroup_try_pin(struct dmem_cgroup_pool_state *pool, u64 size); +void dmem_cgroup_unpin(struct dmem_cgroup_pool_state *pool, u64 size); bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit= _pool, struct dmem_cgroup_pool_state *test_pool, bool ignore_low, bool *ret_hit_low); diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index 10b63433f0573..ec8b1ffec78de 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -147,6 +147,11 @@ static u64 get_resource_current(struct dmem_cgroup_poo= l_state *pool) return pool ? page_counter_read(&pool->cnt) : 0; } =20 +static u64 get_resource_pinned(struct dmem_cgroup_pool_state *pool) +{ + return pool ? page_counter_pinned(&pool->cnt) : 0; +} + static void reset_all_resource_limits(struct dmem_cgroup_pool_state *rpool) { set_resource_min(rpool, 0); @@ -270,7 +275,7 @@ bool dmem_cgroup_state_evict_valuable(struct dmem_cgrou= p_pool_state *limit_pool, { struct dmem_cgroup_pool_state *pool =3D test_pool; struct page_counter *ctest; - u64 used, min, low; + u64 used, min, low, pinned; =20 /* Can always evict from current pool, despite limits */ if (limit_pool =3D=3D test_pool) @@ -296,16 +301,18 @@ bool dmem_cgroup_state_evict_valuable(struct dmem_cgr= oup_pool_state *limit_pool, =20 ctest =3D &test_pool->cnt; =20 + /* Protection is calculated without pinned memory */ dmem_cgroup_calculate_protection(limit_pool, test_pool); =20 used =3D page_counter_read(ctest); - min =3D READ_ONCE(ctest->emin); + pinned =3D page_counter_pinned(ctest); + min =3D READ_ONCE(ctest->emin) + pinned; =20 if (used <=3D min) return false; =20 if (!ignore_low) { - low =3D READ_ONCE(ctest->elow); + low =3D READ_ONCE(ctest->elow) + pinned; if (used > low) return true; =20 @@ -641,6 +648,41 @@ int dmem_cgroup_try_charge(struct dmem_cgroup_region *= region, u64 size, } EXPORT_SYMBOL_GPL(dmem_cgroup_try_charge); =20 +/** + * dmem_cgroup_unpin() - Unpin from a pool. + * @pool: Pool to unpin. + * @size: Size to unpin. + * + * Undoes the effects of dmem_cgroup_try_pin. + * Must be called with the returned pool as argument, + * and same @index and @size. + */ +void dmem_cgroup_unpin(struct dmem_cgroup_pool_state *pool, u64 size) +{ + if (pool) + page_counter_unpin(&pool->cnt, size); +} +EXPORT_SYMBOL_GPL(dmem_cgroup_unpin); + +/** + * dmem_cgroup_try_pin() - Try pinning an existing allocation to a region. + * @pool: dmem region to pin + * @size: Size (in bytes) to pin. + * + * This function pins in @pool for a size of @size bytes. + * + * If the function succeeds, the memory is succesfully accounted as being = pinned. + * The memory may not be uncharged before unpin is called. + * + * Return: 0 on success, -EAGAIN on hitting a limit, or a negative errno o= n failure. + */ +int dmem_cgroup_try_pin(struct dmem_cgroup_pool_state *pool, u64 size) +{ + return page_counter_try_pin(&pool->cnt, size) ? 0 : -EAGAIN; + +} +EXPORT_SYMBOL_GPL(dmem_cgroup_try_pin); + static int dmem_cgroup_region_capacity_show(struct seq_file *sf, void *v) { struct dmem_cgroup_region *region; @@ -756,6 +798,11 @@ static int dmem_cgroup_region_current_show(struct seq_= file *sf, void *v) return dmemcg_limit_show(sf, v, get_resource_current); } =20 +static int dmem_cgroup_region_pinned_show(struct seq_file *sf, void *v) +{ + return dmemcg_limit_show(sf, v, get_resource_pinned); +} + static int dmem_cgroup_region_min_show(struct seq_file *sf, void *v) { return dmemcg_limit_show(sf, v, get_resource_min); @@ -799,6 +846,10 @@ static struct cftype files[] =3D { .name =3D "current", .seq_show =3D dmem_cgroup_region_current_show, }, + { + .name =3D "pinned", + .seq_show =3D dmem_cgroup_region_pinned_show, + }, { .name =3D "min", .write =3D dmem_cgroup_region_min_write, --=20 2.50.0