From nobody Thu Dec 18 08:07:01 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 9B4C9C04E69 for ; Mon, 14 Aug 2023 17:54:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231592AbjHNRyU (ORCPT ); Mon, 14 Aug 2023 13:54:20 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56958 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231582AbjHNRxx (ORCPT ); Mon, 14 Aug 2023 13:53:53 -0400 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 45F241711 for ; Mon, 14 Aug 2023 10:53:40 -0700 (PDT) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 54E43143D; Mon, 14 Aug 2023 10:54:22 -0700 (PDT) Received: from e121345-lin.cambridge.arm.com (e121345-lin.cambridge.arm.com [10.1.196.40]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 117363F762; Mon, 14 Aug 2023 10:53:38 -0700 (PDT) From: Robin Murphy To: joro@8bytes.org Cc: will@kernel.org, iommu@lists.linux.dev, linux-kernel@vger.kernel.org, john.g.garry@oracle.com, zhangzekun11@huawei.com Subject: [PATCH 1/2] iommu/iova: Make the rcache depot scale better Date: Mon, 14 Aug 2023 18:53:33 +0100 Message-Id: <2c3a0f6b381ae5f743c1b28bbc6eae486160f3b7.1692033783.git.robin.murphy@arm.com> X-Mailer: git-send-email 2.39.2.101.g768bb238c484.dirty In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" The algorithm in the original paper specifies the storage of full magazines in the depot as an unbounded list rather than a fixed-size array. 
It turns out to be pretty straightforward to do this in our implementation with no significant loss of efficiency. This allows the depot to scale up to the working set sizes of larger systems, while also potentially saving some memory on smaller ones too. Signed-off-by: Robin Murphy Reviewed-by: John Garry --- drivers/iommu/iova.c | 65 ++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 10b964600948..d2de6fb0e9f4 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -625,10 +625,16 @@ EXPORT_SYMBOL_GPL(reserve_iova); * will be wasted. */ #define IOVA_MAG_SIZE 127 -#define MAX_GLOBAL_MAGS 32 /* magazines per bin */ =20 struct iova_magazine { - unsigned long size; + /* + * Only full magazines are inserted into the depot, so we can avoid + * a separate list head and preserve maximum space-efficiency. + */ + union { + unsigned long size; + struct iova_magazine *next; + }; unsigned long pfns[IOVA_MAG_SIZE]; }; =20 @@ -640,8 +646,7 @@ struct iova_cpu_rcache { =20 struct iova_rcache { spinlock_t lock; - unsigned long depot_size; - struct iova_magazine *depot[MAX_GLOBAL_MAGS]; + struct iova_magazine *depot; struct iova_cpu_rcache __percpu *cpu_rcaches; }; =20 @@ -717,6 +722,21 @@ static void iova_magazine_push(struct iova_magazine *m= ag, unsigned long pfn) mag->pfns[mag->size++] =3D pfn; } =20 +static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache) +{ + struct iova_magazine *mag =3D rcache->depot; + + rcache->depot =3D mag->next; + mag->size =3D IOVA_MAG_SIZE; + return mag; +} + +static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazi= ne *mag) +{ + mag->next =3D rcache->depot; + rcache->depot =3D mag; +} + int iova_domain_init_rcaches(struct iova_domain *iovad) { unsigned int cpu; @@ -734,7 +754,6 @@ int iova_domain_init_rcaches(struct iova_domain *iovad) =20 rcache =3D &iovad->rcaches[i]; spin_lock_init(&rcache->lock); - 
rcache->depot_size =3D 0; rcache->cpu_rcaches =3D __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); if (!rcache->cpu_rcaches) { @@ -776,7 +795,6 @@ static bool __iova_rcache_insert(struct iova_domain *io= vad, struct iova_rcache *rcache, unsigned long iova_pfn) { - struct iova_magazine *mag_to_free =3D NULL; struct iova_cpu_rcache *cpu_rcache; bool can_insert =3D false; unsigned long flags; @@ -794,12 +812,7 @@ static bool __iova_rcache_insert(struct iova_domain *i= ovad, =20 if (new_mag) { spin_lock(&rcache->lock); - if (rcache->depot_size < MAX_GLOBAL_MAGS) { - rcache->depot[rcache->depot_size++] =3D - cpu_rcache->loaded; - } else { - mag_to_free =3D cpu_rcache->loaded; - } + iova_depot_push(rcache, cpu_rcache->loaded); spin_unlock(&rcache->lock); =20 cpu_rcache->loaded =3D new_mag; @@ -812,11 +825,6 @@ static bool __iova_rcache_insert(struct iova_domain *i= ovad, =20 spin_unlock_irqrestore(&cpu_rcache->lock, flags); =20 - if (mag_to_free) { - iova_magazine_free_pfns(mag_to_free, iovad); - iova_magazine_free(mag_to_free); - } - return can_insert; } =20 @@ -854,9 +862,9 @@ static unsigned long __iova_rcache_get(struct iova_rcac= he *rcache, has_pfn =3D true; } else { spin_lock(&rcache->lock); - if (rcache->depot_size > 0) { + if (rcache->depot) { iova_magazine_free(cpu_rcache->loaded); - cpu_rcache->loaded =3D rcache->depot[--rcache->depot_size]; + cpu_rcache->loaded =3D iova_depot_pop(rcache); has_pfn =3D true; } spin_unlock(&rcache->lock); @@ -894,10 +902,10 @@ static void free_iova_rcaches(struct iova_domain *iov= ad) { struct iova_rcache *rcache; struct iova_cpu_rcache *cpu_rcache; + struct iova_magazine *mag; unsigned int cpu; - int i, j; =20 - for (i =3D 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + for (int i =3D 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache =3D &iovad->rcaches[i]; if (!rcache->cpu_rcaches) break; @@ -907,8 +915,8 @@ static void free_iova_rcaches(struct iova_domain *iovad) iova_magazine_free(cpu_rcache->prev); } 
free_percpu(rcache->cpu_rcaches); - for (j =3D 0; j < rcache->depot_size; ++j) - iova_magazine_free(rcache->depot[j]); + while (rcache->depot && (mag =3D iova_depot_pop(rcache))) + iova_magazine_free(mag); } =20 kfree(iovad->rcaches); @@ -941,17 +949,16 @@ static void free_cpu_cached_iovas(unsigned int cpu, s= truct iova_domain *iovad) static void free_global_cached_iovas(struct iova_domain *iovad) { struct iova_rcache *rcache; + struct iova_magazine *mag; unsigned long flags; - int i, j; =20 - for (i =3D 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + for (int i =3D 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache =3D &iovad->rcaches[i]; spin_lock_irqsave(&rcache->lock, flags); - for (j =3D 0; j < rcache->depot_size; ++j) { - iova_magazine_free_pfns(rcache->depot[j], iovad); - iova_magazine_free(rcache->depot[j]); + while (rcache->depot && (mag =3D iova_depot_pop(rcache))) { + iova_magazine_free_pfns(mag, iovad); + iova_magazine_free(mag); } - rcache->depot_size =3D 0; spin_unlock_irqrestore(&rcache->lock, flags); } } --=20 2.39.2.101.g768bb238c484.dirty From nobody Thu Dec 18 08:07:01 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8678AC001DB for ; Mon, 14 Aug 2023 17:54:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231547AbjHNRyS (ORCPT ); Mon, 14 Aug 2023 13:54:18 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:57028 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231588AbjHNRxx (ORCPT ); Mon, 14 Aug 2023 13:53:53 -0400 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 85F56172B for ; Mon, 14 Aug 2023 10:53:41 -0700 (PDT) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id
8C2DF1477; Mon, 14 Aug 2023 10:54:23 -0700 (PDT) Received: from e121345-lin.cambridge.arm.com (e121345-lin.cambridge.arm.com [10.1.196.40]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 48FF93F762; Mon, 14 Aug 2023 10:53:40 -0700 (PDT) From: Robin Murphy To: joro@8bytes.org Cc: will@kernel.org, iommu@lists.linux.dev, linux-kernel@vger.kernel.org, john.g.garry@oracle.com, zhangzekun11@huawei.com Subject: [PATCH 2/2] iommu/iova: Manage the depot list size Date: Mon, 14 Aug 2023 18:53:34 +0100 Message-Id: X-Mailer: git-send-email 2.39.2.101.g768bb238c484.dirty In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Automatically scaling the depot up to suit the peak capacity of a workload is all well and good, but it would be nice to have a way to scale it back down again if the workload changes. To that end, add automatic reclaim that will gradually free unused magazines if the depot size remains above a reasonable threshold for long enough.
Signed-off-by: Robin Murphy --- Review notes: the depot work takes rcache->lock with spin_lock_irq() to match __iova_rcache_insert() and free_global_cached_iovas(), which take it with IRQs disabled; the reschedule passes IOVA_DEPOT_DELAY directly because it is already in jiffies; the free_iova_rcaches() context carries the empty-depot guard from patch 1/2. drivers/iommu/iova.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d2de6fb0e9f4..76a7d694708e 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -11,6 +11,7 @@ #include <linux/smp.h> #include <linux/bitops.h> #include <linux/cpu.h> +#include <linux/workqueue.h> =20 /* The anchor node sits above the top of the usable address space */ #define IOVA_ANCHOR ~0UL @@ -626,6 +627,8 @@ EXPORT_SYMBOL_GPL(reserve_iova); */ #define IOVA_MAG_SIZE 127 =20 +#define IOVA_DEPOT_DELAY msecs_to_jiffies(100) + struct iova_magazine { /* * Only full magazines are inserted into the depot, so we can avoid @@ -646,8 +649,11 @@ struct iova_cpu_rcache { =20 struct iova_rcache { spinlock_t lock; + unsigned int depot_size; struct iova_magazine *depot; struct iova_cpu_rcache __percpu *cpu_rcaches; + struct iova_domain *iovad; + struct delayed_work work; }; =20 static struct iova_magazine *iova_magazine_alloc(gfp_t flags) @@ -728,6 +734,7 @@ static struct iova_magazine *iova_depot_pop(struct iova= _rcache *rcache) =20 rcache->depot =3D mag->next; mag->size =3D IOVA_MAG_SIZE; + rcache->depot_size--; return mag; } =20 @@ -735,6 +742,24 @@ static void iova_depot_push(struct iova_rcache *rcache= , struct iova_magazine *ma { mag->next =3D rcache->depot; rcache->depot =3D mag; + rcache->depot_size++; +} + +static void iova_depot_work_func(struct work_struct *work) +{ + struct iova_rcache *rcache =3D container_of(work, typeof(*rcache), work.w= ork); + struct iova_magazine *mag =3D NULL; + + spin_lock_irq(&rcache->lock); + if (rcache->depot_size > num_online_cpus()) + mag =3D iova_depot_pop(rcache); + spin_unlock_irq(&rcache->lock); + + if (mag) { + iova_magazine_free_pfns(mag, rcache->iovad); + iova_magazine_free(mag); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY); + } } =20 int iova_domain_init_rcaches(struct iova_domain *iovad) @@ -754,6 +779,8 @@ int iova_domain_init_rcaches(struct iova_domain *iovad) =20 rcache
=3D &iovad->rcaches[i]; spin_lock_init(&rcache->lock); + rcache->iovad =3D iovad; + INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func); rcache->cpu_rcaches =3D __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); if (!rcache->cpu_rcaches) { @@ -814,6 +841,7 @@ static bool __iova_rcache_insert(struct iova_domain *io= vad, spin_lock(&rcache->lock); iova_depot_push(rcache, cpu_rcache->loaded); spin_unlock(&rcache->lock); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY); =20 cpu_rcache->loaded =3D new_mag; can_insert =3D true; @@ -915,6 +943,7 @@ static void free_iova_rcaches(struct iova_domain *iovad) iova_magazine_free(cpu_rcache->prev); } free_percpu(rcache->cpu_rcaches); + cancel_delayed_work_sync(&rcache->work); while (rcache->depot && (mag =3D iova_depot_pop(rcache))) iova_magazine_free(mag); } --=20 2.39.2.101.g768bb238c484.dirty