As part of the KVM SMMUv3 series[1], we are trying to factor out
the kernel specific code from io-pgtable-arm so it can also compile
for the hypervisor.
Jason pointed out that the DMA-API calls are not really needed [2].
Looking further into this: initially, the io-pgtable API let drivers
do the CMOs themselves via tlb::flush_pgtable(), and drivers used the
DMA API (dma_map/unmap_single) only to perform those CMOs, since the
low-level cache maintenance functions are not available to modules.

This was later moved into the core code [3], with the possibility of
converting it to full-blown DMA-API usage if a use case ever appeared.
However, no such use case has appeared, so we can simplify the code
slightly by doing the CMOs directly instead of going through the
DMA-API functions just for CMOs.
Although HTTU is not used at the moment, keep the
arch_sync_dma_for_cpu() call in __arm_lpae_free_pages(), as this is
not a hot path.
Removing the DMA-API calls also removes some extra checks that ensured
the IOMMU can deal with physical addresses directly. We could add such
checks at page table creation time (something like
use_dma_iommu()/is_swiotlb_active()), but that seemed a little too
much, as Linux doesn't support such topologies anyway.
[1] https://lore.kernel.org/linux-iommu/20251117184815.1027271-1-smostafa@google.com/
[2] https://lore.kernel.org/linux-iommu/20251216005834.GC31492@ziepe.ca/
[3] https://lore.kernel.org/linux-iommu/7c5584d3efa61ee6b0b87efb72f24f32852aafb7.1438195011.git.robin.murphy@arm.com/
Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
drivers/iommu/io-pgtable-arm.c | 39 +++++++---------------------------
1 file changed, 8 insertions(+), 31 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index e6626004b323..0da9195155ec 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -15,7 +15,7 @@
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <asm/barrier.h>
@@ -254,7 +254,6 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
{
struct device *dev = cfg->iommu_dev;
size_t alloc_size;
- dma_addr_t dma;
void *pages;
/*
@@ -271,32 +270,10 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
if (!pages)
return NULL;
- if (!cfg->coherent_walk) {
- dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
- if (dma_mapping_error(dev, dma))
- goto out_free;
- /*
- * We depend on the IOMMU being able to work with any physical
- * address directly, so if the DMA layer suggests otherwise by
- * translating or truncating them, that bodes very badly...
- */
- if (dma != virt_to_phys(pages))
- goto out_unmap;
- }
-
+ if (!cfg->coherent_walk)
+ arch_sync_dma_for_device(__arm_lpae_dma_addr(pages), size,
+ DMA_TO_DEVICE);
return pages;
-
-out_unmap:
- dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
- dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
-
-out_free:
- if (cfg->free)
- cfg->free(cookie, pages, size);
- else
- iommu_free_pages(pages);
-
- return NULL;
}
static void __arm_lpae_free_pages(void *pages, size_t size,
@@ -304,8 +281,8 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
void *cookie)
{
if (!cfg->coherent_walk)
- dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
- size, DMA_TO_DEVICE);
+ arch_sync_dma_for_cpu(__arm_lpae_dma_addr(pages),
+ size, DMA_TO_DEVICE);
if (cfg->free)
cfg->free(cookie, pages, size);
@@ -316,8 +293,8 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
struct io_pgtable_cfg *cfg)
{
- dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
- sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
+ arch_sync_dma_for_device(__arm_lpae_dma_addr(ptep),
+ sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
}
static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries)
--
2.52.0.351.gbe84eed79e-goog
On Thu, Jan 08, 2026 at 11:38:46AM +0000, Mostafa Saleh wrote:
> Jason pointed out that the DMA-API calls are not really needed [2].
>
> Looking more into this. Initially, the io-pgtable API let drivers
> do the CMOs using tlb::flush_pgtable() where drivers were using the
> DMA API (map/unmap_single) only to do CMOs as the low-level cache
> functions won’t be available for modules.

This isn't what I meant at all; the iommu-pages.h could do the flush
inside itself using the already existing
iommu_pages_flush_incoherent() instead of open coding the dma api
calls everywhere, and maybe that could directly call the arch function
on arm (as x86 does) which would be easier to implement in pkvm's
hypervisor.

Then it is reasonable to have a pkvm version of the iommu-pages api
for the page table and driver to use.

Jason
On Thu, Jan 8, 2026 at 12:59 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Thu, Jan 08, 2026 at 11:38:46AM +0000, Mostafa Saleh wrote:
> > Jason pointed out that the DMA-API calls are not really needed [2].
> >
> > Looking more into this. Initially, the io-pgtable API let drivers
> > do the CMOs using tlb::flush_pgtable() where drivers were using the
> > DMA API (map/unmap_single) only to do CMOs as the low-level cache
> > functions won’t be available for modules.
>
> This isn't what I meant at all; the iommu-pages.h could do the flush
> inside itself using the already existing
> iommu_pages_flush_incoherent() instead of open coding the dma api
> calls everywhere, and maybe that could directly call the arch function
> on arm (as x86 does) which would be easier to implement in pkvm's
> hypervisor.
>

I see, so basically, we add the check for IOMMU_PAGES_USE_DMA_API in
iommu_pages_flush_incoherent() with the DMA-API stuff and call it from
io-pgtable-arm instead.

> Then it is reasonable to have a pkvm version of the iommu-pages api
> for the page table and driver to use.
>

That makes sense, I will look it over and send another version if
Robin is OK with it.

Thanks,
Mostafa

> Jason
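A minimal sketch of that idea, for illustration (hypothetical: the
IOMMU_PAGES_USE_DMA_API switch comes from the discussion above, and the
signature of iommu_pages_flush_incoherent() is assumed here rather than
taken from mainline):

	/*
	 * Hypothetical sketch: the helper hides whether the flush goes
	 * through the DMA API or straight to the arch cache maintenance
	 * code, so callers such as io-pgtable-arm open code neither.
	 */
	static inline void iommu_pages_flush_incoherent(struct device *dev,
							void *virt, size_t size)
	{
	#ifdef IOMMU_PAGES_USE_DMA_API
		dma_sync_single_for_device(dev, virt_to_phys(virt), size,
					   DMA_TO_DEVICE);
	#else
		/* As x86 does: call the arch function directly. */
		arch_sync_dma_for_device(virt_to_phys(virt), size,
					 DMA_TO_DEVICE);
	#endif
	}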
On Thu, Jan 8, 2026 at 1:27 PM Mostafa Saleh <smostafa@google.com> wrote:
>
> On Thu, Jan 8, 2026 at 12:59 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
> >
> > On Thu, Jan 08, 2026 at 11:38:46AM +0000, Mostafa Saleh wrote:
> > > Jason pointed out that the DMA-API calls are not really needed [2].
> > >
> > > Looking more into this. Initially, the io-pgtable API let drivers
> > > do the CMOs using tlb::flush_pgtable() where drivers were using the
> > > DMA API (map/unmap_single) only to do CMOs as the low-level cache
> > > functions won’t be available for modules.
> >
> > This isn't what I meant at all; the iommu-pages.h could do the flush
> > inside itself using the already existing
> > iommu_pages_flush_incoherent() instead of open coding the dma api
> > calls everywhere, and maybe that could directly call the arch function
> > on arm (as x86 does) which would be easier to implement in pkvm's
> > hypervisor.
> >
>
> I see, so basically, we add the check for IOMMU_PAGES_USE_DMA_API in
> iommu_pages_flush_incoherent() with the DMA-API stuff and call it from
> io-pgtable-arm instead.
>

iommu_pages_flush_incoherent() is also used to flush non-cacheable
pages in generic_pt from flush_writes_*(), so it might not be that
simple; we might need to add a new variant.

Thanks,
Mostafa

> > Then it is reasonable to have a pkvm version of the iommu-pages api
> > for the page table and driver to use.
> >
>
> That makes sense, I will look it over and send another version if
> Robin is OK with it.
>
> Thanks,
> Mostafa
>
> > Jason
On Thu, Jan 08, 2026 at 01:44:51PM +0000, Mostafa Saleh wrote:
> On Thu, Jan 8, 2026 at 1:27 PM Mostafa Saleh <smostafa@google.com> wrote:
> >
> > On Thu, Jan 8, 2026 at 12:59 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > >
> > > On Thu, Jan 08, 2026 at 11:38:46AM +0000, Mostafa Saleh wrote:
> > > > Jason pointed out that the DMA-API calls are not really needed [2].
> > > >
> > > > Looking more into this. Initially, the io-pgtable API let drivers
> > > > do the CMOs using tlb::flush_pgtable() where drivers were using the
> > > > DMA API (map/unmap_single) only to do CMOs as the low-level cache
> > > > functions won’t be available for modules.
> > >
> > > This isn't what I meant at all; the iommu-pages.h could do the flush
> > > inside itself using the already existing
> > > iommu_pages_flush_incoherent() instead of open coding the dma api
> > > calls everywhere, and maybe that could directly call the arch function
> > > on arm (as x86 does) which would be easier to implement in pkvm's
> > > hypervisor.
> > >
> >
> > I see, so basically, we add the check for IOMMU_PAGES_USE_DMA_API in
> > iommu_pages_flush_incoherent() with the DMA-API stuff and call it from
> > io-pgtable-arm instead.
> >
>
> iommu_pages_flush_incoherent() is also used to flush non-cacheable
> pages in generic_pt from flush_writes_*(), so it might not be that
> simple; we might need to add a new variant.

There are no non-cacheable pages in the iommu-pages system???

All memory comes from folios and is mapped to the CPU cacheable.

The general API requires the user of iommu-pages to know if the
underlying HW is going to do non-coherent DMA; in that case it must
call iommu_pages_flush_incoherent() before the HW does DMA.

Effectively what I'm saying here is to convert the arm page table to
use iommu-pages and then put your pkvm abstraction at the iommu-pages
level instead of trying to hack up all these users.

You can also do the same conversion to the smmu driver to use
iommu-pages instead of the DMA API; AMD and Intel are doing this
already.

Jason
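For concreteness, after such a conversion the PTE sync in
io-pgtable-arm could collapse to something like the sketch below
(assuming the same hypothetical iommu_pages_flush_incoherent()
signature as the earlier sketch):

	static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
					struct io_pgtable_cfg *cfg)
	{
		/* Make the updated PTEs visible to a non-coherent walker. */
		iommu_pages_flush_incoherent(cfg->iommu_dev, ptep,
					     sizeof(*ptep) * num_entries);
	}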
On Thu, Jan 8, 2026 at 2:45 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Thu, Jan 08, 2026 at 01:44:51PM +0000, Mostafa Saleh wrote:
> > On Thu, Jan 8, 2026 at 1:27 PM Mostafa Saleh <smostafa@google.com> wrote:
> > >
> > > On Thu, Jan 8, 2026 at 12:59 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
> > > >
> > > > On Thu, Jan 08, 2026 at 11:38:46AM +0000, Mostafa Saleh wrote:
> > > > > Jason pointed out that the DMA-API calls are not really needed [2].
> > > > >
> > > > > Looking more into this. Initially, the io-pgtable API let drivers
> > > > > do the CMOs using tlb::flush_pgtable() where drivers were using the
> > > > > DMA API (map/unmap_single) only to do CMOs as the low-level cache
> > > > > functions won’t be available for modules.
> > > >
> > > > This isn't what I meant at all; the iommu-pages.h could do the flush
> > > > inside itself using the already existing
> > > > iommu_pages_flush_incoherent() instead of open coding the dma api
> > > > calls everywhere, and maybe that could directly call the arch function
> > > > on arm (as x86 does) which would be easier to implement in pkvm's
> > > > hypervisor.
> > > >
> > >
> > > I see, so basically, we add the check for IOMMU_PAGES_USE_DMA_API in
> > > iommu_pages_flush_incoherent() with the DMA-API stuff and call it from
> > > io-pgtable-arm instead.
> > >
> >
> > iommu_pages_flush_incoherent() is also used to flush non-cacheable
> > pages in generic_pt from flush_writes_*(), so it might not be that
> > simple; we might need to add a new variant.
>
> There are no non-cacheable pages in the iommu-pages system???
>

Sorry, I meant flushing writes for memory accessed by non-coherent
IOMMUs.

> All memory comes from folios and is mapped to the CPU cacheable.
>
> The general API requires the user of iommu-pages to know if the
> underlying HW is going to do non-coherent DMA; in that case it must
> call iommu_pages_flush_incoherent() before the HW does DMA.
>
> Effectively what I'm saying here is to convert the arm page table to
> use iommu-pages and then put your pkvm abstraction at the iommu-pages
> level instead of trying to hack up all these users.
>

I see, I need to check how that plays with the custom allocator in the
io-pgtable though. I will look further into it.

Thanks,
Mostafa

> You can also do the same conversion to the smmu driver to use
> iommu-pages instead of the DMA API; AMD and Intel are doing this
> already.
>
> Jason
On 2026-01-08 11:38 am, Mostafa Saleh wrote:
> As part of the KVM SMMUv3 series[1], we are trying to factor out
> the kernel specific code from io-pgtable-arm so it can also compile
> for the hypervisor.
>
> Jason pointed out that the DMA-API calls are not really needed [2].
>
> Looking more into this. Initially, the io-pgtable API let drivers
> do the CMOs using tlb::flush_pgtable() where drivers were using the
> DMA API (map/unmap_single) only to do CMOs as the low-level cache
> functions won’t be available for modules.
>
> This was later moved to the core code [3], with possibility to
> convert it to full blown DMA-API code if there was a use case.
I don't understand that sentence at all.
> However, no such use case appeared, and we can simplify the code
> slightly by doing CMO directly instead of going through the DMA-API
> functions just for CMOs.
>
> Although HTTU is not used at the moment, leave the
> arch_sync_dma_for_cpu() in __arm_lpae_free_pages() as this is not a
> hot path.
>
> Removing the DMA-API will also remove some extra checks ensuring that
> the IOMMU can deal with physical addrs. we can add those check at
> page table creation time (Something as use_dma_iommu/is_swiotlb_active
> However, that seemed a little too much as Linux doesn't support such
> topologies anyways.
NAK - io-pgtable is regular driver code doing what is absolutely a DMA
mapping operation. How would Panfrost calling arch_sync_* signal to hyp
that the page is now being used for DMA so needs mapping/pinning at
stage 2? We can't even say that that doesn't apply to "real" IOMMU
drivers, since there do unfortunately exist cases where an entire "stage
1" SMMUv3 is upstream of a "stage 2" SMMUv3 such that it's only workable
if the former is assigned to EL1 as just another black-box device, while
only EL2 knows about the latter.
If "pagetable creation time" means alloc_io_pgtable_ops() as I've taken
it to, how do you specifically propose to check that pages which haven't
been allocated yet will definitely be valid for DMA in future? Also note
that the only reason for the virt_to_phys() check (which *has* often
caught genuine driver/platform configuration issues to a degree that I
would be very uncomfortable removing it) is basically laziness, in that
nobody's got round to doing the work to be able to keep track of kernel
addresses separately from the DMA addresses in the tables themselves,
and thus allow visibly-remapped pagetables - every now and then there
arises a situation where that would be really useful, so it is still on
my wishlist.
Frankly if KVM is going to have increasingly divergent requirements and
assumptions about pagetables, then it should have its own pagetable code
like it does for the CPU.
Thanks,
Robin.
> [1] https://lore.kernel.org/linux-iommu/20251117184815.1027271-1-smostafa@google.com/
> [2] https://lore.kernel.org/linux-iommu/20251216005834.GC31492@ziepe.ca/
> [3] https://lore.kernel.org/linux-iommu/7c5584d3efa61ee6b0b87efb72f24f32852aafb7.1438195011.git.robin.murphy@arm.com/
>
> Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> ---
> drivers/iommu/io-pgtable-arm.c | 39 +++++++---------------------------
> 1 file changed, 8 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index e6626004b323..0da9195155ec 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -15,7 +15,7 @@
> #include <linux/sizes.h>
> #include <linux/slab.h>
> #include <linux/types.h>
> -#include <linux/dma-mapping.h>
> +#include <linux/dma-map-ops.h>
>
> #include <asm/barrier.h>
>
> @@ -254,7 +254,6 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
> {
> struct device *dev = cfg->iommu_dev;
> size_t alloc_size;
> - dma_addr_t dma;
> void *pages;
>
> /*
> @@ -271,32 +270,10 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
> if (!pages)
> return NULL;
>
> - if (!cfg->coherent_walk) {
> - dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
> - if (dma_mapping_error(dev, dma))
> - goto out_free;
> - /*
> - * We depend on the IOMMU being able to work with any physical
> - * address directly, so if the DMA layer suggests otherwise by
> - * translating or truncating them, that bodes very badly...
> - */
> - if (dma != virt_to_phys(pages))
> - goto out_unmap;
> - }
> -
> + if (!cfg->coherent_walk)
> + arch_sync_dma_for_device(__arm_lpae_dma_addr(pages), size,
> + DMA_TO_DEVICE);
> return pages;
> -
> -out_unmap:
> - dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
> - dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
> -
> -out_free:
> - if (cfg->free)
> - cfg->free(cookie, pages, size);
> - else
> - iommu_free_pages(pages);
> -
> - return NULL;
> }
>
> static void __arm_lpae_free_pages(void *pages, size_t size,
> @@ -304,8 +281,8 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
> void *cookie)
> {
> if (!cfg->coherent_walk)
> - dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
> - size, DMA_TO_DEVICE);
> + arch_sync_dma_for_cpu(__arm_lpae_dma_addr(pages),
> + size, DMA_TO_DEVICE);
>
> if (cfg->free)
> cfg->free(cookie, pages, size);
> @@ -316,8 +293,8 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
> static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
> struct io_pgtable_cfg *cfg)
> {
> - dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
> - sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
> + arch_sync_dma_for_device(__arm_lpae_dma_addr(ptep),
> + sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
> }
>
> static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries)
On Thu, Jan 8, 2026 at 12:52 PM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 2026-01-08 11:38 am, Mostafa Saleh wrote:
> > As part of the KVM SMMUv3 series[1], we are trying to factor out
> > the kernel specific code from io-pgtable-arm so it can also compile
> > for the hypervisor.
> >
> > Jason pointed out that the DMA-API calls are not really needed [2].
> >
> > Looking more into this. Initially, the io-pgtable API let drivers
> > do the CMOs using tlb::flush_pgtable() where drivers were using the
> > DMA API (map/unmap_single) only to do CMOs as the low-level cache
> > functions won’t be available for modules.
> >
> > This was later moved to the core code [3], with possibility to
> > convert it to full blown DMA-API code if there was a use case.
>
> I don't understand that sentence at all.
>
> > However, no such use case appeared, and we can simplify the code
> > slightly by doing CMO directly instead of going through the DMA-API
> > functions just for CMOs.
> >
> > Although HTTU is not used at the moment, leave the
> > arch_sync_dma_for_cpu() in __arm_lpae_free_pages() as this is not a
> > hot path.
> >
> > Removing the DMA-API will also remove some extra checks ensuring that
> > the IOMMU can deal with physical addrs. we can add those check at
> > page table creation time (Something as use_dma_iommu/is_swiotlb_active
> > However, that seemed a little too much as Linux doesn't support such
> > topologies anyways.
>
> NAK - io-pgtable is regular driver code doing what is absolutely a DMA
> mapping operation. How would Panfrost calling arch_sync_* signal to hyp
> that the page is now being used for DMA so needs mapping/pinning at
> stage 2? We can't even say that that doesn't apply to "real" IOMMU
> drivers, since there do unfortunately exist cases where an entire "stage
> 1" SMMUv3 is upstream of a "stage 2" SMMUv3 such that it's only workable
> if the former is assigned to EL1 as just another black-box device, while
> only EL2 knows about the latter.
>
This is not about Stage-1/Stage-2 assignment or pinning; my simplistic
understanding is that we don't deal with cascading IOMMUs, so the DMA
API here must go through DMA-direct, which boils down to CMOs.

My intention was to perform the CMOs directly instead of calling the
DMA API, to make the code slightly more portable (and EL2 can provide
its own arch_sync_*).
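For concreteness, a minimal sketch of an EL2-side provider
(hypothetical wiring; kvm_flush_dcache_to_poc() and hyp_phys_to_virt()
are existing pKVM/nVHE helpers, but using them as a drop-in
arch_sync_dma_for_device() is this sketch's assumption):

	/*
	 * Hypothetical pKVM/EL2 implementation: the hypervisor cannot
	 * use the DMA API, but it can do the CMO directly with its own
	 * helpers, cleaning the table memory to the point of coherency.
	 */
	void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
				      enum dma_data_direction dir)
	{
		kvm_flush_dcache_to_poc(hyp_phys_to_virt(paddr), size);
	}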
> If "pagetable creation time" means alloc_io_pgtable_ops() as I've taken
> it to, how do you specifically propose to check that pages which haven't
> been allocated yet will definitely be valid for DMA in future? Also note
> that the only reason for the virt_to_phys() check (which *has* often
> caught genuine driver/platform configuration issues to a degree that I
> would be very uncomfortable removing it) is basically laziness, in that
> nobody's got round to doing the work to be able to keep track of kernel
> addresses separately from the DMA addresses in the tables themselves,
> and thus allow visibly-remapped pagetables - every now and then there
> arises a situation where that would be really useful, so it is still on
> my wishlist.
>
I see, thanks for the explanation. I was imagining checking whether
SWIOTLB is active; AFAIK, it will be if the device has a smaller DMA
mask than the CPU can address.

That probably doesn't cover all the cases, but I thought any system
with such constraints would also be unusable with the current
implementation. However, the virt_to_phys() check would at least make
debugging easier.
Anyway, I will drop this.
> Frankly if KVM is going to have increasingly divergent requirements and
> assumptions about pagetables, then it should have its own pagetable code
> like it does for the CPU.
>
So far, KVM has no special requirements for the pagetable; it only
needs to provide implementations of the CMOs and of memory allocation.

In the last version, the __arm_lpae_alloc_pages() implementation was
entirely different between the hypervisor and the kernel; I was trying
to simplify that.
Thanks,
Mostafa
> Thanks,
> Robin.
>
> > [1] https://lore.kernel.org/linux-iommu/20251117184815.1027271-1-smostafa@google.com/
> > [2] https://lore.kernel.org/linux-iommu/20251216005834.GC31492@ziepe.ca/
> > [3] https://lore.kernel.org/linux-iommu/7c5584d3efa61ee6b0b87efb72f24f32852aafb7.1438195011.git.robin.murphy@arm.com/
> >
> > Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
> > Signed-off-by: Mostafa Saleh <smostafa@google.com>
> > ---
> > drivers/iommu/io-pgtable-arm.c | 39 +++++++---------------------------
> > 1 file changed, 8 insertions(+), 31 deletions(-)
> >
> > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> > index e6626004b323..0da9195155ec 100644
> > --- a/drivers/iommu/io-pgtable-arm.c
> > +++ b/drivers/iommu/io-pgtable-arm.c
> > @@ -15,7 +15,7 @@
> > #include <linux/sizes.h>
> > #include <linux/slab.h>
> > #include <linux/types.h>
> > -#include <linux/dma-mapping.h>
> > +#include <linux/dma-map-ops.h>
> >
> > #include <asm/barrier.h>
> >
> > @@ -254,7 +254,6 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
> > {
> > struct device *dev = cfg->iommu_dev;
> > size_t alloc_size;
> > - dma_addr_t dma;
> > void *pages;
> >
> > /*
> > @@ -271,32 +270,10 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
> > if (!pages)
> > return NULL;
> >
> > - if (!cfg->coherent_walk) {
> > - dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
> > - if (dma_mapping_error(dev, dma))
> > - goto out_free;
> > - /*
> > - * We depend on the IOMMU being able to work with any physical
> > - * address directly, so if the DMA layer suggests otherwise by
> > - * translating or truncating them, that bodes very badly...
> > - */
> > - if (dma != virt_to_phys(pages))
> > - goto out_unmap;
> > - }
> > -
> > + if (!cfg->coherent_walk)
> > + arch_sync_dma_for_device(__arm_lpae_dma_addr(pages), size,
> > + DMA_TO_DEVICE);
> > return pages;
> > -
> > -out_unmap:
> > - dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
> > - dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
> > -
> > -out_free:
> > - if (cfg->free)
> > - cfg->free(cookie, pages, size);
> > - else
> > - iommu_free_pages(pages);
> > -
> > - return NULL;
> > }
> >
> > static void __arm_lpae_free_pages(void *pages, size_t size,
> > @@ -304,8 +281,8 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
> > void *cookie)
> > {
> > if (!cfg->coherent_walk)
> > - dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
> > - size, DMA_TO_DEVICE);
> > + arch_sync_dma_for_cpu(__arm_lpae_dma_addr(pages),
> > + size, DMA_TO_DEVICE);
> >
> > if (cfg->free)
> > cfg->free(cookie, pages, size);
> > @@ -316,8 +293,8 @@ static void __arm_lpae_free_pages(void *pages, size_t size,
> > static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
> > struct io_pgtable_cfg *cfg)
> > {
> > - dma_sync_single_for_device(cfg->iommu_dev, __arm_lpae_dma_addr(ptep),
> > - sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
> > + arch_sync_dma_for_device(__arm_lpae_dma_addr(ptep),
> > + sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
> > }
> >
> > static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries)
>