From: Leon Romanovsky <leonro@nvidia.com>
Perform mechanical conversion from .map_page to .map_phys callback.
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/parisc/ccio-dma.c | 25 +++++++++++++------------
drivers/parisc/sba_iommu.c | 23 ++++++++++++-----------
2 files changed, 25 insertions(+), 23 deletions(-)
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index feef537257d0..d45f3634f827 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -773,17 +773,18 @@ ccio_map_single(struct device *dev, void *addr, size_t size,
static dma_addr_t
-ccio_map_page(struct device *dev, struct page *page, unsigned long offset,
- size_t size, enum dma_data_direction direction,
- unsigned long attrs)
+ccio_map_phys(struct device *dev, phys_addr_t phys, size_t size,
+ enum dma_data_direction direction, unsigned long attrs)
{
- return ccio_map_single(dev, page_address(page) + offset, size,
- direction);
+ if (attrs & DMA_ATTR_MMIO)
+ return DMA_MAPPING_ERROR;
+
+ return ccio_map_single(dev, phys_to_virt(phys), size, direction);
}
/**
- * ccio_unmap_page - Unmap an address range from the IOMMU.
+ * ccio_unmap_phys - Unmap an address range from the IOMMU.
* @dev: The PCI device.
* @iova: The start address of the DMA region.
* @size: The length of the DMA region.
@@ -791,7 +792,7 @@ ccio_map_page(struct device *dev, struct page *page, unsigned long offset,
* @attrs: attributes
*/
static void
-ccio_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
+ccio_unmap_phys(struct device *dev, dma_addr_t iova, size_t size,
enum dma_data_direction direction, unsigned long attrs)
{
struct ioc *ioc;
@@ -873,7 +874,7 @@ static void
ccio_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_handle, unsigned long attrs)
{
- ccio_unmap_page(dev, dma_handle, size, 0, 0);
+ ccio_unmap_phys(dev, dma_handle, size, 0, 0);
free_pages((unsigned long)cpu_addr, get_order(size));
}
@@ -1004,7 +1005,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
#ifdef CCIO_COLLECT_STATS
ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT;
#endif
- ccio_unmap_page(dev, sg_dma_address(sglist),
+ ccio_unmap_phys(dev, sg_dma_address(sglist),
sg_dma_len(sglist), direction, 0);
++sglist;
nents--;
@@ -1017,8 +1018,8 @@ static const struct dma_map_ops ccio_ops = {
.dma_supported = ccio_dma_supported,
.alloc = ccio_alloc,
.free = ccio_free,
- .map_page = ccio_map_page,
- .unmap_page = ccio_unmap_page,
+ .map_phys = ccio_map_phys,
+ .unmap_phys = ccio_unmap_phys,
.map_sg = ccio_map_sg,
.unmap_sg = ccio_unmap_sg,
.get_sgtable = dma_common_get_sgtable,
@@ -1072,7 +1073,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
ioc->msingle_calls, ioc->msingle_pages,
(int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
- /* KLUGE - unmap_sg calls unmap_page for each mapped page */
+ /* KLUGE - unmap_sg calls unmap_phys for each mapped page */
min = ioc->usingle_calls - ioc->usg_calls;
max = ioc->usingle_pages - ioc->usg_pages;
seq_printf(m, "pci_unmap_single: %8ld calls %8ld pages (avg %d/1000)\n",
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index fc3863c09f83..8040aa4e6ff4 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -778,17 +778,18 @@ sba_map_single(struct device *dev, void *addr, size_t size,
static dma_addr_t
-sba_map_page(struct device *dev, struct page *page, unsigned long offset,
- size_t size, enum dma_data_direction direction,
- unsigned long attrs)
+sba_map_phys(struct device *dev, phys_addr_t phys, size_t size,
+ enum dma_data_direction direction, unsigned long attrs)
{
- return sba_map_single(dev, page_address(page) + offset, size,
- direction);
+ if (attrs & DMA_ATTR_MMIO)
+ return DMA_MAPPING_ERROR;
+
+ return sba_map_single(dev, phys_to_virt(phys), size, direction);
}
/**
- * sba_unmap_page - unmap one IOVA and free resources
+ * sba_unmap_phys - unmap one IOVA and free resources
* @dev: instance of PCI owned by the driver that's asking.
* @iova: IOVA of driver buffer previously mapped.
* @size: number of bytes mapped in driver buffer.
@@ -798,7 +799,7 @@ sba_map_page(struct device *dev, struct page *page, unsigned long offset,
* See Documentation/core-api/dma-api-howto.rst
*/
static void
-sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
+sba_unmap_phys(struct device *dev, dma_addr_t iova, size_t size,
enum dma_data_direction direction, unsigned long attrs)
{
struct ioc *ioc;
@@ -914,7 +915,7 @@ static void
sba_free(struct device *hwdev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
{
- sba_unmap_page(hwdev, dma_handle, size, 0, 0);
+ sba_unmap_phys(hwdev, dma_handle, size, 0, 0);
free_pages((unsigned long) vaddr, get_order(size));
}
@@ -1061,7 +1062,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
while (nents && sg_dma_len(sglist)) {
- sba_unmap_page(dev, sg_dma_address(sglist), sg_dma_len(sglist),
+ sba_unmap_phys(dev, sg_dma_address(sglist), sg_dma_len(sglist),
direction, 0);
#ifdef SBA_COLLECT_STATS
ioc->usg_pages += ((sg_dma_address(sglist) & ~IOVP_MASK) + sg_dma_len(sglist) + IOVP_SIZE - 1) >> PAGE_SHIFT;
@@ -1085,8 +1086,8 @@ static const struct dma_map_ops sba_ops = {
.dma_supported = sba_dma_supported,
.alloc = sba_alloc,
.free = sba_free,
- .map_page = sba_map_page,
- .unmap_page = sba_unmap_page,
+ .map_phys = sba_map_phys,
+ .unmap_phys = sba_unmap_phys,
.map_sg = sba_map_sg,
.unmap_sg = sba_unmap_sg,
.get_sgtable = dma_common_get_sgtable,
--
2.51.0
On Sun, Sep 28, 2025 at 06:02:23PM +0300, Leon Romanovsky wrote:
> +ccio_map_phys(struct device *dev, phys_addr_t phys, size_t size,
> + enum dma_data_direction direction, unsigned long attrs)
> {
> - return ccio_map_single(dev, page_address(page) + offset, size,
> - direction);
> + if (attrs & DMA_ATTR_MMIO)
> + return DMA_MAPPING_ERROR;
> +
> + return ccio_map_single(dev, phys_to_virt(phys), size, direction);
This doesn't actually use the virt at all:
offset = ((unsigned long) addr) & ~IOVP_MASK;
if((size % L1_CACHE_BYTES) || ((unsigned long)addr % L1_CACHE_BYTES))
ccio_io_pdir_entry(pdir_start, KERNEL_SPACE, (unsigned long)addr, hint);
And ccio_io_pdir_entry():
pa = lpa(vba);
Is a special instruction that uses virt but AI tells me that special
LPA instruction is returning phys. Not sure if that is a different
value than virt_to_phys()..
IDK, I'm not feeling brave enough to drop the LPA but maybe include
this note in the commit message.
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Jason
On Fri, 2025-10-03 at 12:01 -0300, Jason Gunthorpe wrote:
> On Sun, Sep 28, 2025 at 06:02:23PM +0300, Leon Romanovsky wrote:
> > +ccio_map_phys(struct device *dev, phys_addr_t phys, size_t size,
> > + enum dma_data_direction direction, unsigned long
> > attrs)
> > {
> > - return ccio_map_single(dev, page_address(page) + offset,
> > size,
> > - direction);
> > + if (attrs & DMA_ATTR_MMIO)
> > + return DMA_MAPPING_ERROR;
> > +
> > + return ccio_map_single(dev, phys_to_virt(phys), size,
> > direction);
>
> This doesn't actually use the virt at all:
>
> offset = ((unsigned long) addr) & ~IOVP_MASK;
> if((size % L1_CACHE_BYTES) || ((unsigned long)addr %
> L1_CACHE_BYTES))
> ccio_io_pdir_entry(pdir_start, KERNEL_SPACE,
> (unsigned long)addr, hint);
Actually, it does: it has to; parisc caches are VIPT. The iommu needs
to know both the physical address and the virtual tag (also called the
coherence index) to instruct the CPU to flush its cache. The sole use
of the vba is in ccio_io_pdir_entry() which programs the IOMMU page
table. The coherence index is pretty small because the largest VIPT
cache stride parisc has is 4MB and obviously the lower 12 bits (page
offset) are the same for both physical and virtual, so it's only the 10
bits between 4k and 4M that the iommu needs (the entry is 12 bits
because architecturally there were chips with a 16M stride planned for
but never produced).
>
> And ccio_io_pdir_entry():
> pa = lpa(vba);
>
> Is a special instruction that uses virt but AI tells me that special
> LPA instruction is returning phys. Not sure if that is a different
> value than virt_to_phys()..
That's right, so if you want to pass both phys and virt addresses to
the function, this could be dropped.
Regards,
James
On Fri, Oct 03, 2025 at 12:01:44PM -0300, Jason Gunthorpe wrote:
> On Sun, Sep 28, 2025 at 06:02:23PM +0300, Leon Romanovsky wrote:
> > +ccio_map_phys(struct device *dev, phys_addr_t phys, size_t size,
> > + enum dma_data_direction direction, unsigned long attrs)
> > {
> > - return ccio_map_single(dev, page_address(page) + offset, size,
> > - direction);
> > + if (attrs & DMA_ATTR_MMIO)
> > + return DMA_MAPPING_ERROR;
> > +
> > + return ccio_map_single(dev, phys_to_virt(phys), size, direction);
>
> This doesn't actually use the virt at all:
>
> offset = ((unsigned long) addr) & ~IOVP_MASK;
> if((size % L1_CACHE_BYTES) || ((unsigned long)addr % L1_CACHE_BYTES))
> ccio_io_pdir_entry(pdir_start, KERNEL_SPACE, (unsigned long)addr, hint);
>
> And ccio_io_pdir_entry():
> pa = lpa(vba);
>
> Is a special instruction that uses virt but AI tells me that special
> LPA instruction is returning phys. Not sure if that is a different
> value than virt_to_phys()..
>
> IDK, I'm not feeling brave enough to drop the LPA but maybe include
> this note in the commit message.
>
It looks like I was chosen as a volunteer to do so. WDYT?
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index b00f6fc49063..4d73e67fbd54 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -517,10 +517,10 @@ static u32 hint_lookup[] = {
* ccio_io_pdir_entry - Initialize an I/O Pdir.
* @pdir_ptr: A pointer into I/O Pdir.
* @sid: The Space Identifier.
- * @vba: The virtual address.
+ * @pba: The physical address.
* @hints: The DMA Hint.
*
- * Given a virtual address (vba, arg2) and space id, (sid, arg1),
+ * Given a physical address (pba, arg2) and space id, (sid, arg1),
* load the I/O PDIR entry pointed to by pdir_ptr (arg0). Each IO Pdir
* entry consists of 8 bytes as shown below (MSB == bit 0):
*
@@ -543,7 +543,7 @@ static u32 hint_lookup[] = {
* index are bits 12:19 of the value returned by LCI.
*/
static void
-ccio_io_pdir_entry(__le64 *pdir_ptr, space_t sid, unsigned long vba,
+ccio_io_pdir_entry(__le64 *pdir_ptr, space_t sid, phys_addr_t pba,
unsigned long hints)
{
register unsigned long pa;
@@ -557,7 +557,7 @@ ccio_io_pdir_entry(__le64 *pdir_ptr, space_t sid, unsigned long vba,
** "hints" parm includes the VALID bit!
** "dep" clobbers the physical address offset bits as well.
*/
- pa = lpa(vba);
+ pa = pba;
asm volatile("depw %1,31,12,%0" : "+r" (pa) : "r" (hints));
((u32 *)pdir_ptr)[1] = (u32) pa;
@@ -582,7 +582,7 @@ ccio_io_pdir_entry(__le64 *pdir_ptr, space_t sid, unsigned long vba,
** Grab virtual index [0:11]
** Deposit virt_idx bits into I/O PDIR word
*/
- asm volatile ("lci %%r0(%1), %0" : "=r" (ci) : "r" (vba));
+ asm volatile ("lci %%r0(%1), %0" : "=r" (ci) : "r" (pba));
asm volatile ("extru %1,19,12,%0" : "+r" (ci) : "r" (ci));
asm volatile ("depw %1,15,12,%0" : "+r" (pa) : "r" (ci));
@@ -704,14 +704,14 @@ ccio_dma_supported(struct device *dev, u64 mask)
/**
* ccio_map_single - Map an address range into the IOMMU.
* @dev: The PCI device.
- * @addr: The start address of the DMA region.
+ * @addr: The physical address of the DMA region.
* @size: The length of the DMA region.
* @direction: The direction of the DMA transaction (to/from device).
*
* This function implements the pci_map_single function.
*/
static dma_addr_t
-ccio_map_single(struct device *dev, void *addr, size_t size,
+ccio_map_single(struct device *dev, phys_addr_t addr, size_t size,
enum dma_data_direction direction)
{
int idx;
@@ -730,7 +730,7 @@ ccio_map_single(struct device *dev, void *addr, size_t size,
BUG_ON(size <= 0);
/* save offset bits */
- offset = ((unsigned long) addr) & ~IOVP_MASK;
+ offset = offset_in_page(addr);
/* round up to nearest IOVP_SIZE */
size = ALIGN(size + offset, IOVP_SIZE);
@@ -746,15 +746,15 @@ ccio_map_single(struct device *dev, void *addr, size_t size,
pdir_start = &(ioc->pdir_base[idx]);
- DBG_RUN("%s() %px -> %#lx size: %zu\n",
- __func__, addr, (long)(iovp | offset), size);
+ DBG_RUN("%s() %pa -> %#lx size: %zu\n",
+ __func__, &addr, (long)(iovp | offset), size);
/* If not cacheline aligned, force SAFE_DMA on the whole mess */
- if((size % L1_CACHE_BYTES) || ((unsigned long)addr % L1_CACHE_BYTES))
+ if((size % L1_CACHE_BYTES) || (addr % L1_CACHE_BYTES))
hint |= HINT_SAFE_DMA;
while(size > 0) {
- ccio_io_pdir_entry(pdir_start, KERNEL_SPACE, (unsigned long)addr, hint);
+ ccio_io_pdir_entry(pdir_start, KERNEL_SPACE, addr, hint);
DBG_RUN(" pdir %p %08x%08x\n",
pdir_start,
@@ -779,7 +779,7 @@ ccio_map_phys(struct device *dev, phys_addr_t phys, size_t size,
if (unlikely(attrs & DMA_ATTR_MMIO))
return DMA_MAPPING_ERROR;
- return ccio_map_single(dev, phys_to_virt(phys), size, direction);
+ return ccio_map_single(dev, phys, size, direction);
}
@@ -854,7 +854,8 @@ ccio_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag,
if (ret) {
memset(ret, 0, size);
- *dma_handle = ccio_map_single(dev, ret, size, DMA_BIDIRECTIONAL);
+ *dma_handle = ccio_map_single(dev, virt_to_phys(ret), size,
+ DMA_BIDIRECTIONAL);
}
return ret;
@@ -921,7 +922,7 @@ ccio_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
/* Fast path single entry scatterlists. */
if (nents == 1) {
sg_dma_address(sglist) = ccio_map_single(dev,
- sg_virt(sglist), sglist->length,
+ sg_phys(sglist), sglist->length,
direction);
sg_dma_len(sglist) = sglist->length;
return 1;
>
> Jason
>
On Sun, Oct 05, 2025 at 04:22:59PM +0300, Leon Romanovsky wrote:
> @@ -582,7 +582,7 @@ ccio_io_pdir_entry(__le64 *pdir_ptr, space_t sid, unsigned long vba,
> ** Grab virtual index [0:11]
> ** Deposit virt_idx bits into I/O PDIR word
> */
> - asm volatile ("lci %%r0(%1), %0" : "=r" (ci) : "r" (vba));
> + asm volatile ("lci %%r0(%1), %0" : "=r" (ci) : "r" (pba));
Don't know how I missed this, but this is the virtual address for the
cache invalidate James mentioned
So the optimal is to drop the lpa() and to use phys_to_virt() to get
vba for this instruction.
Jason
On Mon, Oct 6, 2025, at 02:31, Jason Gunthorpe wrote:
> On Sun, Oct 05, 2025 at 04:22:59PM +0300, Leon Romanovsky wrote:
>> @@ -582,7 +582,7 @@ ccio_io_pdir_entry(__le64 *pdir_ptr, space_t sid, unsigned long vba,
>> ** Grab virtual index [0:11]
>> ** Deposit virt_idx bits into I/O PDIR word
>> */
>> - asm volatile ("lci %%r0(%1), %0" : "=r" (ci) : "r" (vba));
>> + asm volatile ("lci %%r0(%1), %0" : "=r" (ci) : "r" (pba));
>
> Don't know how I missed this, but this is the virtual address for the
> cache invalidate James mentioned
>
> So the optimal is to drop the lpa() and to use phys_to_virt() to get
> vba for this instruction.
The optimal is to keep parisc arch code as I did in v1 and don't change it too much.
>
> Jason
On 2025-10-03 11:01 a.m., Jason Gunthorpe wrote:
> This doesn't actually use the virt at all:
>
> 	offset = ((unsigned long) addr) & ~IOVP_MASK;
> 	if((size % L1_CACHE_BYTES) || ((unsigned long)addr % L1_CACHE_BYTES))
> 	ccio_io_pdir_entry(pdir_start, KERNEL_SPACE, (unsigned long)addr, hint);
>
> And ccio_io_pdir_entry():
> 	pa = lpa(vba);
>
> Is a special instruction that uses virt but AI tells me that special
> LPA instruction is returning phys. Not sure if that is a different
> value than virt_to_phys()..

ccio_io_pdir_entry currently only supports KERNEL_SPACE. For KERNEL_SPACE,
lpa() and virt_to_phys() are equivalent if the page is mapped. lpa() returns
0 if a non-access data TLB fault occurs (i.e., the page isn't mapped). Not
sure if that matters.

> IDK, I'm not feeling brave enough to drop the LPA but maybe include
> this note in the commit message.

--
John David Anglin
dave.anglin@bell.net
On Fri, 2025-10-03 at 13:18 -0400, John David Anglin wrote:
> On 2025-10-03 11:01 a.m., Jason Gunthorpe wrote:
> > This doesn't actually use the virt at all:
> >
> > 	offset = ((unsigned long) addr) & ~IOVP_MASK;
> > 	if((size % L1_CACHE_BYTES) || ((unsigned long)addr %
> > L1_CACHE_BYTES))
> > 	ccio_io_pdir_entry(pdir_start, KERNEL_SPACE,
> > (unsigned long)addr, hint);
> >
> > And ccio_io_pdir_entry():
> > 	pa = lpa(vba);
> >
> > Is a special instruction that uses virt but AI tells me that
> > special LPA instruction is returning phys. Not sure if that is a
> > different value than virt_to_phys()..
>
> ccio_io_pdir_entry currently only supports KERNEL_SPACE.

Actually there's a bit more nuance to it than that. Obviously DMA has
to support user pages otherwise I/O wouldn't work. The way it does is
that all physical pages are mapped in the kernel and we try to make
sure all user mappings are on cache stride (4MB) boundaries so the
coherence index of the kernel virtual address and the user virtual
address are the same, so we can solely use the kernel virtual address
to calculate the coherence index for the IOMMU. If that's not true, we
flush the user virtual address range in gup and the kernel virtual
address range before sending the I/O completion.

Regards,

James
On Fri, Oct 03, 2025 at 01:18:32PM -0400, John David Anglin wrote:
> On 2025-10-03 11:01 a.m., Jason Gunthorpe wrote:
> > This doesn't actually use the virt at all:
> >
> > 	offset = ((unsigned long) addr) & ~IOVP_MASK;
> > 	if((size % L1_CACHE_BYTES) || ((unsigned long)addr % L1_CACHE_BYTES))
> > 	ccio_io_pdir_entry(pdir_start, KERNEL_SPACE, (unsigned long)addr, hint);
> >
> > And ccio_io_pdir_entry():
> > 	pa = lpa(vba);
> >
> > Is a special instruction that uses virt but AI tells me that special
> > LPA instruction is returning phys. Not sure if that is a different
> > value than virt_to_phys()..
>
> ccio_io_pdir_entry currently only supports KERNEL_SPACE. For KERNEL_SPACE, lpa() and
> virt_to_phys() are equivalent if page is mapped. lpa() returns 0 if a non-access data
> TLB fault occurs (i.e., page isn't mapped). Not sure if that
> matters.

After unfolding everything the expression is

	lpa(phys_to_virt(pa))

So if that is always equal to pa then let's just drop it.

phys_to_virt() always returns something kernel mapped, and it can't be
unmapped.

Jason
On 2025-10-03 1:26 p.m., Jason Gunthorpe wrote:
> On Fri, Oct 03, 2025 at 01:18:32PM -0400, John David Anglin wrote:
>> On 2025-10-03 11:01 a.m., Jason Gunthorpe wrote:
>>> This doesn't actually use the virt at all:
>>>
>>> 	offset = ((unsigned long) addr) & ~IOVP_MASK;
>>> 	if((size % L1_CACHE_BYTES) || ((unsigned long)addr % L1_CACHE_BYTES))
>>> 	ccio_io_pdir_entry(pdir_start, KERNEL_SPACE, (unsigned long)addr, hint);
>>>
>>> And ccio_io_pdir_entry():
>>> 	pa = lpa(vba);
>>>
>>> Is a special instruction that uses virt but AI tells me that special
>>> LPA instruction is returning phys. Not sure if that is a different
>>> value than virt_to_phys()..
>>
>> ccio_io_pdir_entry currently only supports KERNEL_SPACE. For KERNEL_SPACE, lpa() and
>> virt_to_phys() are equivalent if page is mapped. lpa() returns 0 if a non-access data
>> TLB fault occurs (i.e., page isn't mapped). Not sure if that
>> matters.
>
> After unfolding everything the expression is
>
> 	lpa(phys_to_virt(pa))
>
> So if that is always equal to pa then lets just drop it.

If lpa() was replaced by virt_to_phys(), the result would always be
equal to pa. So, I think it can be dropped.

Dave

--
John David Anglin
dave.anglin@bell.net
© 2016 - 2026 Red Hat, Inc.