[PATCH V1] accel/amdxdna: Add carveout memory support for non-IOMMU systems

Lizhi Hou posted 1 patch 1 month, 4 weeks ago
There is a newer version of this series
drivers/accel/amdxdna/Makefile          |   1 +
drivers/accel/amdxdna/amdxdna_cbuf.c    | 249 ++++++++++++++++++++++++
drivers/accel/amdxdna/amdxdna_cbuf.h    |  16 ++
drivers/accel/amdxdna/amdxdna_gem.c     |  95 +++++++--
drivers/accel/amdxdna/amdxdna_iommu.c   |  77 +++++---
drivers/accel/amdxdna/amdxdna_pci_drv.c |  91 ++++++---
drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +-
7 files changed, 454 insertions(+), 79 deletions(-)
create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.c
create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.h
[PATCH V1] accel/amdxdna: Add carveout memory support for non-IOMMU systems
Posted by Lizhi Hou 1 month, 4 weeks ago
From: Max Zhen <max.zhen@amd.com>

Add support for allocating buffers from reserved carveout memory when
IOMMU is not available. This is useful during debugging or bring-up.

In this configuration, the device uses physical addresses and does
not support scatter-gather lists, requiring physically contiguous
buffers.

Implement carveout-backed allocation and integrate it into buffer
management to support operation in physical address mode.

Signed-off-by: Max Zhen <max.zhen@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/Makefile          |   1 +
 drivers/accel/amdxdna/amdxdna_cbuf.c    | 249 ++++++++++++++++++++++++
 drivers/accel/amdxdna/amdxdna_cbuf.h    |  16 ++
 drivers/accel/amdxdna/amdxdna_gem.c     |  95 +++++++--
 drivers/accel/amdxdna/amdxdna_iommu.c   |  77 +++++---
 drivers/accel/amdxdna/amdxdna_pci_drv.c |  91 ++++++---
 drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +-
 7 files changed, 454 insertions(+), 79 deletions(-)
 create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.c
 create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.h

diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
index 79369e497540..a055aea36971 100644
--- a/drivers/accel/amdxdna/Makefile
+++ b/drivers/accel/amdxdna/Makefile
@@ -12,6 +12,7 @@ amdxdna-y := \
 	aie2_solver.o \
 	aie4_message.o \
 	aie4_pci.o \
+	amdxdna_cbuf.o \
 	amdxdna_ctx.o \
 	amdxdna_gem.o \
 	amdxdna_iommu.o \
diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.c b/drivers/accel/amdxdna/amdxdna_cbuf.c
new file mode 100644
index 000000000000..4a556199a461
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_cbuf.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/drm_mm.h>
+#include <drm/drm_prime.h>
+
+#include "amdxdna_cbuf.h"
+#include "amdxdna_pci_drv.h"
+
+/*
+ * This is a platform debug/bringup feature.
+ *
+ * Carveout memory is a chunk of memory which is physically contiguous and
+ * is reserved during early boot time. There is only one chunk of such memory
+ * per system. Once available, all BOs accessible from device should be
+ * allocated from this memory.
+ */
+/* Physical base address of the carveout chunk; read-only (0400) via sysfs. */
+static u64 carveout_addr;
+module_param(carveout_addr, ullong, 0400);
+MODULE_PARM_DESC(carveout_addr, "Physical memory address for reserved memory chunk");
+
+/* Size in bytes of the carveout chunk; a non-zero value turns carveout mode on. */
+static u64 carveout_size;
+module_param(carveout_size, ullong, 0400);
+MODULE_PARM_DESC(carveout_size, "Physical memory size for reserved memory chunk");
+
+/*
+ * The single range allocator covering [carveout_addr, carveout_addr + carveout_size).
+ * Like the two parameters above it is only used inside this file, so it is kept
+ * static to avoid exporting generic names into the global kernel namespace.
+ */
+static struct amdxdna_carveout {
+	struct drm_mm	mm;
+	struct mutex	lock; /* protect mm */
+} carveout;
+
+/* Tell whether carveout mode is enabled, i.e. a non-zero carveout_size was supplied. */
+bool amdxdna_use_carveout(void)
+{
+	return carveout_size != 0;
+}
+
+/*
+ * Initialize the carveout lock and range allocator from the module parameters.
+ * Does nothing when carveout mode is not enabled.
+ */
+void amdxdna_carveout_init(void)
+{
+	if (amdxdna_use_carveout()) {
+		mutex_init(&carveout.lock);
+		drm_mm_init(&carveout.mm, carveout_addr, carveout_size);
+		pr_info("Use carveout mem, addr=0x%llx, size=0x%llx\n",
+			carveout_addr, carveout_size);
+	}
+}
+
+/*
+ * Tear down what amdxdna_carveout_init() set up: the range allocator first,
+ * then its lock. Does nothing when carveout mode is not enabled.
+ */
+void amdxdna_carveout_fini(void)
+{
+	if (amdxdna_use_carveout()) {
+		drm_mm_takedown(&carveout.mm);
+		mutex_destroy(&carveout.lock);
+	}
+}
+
+/*
+ * Per-buffer state stored in dma_buf->priv for a carveout-backed buffer.
+ * node.start is used as the buffer's physical address and node.size as its
+ * length by the map, mmap and vmap callbacks below.
+ */
+struct amdxdna_cbuf_priv {
+	struct drm_mm_node node;
+};
+
+/*
+ * dma-buf map_dma_buf callback.
+ *
+ * Maps the whole carveout range of the buffer for @attach->dev with a single
+ * dma_map_resource() call, then describes the resulting contiguous DMA range
+ * in an sg_table. The range is split into several entries only because one
+ * entry cannot describe more than min(UINT_MAX, dma_max_mapping_size()) bytes.
+ *
+ * Returns the sg_table on success or an ERR_PTR() on failure. The table is
+ * released by amdxdna_cbuf_unmap().
+ */
+static struct sg_table *amdxdna_cbuf_map(struct dma_buf_attachment *attach,
+					 enum dma_data_direction direction)
+{
+	struct amdxdna_cbuf_priv *cbuf = attach->dmabuf->priv;
+	struct device *dev = attach->dev;
+	struct scatterlist *sg;
+	int ret, n_entries, i;
+	struct sg_table *sgt;
+	dma_addr_t dma_addr;
+	size_t dma_size;
+	size_t max_seg;
+
+	sgt = kzalloc_obj(*sgt);
+	if (!sgt)
+		return ERR_PTR(-ENOMEM);
+
+	max_seg = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev));
+	n_entries = (cbuf->node.size + max_seg - 1) / max_seg;
+
+	/*
+	 * The unmap callback releases the list with sg_free_table(), so it has
+	 * to be allocated by sg_alloc_table(); a hand-rolled array would not
+	 * have the chunk/chain layout sg_free_table() walks. sg_alloc_table()
+	 * sets nents and orig_nents and cleans up after itself on failure.
+	 */
+	ret = sg_alloc_table(sgt, n_entries, GFP_KERNEL);
+	if (ret)
+		goto free_sgt;
+
+	dma_size = cbuf->node.size;
+	dma_addr = dma_map_resource(dev, cbuf->node.start, dma_size,
+				    direction, DMA_ATTR_SKIP_CPU_SYNC);
+	ret = dma_mapping_error(dev, dma_addr);
+	if (ret) {
+		pr_err("Failed to dma_map_resource carveout dma buf, ret %d\n", ret);
+		goto free_table;
+	}
+
+	/* Only the DMA side of each entry is filled in; there are no struct pages. */
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		size_t len = min_t(size_t, max_seg, dma_size);
+
+		sg_dma_address(sg) = dma_addr;
+		sg_dma_len(sg) = len;
+		dma_addr += len;
+		dma_size -= len;
+	}
+
+	return sgt;
+
+free_table:
+	sg_free_table(sgt);
+free_sgt:
+	kfree(sgt);
+	return ERR_PTR(ret);
+}
+
+/*
+ * dma-buf unmap_dma_buf callback.
+ *
+ * Undoes amdxdna_cbuf_map(): the buffer was mapped as one contiguous DMA
+ * range, so it is unmapped from the first entry's address over the table's
+ * contiguous size, after which the table itself is released.
+ */
+static void amdxdna_cbuf_unmap(struct dma_buf_attachment *attach,
+			       struct sg_table *sgt,
+			       enum dma_data_direction direction)
+{
+	dma_addr_t base = sg_dma_address(sgt->sgl);
+	unsigned long len = drm_prime_get_contiguous_size(sgt);
+
+	dma_unmap_resource(attach->dev, base, len, direction,
+			   DMA_ATTR_SKIP_CPU_SYNC);
+
+	sg_free_table(sgt);
+	kfree(sgt);
+}
+
+/*
+ * dma-buf release callback: give the buffer's range back to the carveout
+ * allocator (under carveout.lock) and free the per-buffer state.
+ */
+static void amdxdna_cbuf_release(struct dma_buf *dbuf)
+{
+	struct amdxdna_cbuf_priv *priv = dbuf->priv;
+	struct drm_mm_node *range = &priv->node;
+
+	mutex_lock(&carveout.lock);
+	drm_mm_remove_node(range);
+	mutex_unlock(&carveout.lock);
+
+	kfree(priv);
+}
+
+/*
+ * Page-fault handler for user mappings of a carveout buffer.
+ *
+ * Inserts the PFN of the carveout page that backs the faulting address.
+ * The page index inside the buffer is the VMA's starting page offset
+ * (vma->vm_pgoff) plus the faulting page's position inside the VMA, so a
+ * mapping created at a non-zero offset into the buffer resolves to the
+ * right page instead of always starting at the beginning of the buffer.
+ */
+static vm_fault_t amdxdna_cbuf_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct amdxdna_cbuf_priv *cbuf;
+	unsigned long pfn;
+	pgoff_t pgoff;
+
+	cbuf = vma->vm_private_data;
+	pgoff = vma->vm_pgoff + ((vmf->address - vma->vm_start) >> PAGE_SHIFT);
+	pfn = (cbuf->node.start >> PAGE_SHIFT) + pgoff;
+
+	return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+/* VM operations installed by amdxdna_cbuf_mmap(); pages are inserted on fault. */
+static const struct vm_operations_struct amdxdna_cbuf_vm_ops = {
+	.fault = amdxdna_cbuf_vm_fault,
+};
+
+/*
+ * dma-buf mmap callback.
+ *
+ * Nothing is mapped up front: the VMA is flagged as a raw PFN mapping that
+ * must not be expanded or dumped, and amdxdna_cbuf_vm_fault() fills it in
+ * page by page using the buffer stored in vm_private_data. Always returns 0.
+ */
+static int amdxdna_cbuf_mmap(struct dma_buf *dbuf, struct vm_area_struct *vma)
+{
+	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+	vma->vm_private_data = dbuf->priv;
+	vma->vm_ops = &amdxdna_cbuf_vm_ops;
+
+	return 0;
+}
+
+/*
+ * dma-buf vmap callback: create a write-back kernel mapping of the buffer's
+ * carveout range with memremap() and return it through @map.
+ *
+ * Returns 0 on success, -ENOMEM if the range cannot be remapped.
+ */
+static int amdxdna_cbuf_vmap(struct dma_buf *dbuf, struct iosys_map *map)
+{
+	struct amdxdna_cbuf_priv *priv = dbuf->priv;
+	void *vaddr = memremap(priv->node.start, priv->node.size, MEMREMAP_WB);
+
+	if (vaddr) {
+		iosys_map_set_vaddr(map, vaddr);
+		return 0;
+	}
+
+	pr_err("Failed to vmap carveout dma buf\n");
+	return -ENOMEM;
+}
+
+/* dma-buf vunmap callback: drop the memremap() mapping made by amdxdna_cbuf_vmap(). */
+static void amdxdna_cbuf_vunmap(struct dma_buf *dbuf, struct iosys_map *map)
+{
+	memunmap(map->vaddr);
+}
+
+/* dma-buf operations for buffers exported by amdxdna_get_cbuf(). */
+static const struct dma_buf_ops amdxdna_cbuf_dmabuf_ops = {
+	.map_dma_buf = amdxdna_cbuf_map,
+	.unmap_dma_buf = amdxdna_cbuf_unmap,
+	.release = amdxdna_cbuf_release,
+	.mmap = amdxdna_cbuf_mmap,
+	.vmap = amdxdna_cbuf_vmap,
+	.vunmap = amdxdna_cbuf_vunmap,
+};
+
+/*
+ * Zero the whole buffer through a temporary kernel mapping.
+ *
+ * The caller does not hold the buffer's reservation lock, so the _unlocked
+ * vmap/vunmap variants are used; they take the lock themselves, which the
+ * plain dma_buf_vmap()/dma_buf_vunmap() require the caller to hold.
+ *
+ * Returns 0 on success or the error reported by the vmap call.
+ */
+static int amdxdna_cbuf_clear(struct dma_buf *dbuf)
+{
+	struct iosys_map vmap = IOSYS_MAP_INIT_VADDR(NULL);
+	int ret;
+
+	ret = dma_buf_vmap_unlocked(dbuf, &vmap);
+	if (ret)
+		return ret;
+
+	memset(vmap.vaddr, 0, dbuf->size);
+	dma_buf_vunmap_unlocked(dbuf, &vmap);
+
+	return 0;
+}
+
+/*
+ * Allocate a zeroed, physically contiguous buffer from the carveout and
+ * export it as a dma-buf.
+ *
+ * @dev:       DRM device; not used by this function at present.
+ * @size:      number of bytes to reserve in the carveout.
+ * @alignment: alignment passed to the range allocator.
+ *
+ * Returns the new dma-buf, whose reference is owned by the caller, or an
+ * ERR_PTR() on failure.
+ */
+struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t size, u64 alignment)
+{
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct amdxdna_cbuf_priv *cbuf;
+	struct dma_buf *dbuf;
+	int ret;
+
+	cbuf = kzalloc_obj(*cbuf);
+	if (!cbuf)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&carveout.lock);
+	ret = drm_mm_insert_node_generic(&carveout.mm, &cbuf->node, size,
+					 alignment, 0, DRM_MM_INSERT_BEST);
+	mutex_unlock(&carveout.lock);
+	if (ret)
+		goto free_cbuf;
+
+	exp_info.size = size;
+	exp_info.ops = &amdxdna_cbuf_dmabuf_ops;
+	exp_info.priv = cbuf;
+	exp_info.flags = O_RDWR;
+	dbuf = dma_buf_export(&exp_info);
+	if (IS_ERR(dbuf)) {
+		ret = PTR_ERR(dbuf);
+		goto remove_node;
+	}
+
+	ret = amdxdna_cbuf_clear(dbuf);
+	if (ret) {
+		/*
+		 * Dropping the only reference runs amdxdna_cbuf_release(),
+		 * which removes the node and frees cbuf, so skip the manual
+		 * cleanup labels below.
+		 */
+		dma_buf_put(dbuf);
+		goto out;
+	}
+	return dbuf;
+
+remove_node:
+	/* carveout.mm is shared by all allocations; never touch it without the lock. */
+	mutex_lock(&carveout.lock);
+	drm_mm_remove_node(&cbuf->node);
+	mutex_unlock(&carveout.lock);
+free_cbuf:
+	kfree(cbuf);
+out:
+	return ERR_PTR(ret);
+}
diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.h b/drivers/accel/amdxdna/amdxdna_cbuf.h
new file mode 100644
index 000000000000..15e189ce779e
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_cbuf.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2026, Advanced Micro Devices, Inc.
+ */
+#ifndef _AMDXDNA_CBUF_H_
+#define _AMDXDNA_CBUF_H_
+
+#include <drm/drm_device.h>
+#include <linux/dma-buf.h>
+
+/* True when a non-zero carveout size was given as a module parameter. */
+bool amdxdna_use_carveout(void);
+/* Set up / tear down the carveout allocator; both do nothing when carveout is off. */
+void amdxdna_carveout_init(void);
+void amdxdna_carveout_fini(void);
+/* Allocate a zeroed carveout buffer exported as a dma-buf; ERR_PTR() on failure. */
+struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t size, u64 alignment);
+
+#endif
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 238ee244d4a6..905514ec183c 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -16,6 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 
+#include "amdxdna_cbuf.h"
 #include "amdxdna_ctx.h"
 #include "amdxdna_gem.h"
 #include "amdxdna_pci_drv.h"
@@ -516,10 +517,6 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
 static inline bool
 amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
 {
-	/* Do not count imported BOs since the buffer is not allocated by us. */
-	if (is_import_bo(abo))
-		return true;
-
 	/* Already counted as part of HEAP BO */
 	if (abo->type == AMDXDNA_BO_DEV)
 		return true;
@@ -571,9 +568,7 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
 	if (abo->type == AMDXDNA_BO_DEV_HEAP)
 		drm_mm_takedown(&abo->mm);
 
-	if (amdxdna_iova_on(xdna))
-		amdxdna_iommu_unmap_bo(xdna, abo);
-
+	amdxdna_dma_unmap_bo(xdna, abo);
 	amdxdna_gem_vunmap(abo);
 	mutex_destroy(&abo->lock);
 
@@ -591,18 +586,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
 
 	guard(mutex)(&abo->lock);
 	abo->open_ref++;
+	if (abo->open_ref > 1)
+		return 0;
 
-	if (abo->open_ref == 1) {
-		/* Attached to the client when first opened by it. */
-		abo->client = filp->driver_priv;
-		amdxdna_gem_add_bo_usage(abo);
-	}
-	if (amdxdna_iova_on(xdna)) {
-		ret = amdxdna_iommu_map_bo(xdna, abo);
+	/* Attached to the client when first opened by it. */
+	abo->client = filp->driver_priv;
+
+	/* No need to set up dma addr mapping in PASID mode. */
+	if (!amdxdna_pasid_on(abo->client)) {
+		ret = amdxdna_dma_map_bo(xdna, abo);
 		if (ret)
 			return ret;
 	}
 
+	amdxdna_gem_add_bo_usage(abo);
 	return 0;
 }
 
@@ -620,6 +617,39 @@ static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *
 	}
 }
 
+/*
+ * GEM vmap callback for shmem-type BOs.
+ *
+ * Imported BOs are mapped through their dma-buf, all others through the
+ * shmem helper. The object's reservation lock must be held by the caller.
+ *
+ * Returns 0 on success, the helper's error code on failure, or -ENOMEM when
+ * the helper reported success but produced no address.
+ */
+static int amdxdna_gem_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
+{
+	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
+	int err;
+
+	iosys_map_clear(map);
+
+	dma_resv_assert_held(obj->resv);
+
+	err = is_import_bo(abo) ? dma_buf_vmap(abo->dma_buf, map) :
+				  drm_gem_shmem_object_vmap(obj, map);
+	if (err)
+		return err;
+
+	return map->vaddr ? 0 : -ENOMEM;
+}
+
+/*
+ * GEM vunmap callback for shmem-type BOs; the counterpart of
+ * amdxdna_gem_obj_vmap(). Imported BOs are unmapped through their dma-buf,
+ * all others through the shmem helper. The object's reservation lock must
+ * be held by the caller.
+ */
+static void amdxdna_gem_obj_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
+{
+	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
+
+	dma_resv_assert_held(obj->resv);
+
+	if (is_import_bo(abo))
+		dma_buf_vunmap(abo->dma_buf, map);
+	else
+		drm_gem_shmem_object_vunmap(obj, map);
+}
+
 static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
 {
 	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
@@ -645,8 +675,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
 	.pin = drm_gem_shmem_object_pin,
 	.unpin = drm_gem_shmem_object_unpin,
 	.get_sg_table = drm_gem_shmem_object_get_sg_table,
-	.vmap = drm_gem_shmem_object_vmap,
-	.vunmap = drm_gem_shmem_object_vunmap,
+	.vmap = amdxdna_gem_obj_vmap,
+	.vunmap = amdxdna_gem_obj_vunmap,
 	.mmap = amdxdna_gem_obj_mmap,
 	.vm_ops = &drm_gem_shmem_vm_ops,
 	.export = amdxdna_gem_prime_export,
@@ -714,6 +744,36 @@ amdxdna_gem_create_ubuf_object(struct drm_device *dev, struct amdxdna_drm_create
 	return to_xdna_obj(gobj);
 }
 
+/*
+ * Create a BO backed by carveout memory.
+ *
+ * The page-aligned size is allocated from the carveout as a dma-buf, which
+ * is then imported into the driver like any other dma-buf. DEV_HEAP BOs are
+ * aligned to the device memory size; other types request no alignment.
+ *
+ * Returns the new BO or an ERR_PTR(); -EINVAL when the aligned size is zero.
+ */
+static struct amdxdna_gem_obj *
+amdxdna_gem_create_cbuf_object(struct drm_device *dev, struct amdxdna_drm_create_bo *args)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	size_t size = PAGE_ALIGN(args->size);
+	struct drm_gem_object *gobj;
+	struct dma_buf *dma_buf;
+	u64 align = 0;
+
+	if (!size) {
+		XDNA_ERR(xdna, "Invalid BO size 0x%llx", args->size);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (args->type == AMDXDNA_BO_DEV_HEAP)
+		align = xdna->dev_info->dev_mem_size;
+
+	dma_buf = amdxdna_get_cbuf(dev, size, align);
+	if (IS_ERR(dma_buf))
+		return ERR_CAST(dma_buf);
+
+	gobj = amdxdna_gem_prime_import(dev, dma_buf);
+
+	/*
+	 * Drop the allocation reference whether or not the import worked.
+	 * NOTE(review): presumably a successful import holds its own
+	 * reference on the dma-buf - confirm in amdxdna_gem_prime_import().
+	 */
+	dma_buf_put(dma_buf);
+
+	if (IS_ERR(gobj))
+		return ERR_CAST(gobj);
+
+	return to_xdna_obj(gobj);
+}
+
 struct drm_gem_object *
 amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
 {
@@ -769,6 +829,8 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
 
 	if (args->vaddr)
 		abo = amdxdna_gem_create_ubuf_object(dev, args);
+	else if (amdxdna_use_carveout())
+		abo = amdxdna_gem_create_cbuf_object(dev, args);
 	else
 		abo = amdxdna_gem_create_shmem_object(dev, args);
 	if (IS_ERR(abo))
@@ -884,7 +946,6 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
 		 args->type, args->vaddr, args->size, args->flags);
 	switch (args->type) {
 	case AMDXDNA_BO_CMD:
-		fallthrough;
 	case AMDXDNA_BO_SHARE:
 		abo = amdxdna_drm_create_share_bo(dev, args, filp);
 		break;
diff --git a/drivers/accel/amdxdna/amdxdna_iommu.c b/drivers/accel/amdxdna/amdxdna_iommu.c
index 5a9f06183487..eff00131d0f8 100644
--- a/drivers/accel/amdxdna/amdxdna_iommu.c
+++ b/drivers/accel/amdxdna/amdxdna_iommu.c
@@ -35,14 +35,15 @@ static struct iova *amdxdna_iommu_alloc_iova(struct amdxdna_dev *xdna,
 	return iova;
 }
 
-int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
+int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
 {
+	unsigned long contig_sz;
 	struct sg_table *sgt;
 	dma_addr_t dma_addr;
 	struct iova *iova;
 	ssize_t size;
 
-	if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHMEM)
+	if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHARE)
 		return 0;
 
 	sgt = drm_gem_shmem_get_pages_sgt(&abo->base);
@@ -51,47 +52,63 @@ int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
 		return PTR_ERR(sgt);
 	}
 
-	if (!sgt->orig_nents || !sg_page(sgt->sgl)) {
-		XDNA_ERR(xdna, "sgl is zero length or not page backed");
+	if (!sgt->orig_nents) {
+		XDNA_ERR(xdna, "sgl is zero length");
 		return -EOPNOTSUPP;
 	}
 
-	iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
-					(abo->type == AMDXDNA_BO_DEV_HEAP));
-	if (IS_ERR(iova)) {
-		XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
-		return PTR_ERR(iova);
+	if (amdxdna_iova_on(xdna)) {
+		if (!sg_page(sgt->sgl)) {
+			XDNA_ERR(xdna, "sgl is not page backed");
+			return -EOPNOTSUPP;
+		}
+
+		iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
+						(abo->type == AMDXDNA_BO_DEV_HEAP));
+		if (IS_ERR(iova)) {
+			XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
+			return PTR_ERR(iova);
+		}
+
+		size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
+					 IOMMU_READ | IOMMU_WRITE);
+		if (size < 0) {
+			XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
+			__free_iova(&xdna->iovad, iova);
+			return size;
+		}
+		if (size < abo->mem.size) {
+			iommu_unmap(xdna->domain, dma_addr, size);
+			__free_iova(&xdna->iovad, iova);
+			return -ENXIO;
+		}
+		abo->mem.dma_addr = dma_addr;
+	} else {
+		/* Device doesn't support scatter/gather list, fail non-contiguous mapping. */
+		contig_sz = drm_prime_get_contiguous_size(sgt);
+		if (contig_sz < abo->mem.size) {
+			XDNA_ERR(xdna,
+				 "noncontiguous dma addr, contig size:%ld, expected size:%ld",
+				 contig_sz, abo->mem.size);
+			return -EINVAL;
+		}
+		abo->mem.dma_addr = sg_dma_address(sgt->sgl);
 	}
-
-	size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
-				 IOMMU_READ | IOMMU_WRITE);
-	if (size < 0) {
-		XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
-		__free_iova(&xdna->iovad, iova);
-		return size;
-	}
-
-	if (size < abo->mem.size) {
-		iommu_unmap(xdna->domain, dma_addr, size);
-		__free_iova(&xdna->iovad, iova);
-		return -ENXIO;
-	}
-
-	abo->mem.dma_addr = dma_addr;
-
 	return 0;
 }
 
-void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
+void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
 {
 	size_t size;
 
 	if (abo->mem.dma_addr == AMDXDNA_INVALID_ADDR)
 		return;
 
-	size = iova_align(&xdna->iovad, abo->mem.size);
-	iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
-	free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
+	if (amdxdna_iova_on(xdna)) {
+		size = iova_align(&xdna->iovad, abo->mem.size);
+		iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
+		free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
+	}
 	abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
 }
 
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 21eddfc538d0..b8c5dbc12489 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -14,6 +14,7 @@
 #include <linux/iommu.h>
 #include <linux/pci.h>
 
+#include "amdxdna_cbuf.h"
 #include "amdxdna_ctx.h"
 #include "amdxdna_gem.h"
 #include "amdxdna_pci_drv.h"
@@ -67,11 +68,40 @@ static const struct amdxdna_device_id amdxdna_ids[] = {
 	{0}
 };
 
+/*
+ * Bind the client's mm to the device for SVA and record the resulting PASID.
+ *
+ * On success client->sva holds the bound handle and client->pasid the PASID.
+ * On failure a negative errno is returned and client->sva is left NULL, the
+ * same "not bound" state amdxdna_sva_fini() produces. This matters because
+ * the caller keeps the client alive after a failure: a stale handle left
+ * behind here would be unbound a second time by amdxdna_sva_fini().
+ */
+static int amdxdna_sva_init(struct amdxdna_client *client)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	int ret;
+
+	client->sva = iommu_sva_bind_device(xdna->ddev.dev, client->mm);
+	if (IS_ERR(client->sva)) {
+		ret = PTR_ERR(client->sva);
+		XDNA_ERR(xdna, "SVA bind device failed, ret %ld", PTR_ERR(client->sva));
+		client->sva = NULL;
+		return ret;
+	}
+
+	client->pasid = iommu_sva_get_pasid(client->sva);
+	if (client->pasid == IOMMU_PASID_INVALID) {
+		iommu_sva_unbind_device(client->sva);
+		/* Already unbound: forget the handle so it is not unbound again. */
+		client->sva = NULL;
+		XDNA_ERR(xdna, "SVA get pasid failed");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * Undo amdxdna_sva_init(): unbind the SVA handle if there is a valid one and
+ * reset the client to the "not bound" state (NULL handle, invalid PASID).
+ * A NULL or error-valued handle is skipped.
+ */
+static void amdxdna_sva_fini(struct amdxdna_client *client)
+{
+	if (IS_ERR_OR_NULL(client->sva))
+		return;
+
+	iommu_sva_unbind_device(client->sva);
+	client->sva = NULL;
+	client->pasid = IOMMU_PASID_INVALID;
+}
+
 static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
 {
 	struct amdxdna_dev *xdna = to_xdna_dev(ddev);
 	struct amdxdna_client *client;
-	int ret;
 
 	client = kzalloc_obj(*client);
 	if (!client)
@@ -80,22 +110,13 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
 	client->pid = pid_nr(rcu_access_pointer(filp->pid));
 	client->xdna = xdna;
 	client->pasid = IOMMU_PASID_INVALID;
+	client->mm = current->mm;
 
 	if (!amdxdna_iova_on(xdna)) {
-		client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm);
-		if (IS_ERR(client->sva)) {
-			ret = PTR_ERR(client->sva);
-			XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
-			goto failed;
-		}
-		client->pasid = iommu_sva_get_pasid(client->sva);
-		if (client->pasid == IOMMU_PASID_INVALID) {
-			XDNA_ERR(xdna, "SVA get pasid failed");
-			ret = -ENODEV;
-			goto unbind_sva;
-		}
+		/* No need to fail open since user may use pa + carveout later. */
+		if (amdxdna_sva_init(client))
+			XDNA_WARN(xdna, "PASID not available for pid %d", client->pid);
 	}
-	client->mm = current->mm;
 	mmgrab(client->mm);
 	init_srcu_struct(&client->hwctx_srcu);
 	xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
@@ -110,14 +131,6 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
 
 	XDNA_DBG(xdna, "pid %d opened", client->pid);
 	return 0;
-
-unbind_sva:
-	if (!IS_ERR_OR_NULL(client->sva))
-		iommu_sva_unbind_device(client->sva);
-failed:
-	kfree(client);
-
-	return ret;
 }
 
 static void amdxdna_client_cleanup(struct amdxdna_client *client)
@@ -131,11 +144,8 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
 		drm_gem_object_put(to_gobj(client->dev_heap));
 
 	mutex_destroy(&client->mm_lock);
-
-	if (!IS_ERR_OR_NULL(client->sva))
-		iommu_sva_unbind_device(client->sva);
 	mmdrop(client->mm);
-
+	amdxdna_sva_fini(client);
 	kfree(client);
 }
 
@@ -242,15 +252,17 @@ static void amdxdna_show_fdinfo(struct drm_printer *p, struct drm_file *filp)
 
 	/*
 	 * Note for driver specific BO memory usage stat.
-	 * Total memory alloc = amdxdna-internal-alloc + amdxdna-external-alloc
+	 * Total memory in use = amdxdna-internal-alloc + amdxdna-external-alloc, which
+	 * includes both imported and created BOs. To avoid double counts, it includes
+	 * HEAP BO, but not DEV BO. DEV BO is counted by amdxdna-heap-alloc.
 	 */
 	drm_fdinfo_print_size(p, drv_name, "heap", "alloc", heap_usage);
 	drm_fdinfo_print_size(p, drv_name, "internal", "alloc", internal_usage);
 	drm_fdinfo_print_size(p, drv_name, "external", "alloc", external_usage);
 	/*
 	 * Note for DRM standard BO memory stat.
-	 * drm-total-memory counts both DEV BO and HEAP BO
-	 * drm-shared-memory counts BO imported
+	 * drm-total-memory counts both DEV BO and HEAP BO. The DEV BO size is double counted.
+	 * drm-shared-memory counts BO shared with other processes/devices.
 	 */
 	drm_show_memory_stats(p, filp);
 }
@@ -420,7 +432,26 @@ static struct pci_driver amdxdna_pci_driver = {
 	.sriov_configure = amdxdna_sriov_configure,
 };
 
-module_pci_driver(amdxdna_pci_driver);
+/*
+ * Module entry point: set up the carveout allocator before the PCI driver is
+ * registered, and tear it down again if registration fails.
+ */
+static int __init amdxdna_mod_init(void)
+{
+	int err;
+
+	amdxdna_carveout_init();
+
+	err = pci_register_driver(&amdxdna_pci_driver);
+	if (!err)
+		return 0;
+
+	amdxdna_carveout_fini();
+	return err;
+}
+
+/*
+ * Module exit point: reverse of amdxdna_mod_init(). The PCI driver is
+ * unregistered first, then the carveout allocator is torn down.
+ */
+static void __exit amdxdna_mod_exit(void)
+{
+	pci_unregister_driver(&amdxdna_pci_driver);
+	amdxdna_carveout_fini();
+}
+
+module_init(amdxdna_mod_init);
+module_exit(amdxdna_mod_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_IMPORT_NS("AMD_PMF");
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index bdd0dc83f92e..07bd38281452 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -172,11 +172,11 @@ void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);
 
 int amdxdna_iommu_init(struct amdxdna_dev *xdna);
 void amdxdna_iommu_fini(struct amdxdna_dev *xdna);
-int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
-void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
 void *amdxdna_iommu_alloc(struct amdxdna_dev *xdna, size_t size, dma_addr_t *dma_addr);
 void amdxdna_iommu_free(struct amdxdna_dev *xdna, size_t size,
 			void *cpu_addr, dma_addr_t dma_addr);
+int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
+void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
 
 static inline bool amdxdna_iova_on(struct amdxdna_dev *xdna)
 {
-- 
2.34.1
Re: [PATCH V1] accel/amdxdna: Add carveout memory support for non-IOMMU systems
Posted by Mario Limonciello 1 month, 3 weeks ago

On 4/17/26 16:06, Lizhi Hou wrote:
> From: Max Zhen <max.zhen@amd.com>
> 
> Add support for allocating buffers from reserved carveout memory when
> IOMMU is not available. This is useful during debugging or bring-up.
> 
> In this configuration, the device uses physical addresses and does
> not support scatter-gather lists, requiring physically contiguous
> buffers.
> 
> Implement carveout-backed allocation and integrate it into buffer
> management to support operation in physical address mode.

Running with IOMMU disabled is unlikely to be a common production 
scenario.  At first I was thinking this is OK, but considering Greg's 
strong comments about driver specific module parameters I /wonder/ if 
this should be gated behind a kconfig option to be used at bringup?

The Kconfig option could be something like XDNA_CARVEOUT and default to 
0.  Then it can be set to any value necessary for debugging.

You could go a step further and only let XDNA_CARVEOUT do the 
reservation when IOMMU wasn't found.

> 
> Signed-off-by: Max Zhen <max.zhen@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
>   drivers/accel/amdxdna/Makefile          |   1 +
>   drivers/accel/amdxdna/amdxdna_cbuf.c    | 249 ++++++++++++++++++++++++
>   drivers/accel/amdxdna/amdxdna_cbuf.h    |  16 ++
>   drivers/accel/amdxdna/amdxdna_gem.c     |  95 +++++++--
>   drivers/accel/amdxdna/amdxdna_iommu.c   |  77 +++++---
>   drivers/accel/amdxdna/amdxdna_pci_drv.c |  91 ++++++---
>   drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +-
>   7 files changed, 454 insertions(+), 79 deletions(-)
>   create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.c
>   create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.h
> 
> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
> index 79369e497540..a055aea36971 100644
> --- a/drivers/accel/amdxdna/Makefile
> +++ b/drivers/accel/amdxdna/Makefile
> @@ -12,6 +12,7 @@ amdxdna-y := \
>   	aie2_solver.o \
>   	aie4_message.o \
>   	aie4_pci.o \
> +	amdxdna_cbuf.o \
>   	amdxdna_ctx.o \
>   	amdxdna_gem.o \
>   	amdxdna_iommu.o \
> diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.c b/drivers/accel/amdxdna/amdxdna_cbuf.c
> new file mode 100644
> index 000000000000..4a556199a461
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_cbuf.c
> @@ -0,0 +1,249 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
> + */
> +
> +#include <drm/drm_mm.h>
> +#include <drm/drm_prime.h>
> +
> +#include "amdxdna_cbuf.h"
> +#include "amdxdna_pci_drv.h"
> +
> +/*
> + * This is a platform debug/bringup feature.
> + *
> + * Carveout memory is a chunk of memory which is physically contiguous and
> + * is reserved during early boot time. There is only one chunk of such memory
> + * per system. Once available, all BOs accessible from device should be
> + * allocated from this memory.
> + */
> +u64 carveout_addr;
> +module_param(carveout_addr, ullong, 0400);
> +MODULE_PARM_DESC(carveout_addr, "Physical memory address for reserved memory chunk");
> +
> +u64 carveout_size;
> +module_param(carveout_size, ullong, 0400);
> +MODULE_PARM_DESC(carveout_size, "Physical memory size for reserved memory chunk");
> +
> +struct amdxdna_carveout {
> +	struct drm_mm	mm;
> +	struct mutex	lock; /* protect mm */
> +} carveout;
> +
> +bool amdxdna_use_carveout(void)
> +{
> +	return !!carveout_size;
> +}
> +
> +void amdxdna_carveout_init(void)
> +{
> +	if (!amdxdna_use_carveout())
> +		return;
> +	mutex_init(&carveout.lock);
> +	drm_mm_init(&carveout.mm, carveout_addr, carveout_size);
> +	pr_info("Use carveout mem, addr=0x%llx, size=0x%llx\n", carveout_addr, carveout_size);
> +}
> +
> +void amdxdna_carveout_fini(void)
> +{
> +	if (!amdxdna_use_carveout())
> +		return;
> +	drm_mm_takedown(&carveout.mm);
> +	mutex_destroy(&carveout.lock);
> +}
> +
> +struct amdxdna_cbuf_priv {
> +	struct drm_mm_node node;
> +};
> +
> +static struct sg_table *amdxdna_cbuf_map(struct dma_buf_attachment *attach,
> +					 enum dma_data_direction direction)
> +{
> +	struct amdxdna_cbuf_priv *cbuf = attach->dmabuf->priv;
> +	struct device *dev = attach->dev;
> +	struct scatterlist *sgl, *sg;
> +	int ret, n_entries, i;
> +	struct sg_table *sgt;
> +	dma_addr_t dma_addr;
> +	size_t dma_size;
> +	size_t max_seg;
> +
> +	sgt = kzalloc_obj(*sgt);
> +	if (!sgt)
> +		return ERR_PTR(-ENOMEM);
> +
> +	max_seg = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev));
> +	n_entries = (cbuf->node.size + max_seg - 1) / max_seg;
> +	sgl = kzalloc_objs(*sg, n_entries);
> +	if (!sgl) {
> +		ret = -ENOMEM;
> +		goto free_sgt;
> +	}
> +	sg_init_table(sgl, n_entries);
> +	sgt->orig_nents = n_entries;
> +	sgt->nents = n_entries;
> +	sgt->sgl = sgl;
> +
> +	dma_size = cbuf->node.size;
> +	dma_addr = dma_map_resource(dev, cbuf->node.start, dma_size,
> +				    direction, DMA_ATTR_SKIP_CPU_SYNC);
> +	ret = dma_mapping_error(dev, dma_addr);
> +	if (ret) {
> +		pr_err("Failed to dma_map_resource carveout dma buf, ret %d\n", ret);
> +		goto free_sgl;
> +	}
> +
> +	for_each_sgtable_dma_sg(sgt, sg, i) {
> +		size_t len = min_t(size_t, max_seg, dma_size);
> +
> +		sg_dma_address(sg) = dma_addr;
> +		sg_dma_len(sg) = len;
> +		dma_addr += len;
> +		dma_size -= len;
> +	}
> +
> +	return sgt;
> +
> +free_sgl:
> +	kfree(sgl);
> +free_sgt:
> +	kfree(sgt);
> +	return ERR_PTR(ret);
> +}
> +
> +static void amdxdna_cbuf_unmap(struct dma_buf_attachment *attach,
> +			       struct sg_table *sgt,
> +			       enum dma_data_direction direction)
> +{
> +	dma_unmap_resource(attach->dev, sg_dma_address(sgt->sgl),
> +			   drm_prime_get_contiguous_size(sgt), direction,
> +			   DMA_ATTR_SKIP_CPU_SYNC);
> +	sg_free_table(sgt);
> +	kfree(sgt);
> +}
> +
> +static void amdxdna_cbuf_release(struct dma_buf *dbuf)
> +{
> +	struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
> +
> +	mutex_lock(&carveout.lock);
> +	drm_mm_remove_node(&cbuf->node);
> +	mutex_unlock(&carveout.lock);
> +
> +	kfree(cbuf);
> +}
> +
> +static vm_fault_t amdxdna_cbuf_vm_fault(struct vm_fault *vmf)
> +{
> +	struct vm_area_struct *vma = vmf->vma;
> +	struct amdxdna_cbuf_priv *cbuf;
> +	unsigned long pfn;
> +	pgoff_t pgoff;
> +
> +	cbuf = vma->vm_private_data;
> +	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
> +	pfn = (cbuf->node.start >> PAGE_SHIFT) + pgoff;
> +
> +	return vmf_insert_pfn(vma, vmf->address, pfn);
> +}
> +
> +static const struct vm_operations_struct amdxdna_cbuf_vm_ops = {
> +	.fault = amdxdna_cbuf_vm_fault,
> +};
> +
> +static int amdxdna_cbuf_mmap(struct dma_buf *dbuf, struct vm_area_struct *vma)
> +{
> +	struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
> +
> +	vma->vm_ops = &amdxdna_cbuf_vm_ops;
> +	vma->vm_private_data = cbuf;
> +	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
> +
> +	return 0;
> +}
> +
> +static int amdxdna_cbuf_vmap(struct dma_buf *dbuf, struct iosys_map *map)
> +{
> +	struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
> +	void *kva;
> +
> +	kva = memremap(cbuf->node.start, cbuf->node.size, MEMREMAP_WB);
> +	if (!kva) {
> +		pr_err("Failed to vmap carveout dma buf\n");
> +		return -ENOMEM;
> +	}
> +
> +	iosys_map_set_vaddr(map, kva);
> +	return 0;
> +}
> +
> +static void amdxdna_cbuf_vunmap(struct dma_buf *dbuf, struct iosys_map *map)
> +{
> +	memunmap(map->vaddr);
> +}
> +
> +static const struct dma_buf_ops amdxdna_cbuf_dmabuf_ops = {
> +	.map_dma_buf = amdxdna_cbuf_map,
> +	.unmap_dma_buf = amdxdna_cbuf_unmap,
> +	.release = amdxdna_cbuf_release,
> +	.mmap = amdxdna_cbuf_mmap,
> +	.vmap = amdxdna_cbuf_vmap,
> +	.vunmap = amdxdna_cbuf_vunmap,
> +};
> +
> +static int amdxdna_cbuf_clear(struct dma_buf *dbuf)
> +{
> +	struct iosys_map vmap = IOSYS_MAP_INIT_VADDR(NULL);
> +
> +	dma_buf_vmap(dbuf, &vmap);
> +	if (!vmap.vaddr)
> +		return -EFAULT;
> +
> +	memset(vmap.vaddr, 0, dbuf->size);
> +	dma_buf_vunmap(dbuf, &vmap);
> +
> +	return 0;
> +}
> +
> +struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t size, u64 alignment)
> +{
> +	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> +	struct amdxdna_cbuf_priv *cbuf;
> +	struct dma_buf *dbuf;
> +	int ret;
> +
> +	cbuf = kzalloc_obj(*cbuf);
> +	if (!cbuf)
> +		return ERR_PTR(-ENOMEM);
> +
> +	mutex_lock(&carveout.lock);
> +	ret = drm_mm_insert_node_generic(&carveout.mm, &cbuf->node, size,
> +					 alignment, 0, DRM_MM_INSERT_BEST);
> +	mutex_unlock(&carveout.lock);
> +	if (ret)
> +		goto free_cbuf;
> +
> +	exp_info.size = size;
> +	exp_info.ops = &amdxdna_cbuf_dmabuf_ops;
> +	exp_info.priv = cbuf;
> +	exp_info.flags = O_RDWR;
> +	dbuf = dma_buf_export(&exp_info);
> +	if (IS_ERR(dbuf)) {
> +		ret = PTR_ERR(dbuf);
> +		goto remove_node;
> +	}
> +
> +	ret = amdxdna_cbuf_clear(dbuf);
> +	if (ret) {
> +		dma_buf_put(dbuf);
> +		goto out;
> +	}
> +	return dbuf;
> +
> +remove_node:
> +	drm_mm_remove_node(&cbuf->node);
> +free_cbuf:
> +	kfree(cbuf);
> +out:
> +	return ERR_PTR(ret);
> +}
> diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.h b/drivers/accel/amdxdna/amdxdna_cbuf.h
> new file mode 100644
> index 000000000000..15e189ce779e
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_cbuf.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
> + */
> +#ifndef _AMDXDNA_CBUF_H_
> +#define _AMDXDNA_CBUF_H_
> +
> +#include <drm/drm_device.h>
> +#include <linux/dma-buf.h>
> +
> +bool amdxdna_use_carveout(void);
> +void amdxdna_carveout_init(void);
> +void amdxdna_carveout_fini(void);
> +struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t size, u64 alignment);
> +
> +#endif
> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
> index 238ee244d4a6..905514ec183c 100644
> --- a/drivers/accel/amdxdna/amdxdna_gem.c
> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
> @@ -16,6 +16,7 @@
>   #include <linux/pagemap.h>
>   #include <linux/vmalloc.h>
>   
> +#include "amdxdna_cbuf.h"
>   #include "amdxdna_ctx.h"
>   #include "amdxdna_gem.h"
>   #include "amdxdna_pci_drv.h"
> @@ -516,10 +517,6 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
>   static inline bool
>   amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
>   {
> -	/* Do not count imported BOs since the buffer is not allocated by us. */
> -	if (is_import_bo(abo))
> -		return true;
> -
>   	/* Already counted as part of HEAP BO */
>   	if (abo->type == AMDXDNA_BO_DEV)
>   		return true;
> @@ -571,9 +568,7 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
>   	if (abo->type == AMDXDNA_BO_DEV_HEAP)
>   		drm_mm_takedown(&abo->mm);
>   
> -	if (amdxdna_iova_on(xdna))
> -		amdxdna_iommu_unmap_bo(xdna, abo);
> -
> +	amdxdna_dma_unmap_bo(xdna, abo);
>   	amdxdna_gem_vunmap(abo);
>   	mutex_destroy(&abo->lock);
>   
> @@ -591,18 +586,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
>   
>   	guard(mutex)(&abo->lock);
>   	abo->open_ref++;
> +	if (abo->open_ref > 1)
> +		return 0;
>   
> -	if (abo->open_ref == 1) {
> -		/* Attached to the client when first opened by it. */
> -		abo->client = filp->driver_priv;
> -		amdxdna_gem_add_bo_usage(abo);
> -	}
> -	if (amdxdna_iova_on(xdna)) {
> -		ret = amdxdna_iommu_map_bo(xdna, abo);
> +	/* Attached to the client when first opened by it. */
> +	abo->client = filp->driver_priv;
> +
> +	/* No need to set up dma addr mapping in PASID mode. */
> +	if (!amdxdna_pasid_on(abo->client)) {
> +		ret = amdxdna_dma_map_bo(xdna, abo);
>   		if (ret)
>   			return ret;
>   	}
>   
> +	amdxdna_gem_add_bo_usage(abo);
>   	return 0;
>   }
>   
> @@ -620,6 +617,39 @@ static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *
>   	}
>   }
>   
> +static int amdxdna_gem_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
> +{
> +	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
> +	int ret;
> +
> +	iosys_map_clear(map);
> +
> +	dma_resv_assert_held(obj->resv);
> +
> +	if (is_import_bo(abo))
> +		ret = dma_buf_vmap(abo->dma_buf, map);
> +	else
> +		ret = drm_gem_shmem_object_vmap(obj, map);
> +	if (ret)
> +		return ret;
> +	if (!map->vaddr)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +static void amdxdna_gem_obj_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
> +{
> +	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
> +
> +	dma_resv_assert_held(obj->resv);
> +
> +	if (is_import_bo(abo))
> +		dma_buf_vunmap(abo->dma_buf, map);
> +	else
> +		drm_gem_shmem_object_vunmap(obj, map);
> +}
> +
>   static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
>   {
>   	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
> @@ -645,8 +675,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
>   	.pin = drm_gem_shmem_object_pin,
>   	.unpin = drm_gem_shmem_object_unpin,
>   	.get_sg_table = drm_gem_shmem_object_get_sg_table,
> -	.vmap = drm_gem_shmem_object_vmap,
> -	.vunmap = drm_gem_shmem_object_vunmap,
> +	.vmap = amdxdna_gem_obj_vmap,
> +	.vunmap = amdxdna_gem_obj_vunmap,
>   	.mmap = amdxdna_gem_obj_mmap,
>   	.vm_ops = &drm_gem_shmem_vm_ops,
>   	.export = amdxdna_gem_prime_export,
> @@ -714,6 +744,36 @@ amdxdna_gem_create_ubuf_object(struct drm_device *dev, struct amdxdna_drm_create
>   	return to_xdna_obj(gobj);
>   }
>   
> +static struct amdxdna_gem_obj *
> +amdxdna_gem_create_cbuf_object(struct drm_device *dev, struct amdxdna_drm_create_bo *args)
> +{
> +	struct amdxdna_dev *xdna = to_xdna_dev(dev);
> +	size_t size = PAGE_ALIGN(args->size);
> +	struct drm_gem_object *gobj;
> +	struct amdxdna_gem_obj *ret;
> +	struct dma_buf *dma_buf;
> +	u64 align;
> +
> +	if (!size) {
> +		XDNA_ERR(xdna, "Invalid BO size 0x%llx", args->size);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	align = (args->type == AMDXDNA_BO_DEV_HEAP) ?  xdna->dev_info->dev_mem_size : 0;
> +	dma_buf = amdxdna_get_cbuf(dev, size, align);
> +	if (IS_ERR(dma_buf))
> +		return ERR_CAST(dma_buf);
> +
> +	gobj = amdxdna_gem_prime_import(dev, dma_buf);
> +	if (IS_ERR(gobj))
> +		ret = ERR_CAST(gobj);
> +	else
> +		ret = to_xdna_obj(gobj);
> +
> +	dma_buf_put(dma_buf);
> +	return ret;
> +}
> +
>   struct drm_gem_object *
>   amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
>   {
> @@ -769,6 +829,8 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
>   
>   	if (args->vaddr)
>   		abo = amdxdna_gem_create_ubuf_object(dev, args);
> +	else if (amdxdna_use_carveout())
> +		abo = amdxdna_gem_create_cbuf_object(dev, args);
>   	else
>   		abo = amdxdna_gem_create_shmem_object(dev, args);
>   	if (IS_ERR(abo))
> @@ -884,7 +946,6 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
>   		 args->type, args->vaddr, args->size, args->flags);
>   	switch (args->type) {
>   	case AMDXDNA_BO_CMD:
> -		fallthrough;
>   	case AMDXDNA_BO_SHARE:
>   		abo = amdxdna_drm_create_share_bo(dev, args, filp);
>   		break;
> diff --git a/drivers/accel/amdxdna/amdxdna_iommu.c b/drivers/accel/amdxdna/amdxdna_iommu.c
> index 5a9f06183487..eff00131d0f8 100644
> --- a/drivers/accel/amdxdna/amdxdna_iommu.c
> +++ b/drivers/accel/amdxdna/amdxdna_iommu.c
> @@ -35,14 +35,15 @@ static struct iova *amdxdna_iommu_alloc_iova(struct amdxdna_dev *xdna,
>   	return iova;
>   }
>   
> -int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
> +int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
>   {
> +	unsigned long contig_sz;
>   	struct sg_table *sgt;
>   	dma_addr_t dma_addr;
>   	struct iova *iova;
>   	ssize_t size;
>   
> -	if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHMEM)
> +	if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHARE)
>   		return 0;
>   
>   	sgt = drm_gem_shmem_get_pages_sgt(&abo->base);
> @@ -51,47 +52,63 @@ int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
>   		return PTR_ERR(sgt);
>   	}
>   
> -	if (!sgt->orig_nents || !sg_page(sgt->sgl)) {
> -		XDNA_ERR(xdna, "sgl is zero length or not page backed");
> +	if (!sgt->orig_nents) {
> +		XDNA_ERR(xdna, "sgl is zero length");
>   		return -EOPNOTSUPP;
>   	}
>   
> -	iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
> -					(abo->type == AMDXDNA_BO_DEV_HEAP));
> -	if (IS_ERR(iova)) {
> -		XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
> -		return PTR_ERR(iova);
> +	if (amdxdna_iova_on(xdna)) {
> +		if (!sg_page(sgt->sgl)) {
> +			XDNA_ERR(xdna, "sgl is not page backed");
> +			return -EOPNOTSUPP;
> +		}
> +
> +		iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
> +						(abo->type == AMDXDNA_BO_DEV_HEAP));
> +		if (IS_ERR(iova)) {
> +			XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
> +			return PTR_ERR(iova);
> +		}
> +
> +		size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
> +					 IOMMU_READ | IOMMU_WRITE);
> +		if (size < 0) {
> +			XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
> +			__free_iova(&xdna->iovad, iova);
> +			return size;
> +		}
> +		if (size < abo->mem.size) {
> +			iommu_unmap(xdna->domain, dma_addr, size);
> +			__free_iova(&xdna->iovad, iova);
> +			return -ENXIO;
> +		}
> +		abo->mem.dma_addr = dma_addr;
> +	} else {
> +		/* Device doesn't support scatter/gather list, fail non-contiguous mapping. */
> +		contig_sz = drm_prime_get_contiguous_size(sgt);
> +		if (contig_sz < abo->mem.size) {
> +			XDNA_ERR(xdna,
> +				 "noncontiguous dma addr, contig size:%ld, expected size:%ld",
> +				 contig_sz, abo->mem.size);
> +			return -EINVAL;
> +		}
> +		abo->mem.dma_addr = sg_dma_address(sgt->sgl);
>   	}
> -
> -	size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
> -				 IOMMU_READ | IOMMU_WRITE);
> -	if (size < 0) {
> -		XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
> -		__free_iova(&xdna->iovad, iova);
> -		return size;
> -	}
> -
> -	if (size < abo->mem.size) {
> -		iommu_unmap(xdna->domain, dma_addr, size);
> -		__free_iova(&xdna->iovad, iova);
> -		return -ENXIO;
> -	}
> -
> -	abo->mem.dma_addr = dma_addr;
> -
>   	return 0;
>   }
>   
> -void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
> +void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
>   {
>   	size_t size;
>   
>   	if (abo->mem.dma_addr == AMDXDNA_INVALID_ADDR)
>   		return;
>   
> -	size = iova_align(&xdna->iovad, abo->mem.size);
> -	iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
> -	free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
> +	if (amdxdna_iova_on(xdna)) {
> +		size = iova_align(&xdna->iovad, abo->mem.size);
> +		iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
> +		free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
> +	}
>   	abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
>   }
>   
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index 21eddfc538d0..b8c5dbc12489 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -14,6 +14,7 @@
>   #include <linux/iommu.h>
>   #include <linux/pci.h>
>   
> +#include "amdxdna_cbuf.h"
>   #include "amdxdna_ctx.h"
>   #include "amdxdna_gem.h"
>   #include "amdxdna_pci_drv.h"
> @@ -67,11 +68,40 @@ static const struct amdxdna_device_id amdxdna_ids[] = {
>   	{0}
>   };
>   
> +static int amdxdna_sva_init(struct amdxdna_client *client)
> +{
> +	struct amdxdna_dev *xdna = client->xdna;
> +
> +	client->sva = iommu_sva_bind_device(xdna->ddev.dev, client->mm);
> +	if (IS_ERR(client->sva)) {
> +		XDNA_ERR(xdna, "SVA bind device failed, ret %ld", PTR_ERR(client->sva));
> +		return PTR_ERR(client->sva);
> +	}
> +
> +	client->pasid = iommu_sva_get_pasid(client->sva);
> +	if (client->pasid == IOMMU_PASID_INVALID) {
> +		iommu_sva_unbind_device(client->sva);
> +		XDNA_ERR(xdna, "SVA get pasid failed");
> +		return -ENODEV;
> +	}
> +
> +	return 0;
> +}
> +
> +static void amdxdna_sva_fini(struct amdxdna_client *client)
> +{
> +	if (IS_ERR_OR_NULL(client->sva))
> +		return;
> +
> +	iommu_sva_unbind_device(client->sva);
> +	client->sva = NULL;
> +	client->pasid = IOMMU_PASID_INVALID;
> +}
> +
>   static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
>   {
>   	struct amdxdna_dev *xdna = to_xdna_dev(ddev);
>   	struct amdxdna_client *client;
> -	int ret;
>   
>   	client = kzalloc_obj(*client);
>   	if (!client)
> @@ -80,22 +110,13 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
>   	client->pid = pid_nr(rcu_access_pointer(filp->pid));
>   	client->xdna = xdna;
>   	client->pasid = IOMMU_PASID_INVALID;
> +	client->mm = current->mm;
>   
>   	if (!amdxdna_iova_on(xdna)) {
> -		client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm);
> -		if (IS_ERR(client->sva)) {
> -			ret = PTR_ERR(client->sva);
> -			XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
> -			goto failed;
> -		}
> -		client->pasid = iommu_sva_get_pasid(client->sva);
> -		if (client->pasid == IOMMU_PASID_INVALID) {
> -			XDNA_ERR(xdna, "SVA get pasid failed");
> -			ret = -ENODEV;
> -			goto unbind_sva;
> -		}
> +		/* No need to fail open since user may use pa + carveout later. */
> +		if (amdxdna_sva_init(client))
> +			XDNA_WARN(xdna, "PASID not available for pid %d", client->pid);
>   	}
> -	client->mm = current->mm;
>   	mmgrab(client->mm);
>   	init_srcu_struct(&client->hwctx_srcu);
>   	xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
> @@ -110,14 +131,6 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
>   
>   	XDNA_DBG(xdna, "pid %d opened", client->pid);
>   	return 0;
> -
> -unbind_sva:
> -	if (!IS_ERR_OR_NULL(client->sva))
> -		iommu_sva_unbind_device(client->sva);
> -failed:
> -	kfree(client);
> -
> -	return ret;
>   }
>   
>   static void amdxdna_client_cleanup(struct amdxdna_client *client)
> @@ -131,11 +144,8 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
>   		drm_gem_object_put(to_gobj(client->dev_heap));
>   
>   	mutex_destroy(&client->mm_lock);
> -
> -	if (!IS_ERR_OR_NULL(client->sva))
> -		iommu_sva_unbind_device(client->sva);
>   	mmdrop(client->mm);
> -
> +	amdxdna_sva_fini(client);
>   	kfree(client);
>   }
>   
> @@ -242,15 +252,17 @@ static void amdxdna_show_fdinfo(struct drm_printer *p, struct drm_file *filp)
>   
>   	/*
>   	 * Note for driver specific BO memory usage stat.
> -	 * Total memory alloc = amdxdna-internal-alloc + amdxdna-external-alloc
> +	 * Total memory in use = amdxdna-internal-alloc + amdxdna-external-alloc, which
> +	 * includes both imported and created BOs. To avoid double counts, it includes
> +	 * HEAP BO, but not DEV BO. DEV BO is counted by amdxdna-heap-alloc.
>   	 */
>   	drm_fdinfo_print_size(p, drv_name, "heap", "alloc", heap_usage);
>   	drm_fdinfo_print_size(p, drv_name, "internal", "alloc", internal_usage);
>   	drm_fdinfo_print_size(p, drv_name, "external", "alloc", external_usage);
>   	/*
>   	 * Note for DRM standard BO memory stat.
> -	 * drm-total-memory counts both DEV BO and HEAP BO
> -	 * drm-shared-memory counts BO imported
> +	 * drm-total-memory counts both DEV BO and HEAP BO. The DEV BO size is double counted.
> +	 * drm-shared-memory counts BO shared with other processes/devices.
>   	 */
>   	drm_show_memory_stats(p, filp);
>   }
> @@ -420,7 +432,26 @@ static struct pci_driver amdxdna_pci_driver = {
>   	.sriov_configure = amdxdna_sriov_configure,
>   };
>   
> -module_pci_driver(amdxdna_pci_driver);
> +static int __init amdxdna_mod_init(void)
> +{
> +	int ret;
> +
> +	amdxdna_carveout_init();
> +	ret = pci_register_driver(&amdxdna_pci_driver);
> +	if (ret)
> +		amdxdna_carveout_fini();
> +
> +	return ret;
> +}
> +
> +static void __exit amdxdna_mod_exit(void)
> +{
> +	pci_unregister_driver(&amdxdna_pci_driver);
> +	amdxdna_carveout_fini();
> +}
> +
> +module_init(amdxdna_mod_init);
> +module_exit(amdxdna_mod_exit);
>   
>   MODULE_LICENSE("GPL");
>   MODULE_IMPORT_NS("AMD_PMF");
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> index bdd0dc83f92e..07bd38281452 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> @@ -172,11 +172,11 @@ void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);
>   
>   int amdxdna_iommu_init(struct amdxdna_dev *xdna);
>   void amdxdna_iommu_fini(struct amdxdna_dev *xdna);
> -int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
> -void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
>   void *amdxdna_iommu_alloc(struct amdxdna_dev *xdna, size_t size, dma_addr_t *dma_addr);
>   void amdxdna_iommu_free(struct amdxdna_dev *xdna, size_t size,
>   			void *cpu_addr, dma_addr_t dma_addr);
> +int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
> +void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
>   
>   static inline bool amdxdna_iova_on(struct amdxdna_dev *xdna)
>   {
Re: [PATCH V1] accel/amdxdna: Add carveout memory support for non-IOMMU systems
Posted by Lizhi Hou 1 month, 3 weeks ago
On 4/19/26 11:13, Mario Limonciello wrote:
>
>
> On 4/17/26 16:06, Lizhi Hou wrote:
>> From: Max Zhen <max.zhen@amd.com>
>>
>> Add support for allocating buffers from reserved carveout memory when
>> IOMMU is not available. This is useful during debugging or bring-up.
>>
>> In this configuration, the device uses physical addresses and does
>> not support scatter-gather lists, requiring physically contiguous
>> buffers.
>>
>> Implement carveout-backed allocation and integrate it into buffer
>> management to support operation in physical address mode.
>
> Running with IOMMU disabled is unlikely to be a common production 
> scenario.  At first I was thinking this is OK, but considering Greg's 
> strong comments about driver specific module parameters I /wonder/ if 
> this should be gated behind a kconfig option to be used at bringup?
>
> The Kconfig option could be something like XDNA_CARVEOUT and default 
> to 0.  Then it can be set to any value necessary for debugging.
>
> You could go a step further and only let XDNA_CARVEOUT do the 
> reservation when IOMMU wasn't found.

Thanks for your comments. It would be more useful to be able to change the 
carveout setting without recompiling the driver. Because this is a debug 
feature, we will implement a debugfs interface to configure the carveout 
address and size.


Lizhi

>
>>
>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>>   drivers/accel/amdxdna/Makefile          |   1 +
>>   drivers/accel/amdxdna/amdxdna_cbuf.c    | 249 ++++++++++++++++++++++++
>>   drivers/accel/amdxdna/amdxdna_cbuf.h    |  16 ++
>>   drivers/accel/amdxdna/amdxdna_gem.c     |  95 +++++++--
>>   drivers/accel/amdxdna/amdxdna_iommu.c   |  77 +++++---
>>   drivers/accel/amdxdna/amdxdna_pci_drv.c |  91 ++++++---
>>   drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +-
>>   7 files changed, 454 insertions(+), 79 deletions(-)
>>   create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.c
>>   create mode 100644 drivers/accel/amdxdna/amdxdna_cbuf.h
>>
>> diff --git a/drivers/accel/amdxdna/Makefile 
>> b/drivers/accel/amdxdna/Makefile
>> index 79369e497540..a055aea36971 100644
>> --- a/drivers/accel/amdxdna/Makefile
>> +++ b/drivers/accel/amdxdna/Makefile
>> @@ -12,6 +12,7 @@ amdxdna-y := \
>>       aie2_solver.o \
>>       aie4_message.o \
>>       aie4_pci.o \
>> +    amdxdna_cbuf.o \
>>       amdxdna_ctx.o \
>>       amdxdna_gem.o \
>>       amdxdna_iommu.o \
>> diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.c 
>> b/drivers/accel/amdxdna/amdxdna_cbuf.c
>> new file mode 100644
>> index 000000000000..4a556199a461
>> --- /dev/null
>> +++ b/drivers/accel/amdxdna/amdxdna_cbuf.c
>> @@ -0,0 +1,249 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#include <drm/drm_mm.h>
>> +#include <drm/drm_prime.h>
>> +
>> +#include "amdxdna_cbuf.h"
>> +#include "amdxdna_pci_drv.h"
>> +
>> +/*
>> + * This is a platform debug/bringup feature.
>> + *
>> + * Carveout memory is a chunk of memory which is physically 
>> contiguous and
>> + * is reserved during early boot time. There is only one chunk of 
>> such memory
>> + * per system. Once available, all BOs accessible from device should be
>> + * allocated from this memory.
>> + */
>> +u64 carveout_addr;
>> +module_param(carveout_addr, ullong, 0400);
>> +MODULE_PARM_DESC(carveout_addr, "Physical memory address for 
>> reserved memory chunk");
>> +
>> +u64 carveout_size;
>> +module_param(carveout_size, ullong, 0400);
>> +MODULE_PARM_DESC(carveout_size, "Physical memory size for reserved 
>> memory chunk");
>> +
>> +struct amdxdna_carveout {
>> +    struct drm_mm    mm;
>> +    struct mutex    lock; /* protect mm */
>> +} carveout;
>> +
>> +bool amdxdna_use_carveout(void)
>> +{
>> +    return !!carveout_size;
>> +}
>> +
>> +void amdxdna_carveout_init(void)
>> +{
>> +    if (!amdxdna_use_carveout())
>> +        return;
>> +    mutex_init(&carveout.lock);
>> +    drm_mm_init(&carveout.mm, carveout_addr, carveout_size);
>> +    pr_info("Use carveout mem, addr=0x%llx, size=0x%llx\n", 
>> carveout_addr, carveout_size);
>> +}
>> +
>> +void amdxdna_carveout_fini(void)
>> +{
>> +    if (!amdxdna_use_carveout())
>> +        return;
>> +    drm_mm_takedown(&carveout.mm);
>> +    mutex_destroy(&carveout.lock);
>> +}
>> +
>> +struct amdxdna_cbuf_priv {
>> +    struct drm_mm_node node;
>> +};
>> +
>> +static struct sg_table *amdxdna_cbuf_map(struct dma_buf_attachment 
>> *attach,
>> +                     enum dma_data_direction direction)
>> +{
>> +    struct amdxdna_cbuf_priv *cbuf = attach->dmabuf->priv;
>> +    struct device *dev = attach->dev;
>> +    struct scatterlist *sgl, *sg;
>> +    int ret, n_entries, i;
>> +    struct sg_table *sgt;
>> +    dma_addr_t dma_addr;
>> +    size_t dma_size;
>> +    size_t max_seg;
>> +
>> +    sgt = kzalloc_obj(*sgt);
>> +    if (!sgt)
>> +        return ERR_PTR(-ENOMEM);
>> +
>> +    max_seg = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev));
>> +    n_entries = (cbuf->node.size + max_seg - 1) / max_seg;
>> +    sgl = kzalloc_objs(*sg, n_entries);
>> +    if (!sgl) {
>> +        ret = -ENOMEM;
>> +        goto free_sgt;
>> +    }
>> +    sg_init_table(sgl, n_entries);
>> +    sgt->orig_nents = n_entries;
>> +    sgt->nents = n_entries;
>> +    sgt->sgl = sgl;
>> +
>> +    dma_size = cbuf->node.size;
>> +    dma_addr = dma_map_resource(dev, cbuf->node.start, dma_size,
>> +                    direction, DMA_ATTR_SKIP_CPU_SYNC);
>> +    ret = dma_mapping_error(dev, dma_addr);
>> +    if (ret) {
>> +        pr_err("Failed to dma_map_resource carveout dma buf, ret 
>> %d\n", ret);
>> +        goto free_sgl;
>> +    }
>> +
>> +    for_each_sgtable_dma_sg(sgt, sg, i) {
>> +        size_t len = min_t(size_t, max_seg, dma_size);
>> +
>> +        sg_dma_address(sg) = dma_addr;
>> +        sg_dma_len(sg) = len;
>> +        dma_addr += len;
>> +        dma_size -= len;
>> +    }
>> +
>> +    return sgt;
>> +
>> +free_sgl:
>> +    kfree(sgl);
>> +free_sgt:
>> +    kfree(sgt);
>> +    return ERR_PTR(ret);
>> +}
>> +
>> +static void amdxdna_cbuf_unmap(struct dma_buf_attachment *attach,
>> +                   struct sg_table *sgt,
>> +                   enum dma_data_direction direction)
>> +{
>> +    dma_unmap_resource(attach->dev, sg_dma_address(sgt->sgl),
>> +               drm_prime_get_contiguous_size(sgt), direction,
>> +               DMA_ATTR_SKIP_CPU_SYNC);
>> +    sg_free_table(sgt);
>> +    kfree(sgt);
>> +}
>> +
>> +static void amdxdna_cbuf_release(struct dma_buf *dbuf)
>> +{
>> +    struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
>> +
>> +    mutex_lock(&carveout.lock);
>> +    drm_mm_remove_node(&cbuf->node);
>> +    mutex_unlock(&carveout.lock);
>> +
>> +    kfree(cbuf);
>> +}
>> +
>> +static vm_fault_t amdxdna_cbuf_vm_fault(struct vm_fault *vmf)
>> +{
>> +    struct vm_area_struct *vma = vmf->vma;
>> +    struct amdxdna_cbuf_priv *cbuf;
>> +    unsigned long pfn;
>> +    pgoff_t pgoff;
>> +
>> +    cbuf = vma->vm_private_data;
>> +    pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
>> +    pfn = (cbuf->node.start >> PAGE_SHIFT) + pgoff;
>> +
>> +    return vmf_insert_pfn(vma, vmf->address, pfn);
>> +}
>> +
>> +static const struct vm_operations_struct amdxdna_cbuf_vm_ops = {
>> +    .fault = amdxdna_cbuf_vm_fault,
>> +};
>> +
>> +static int amdxdna_cbuf_mmap(struct dma_buf *dbuf, struct 
>> vm_area_struct *vma)
>> +{
>> +    struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
>> +
>> +    vma->vm_ops = &amdxdna_cbuf_vm_ops;
>> +    vma->vm_private_data = cbuf;
>> +    vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
>> +
>> +    return 0;
>> +}
>> +
>> +static int amdxdna_cbuf_vmap(struct dma_buf *dbuf, struct iosys_map 
>> *map)
>> +{
>> +    struct amdxdna_cbuf_priv *cbuf = dbuf->priv;
>> +    void *kva;
>> +
>> +    kva = memremap(cbuf->node.start, cbuf->node.size, MEMREMAP_WB);
>> +    if (!kva) {
>> +        pr_err("Failed to vmap carveout dma buf\n");
>> +        return -ENOMEM;
>> +    }
>> +
>> +    iosys_map_set_vaddr(map, kva);
>> +    return 0;
>> +}
>> +
>> +static void amdxdna_cbuf_vunmap(struct dma_buf *dbuf, struct 
>> iosys_map *map)
>> +{
>> +    memunmap(map->vaddr);
>> +}
>> +
>> +static const struct dma_buf_ops amdxdna_cbuf_dmabuf_ops = {
>> +    .map_dma_buf = amdxdna_cbuf_map,
>> +    .unmap_dma_buf = amdxdna_cbuf_unmap,
>> +    .release = amdxdna_cbuf_release,
>> +    .mmap = amdxdna_cbuf_mmap,
>> +    .vmap = amdxdna_cbuf_vmap,
>> +    .vunmap = amdxdna_cbuf_vunmap,
>> +};
>> +
>> +static int amdxdna_cbuf_clear(struct dma_buf *dbuf)
>> +{
>> +    struct iosys_map vmap = IOSYS_MAP_INIT_VADDR(NULL);
>> +
>> +    dma_buf_vmap(dbuf, &vmap);
>> +    if (!vmap.vaddr)
>> +        return -EFAULT;
>> +
>> +    memset(vmap.vaddr, 0, dbuf->size);
>> +    dma_buf_vunmap(dbuf, &vmap);
>> +
>> +    return 0;
>> +}
>> +
>> +struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t 
>> size, u64 alignment)
>> +{
>> +    DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
>> +    struct amdxdna_cbuf_priv *cbuf;
>> +    struct dma_buf *dbuf;
>> +    int ret;
>> +
>> +    cbuf = kzalloc_obj(*cbuf);
>> +    if (!cbuf)
>> +        return ERR_PTR(-ENOMEM);
>> +
>> +    mutex_lock(&carveout.lock);
>> +    ret = drm_mm_insert_node_generic(&carveout.mm, &cbuf->node, size,
>> +                     alignment, 0, DRM_MM_INSERT_BEST);
>> +    mutex_unlock(&carveout.lock);
>> +    if (ret)
>> +        goto free_cbuf;
>> +
>> +    exp_info.size = size;
>> +    exp_info.ops = &amdxdna_cbuf_dmabuf_ops;
>> +    exp_info.priv = cbuf;
>> +    exp_info.flags = O_RDWR;
>> +    dbuf = dma_buf_export(&exp_info);
>> +    if (IS_ERR(dbuf)) {
>> +        ret = PTR_ERR(dbuf);
>> +        goto remove_node;
>> +    }
>> +
>> +    ret = amdxdna_cbuf_clear(dbuf);
>> +    if (ret) {
>> +        dma_buf_put(dbuf);
>> +        goto out;
>> +    }
>> +    return dbuf;
>> +
>> +remove_node:
>> +    drm_mm_remove_node(&cbuf->node);
>> +free_cbuf:
>> +    kfree(cbuf);
>> +out:
>> +    return ERR_PTR(ret);
>> +}
>> diff --git a/drivers/accel/amdxdna/amdxdna_cbuf.h 
>> b/drivers/accel/amdxdna/amdxdna_cbuf.h
>> new file mode 100644
>> index 000000000000..15e189ce779e
>> --- /dev/null
>> +++ b/drivers/accel/amdxdna/amdxdna_cbuf.h
>> @@ -0,0 +1,16 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2026, Advanced Micro Devices, Inc.
>> + */
>> +#ifndef _AMDXDNA_CBUF_H_
>> +#define _AMDXDNA_CBUF_H_
>> +
>> +#include <drm/drm_device.h>
>> +#include <linux/dma-buf.h>
>> +
>> +bool amdxdna_use_carveout(void);
>> +void amdxdna_carveout_init(void);
>> +void amdxdna_carveout_fini(void);
>> +struct dma_buf *amdxdna_get_cbuf(struct drm_device *dev, size_t 
>> size, u64 alignment);
>> +
>> +#endif
>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c 
>> b/drivers/accel/amdxdna/amdxdna_gem.c
>> index 238ee244d4a6..905514ec183c 100644
>> --- a/drivers/accel/amdxdna/amdxdna_gem.c
>> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
>> @@ -16,6 +16,7 @@
>>   #include <linux/pagemap.h>
>>   #include <linux/vmalloc.h>
>>   +#include "amdxdna_cbuf.h"
>>   #include "amdxdna_ctx.h"
>>   #include "amdxdna_gem.h"
>>   #include "amdxdna_pci_drv.h"
>> @@ -516,10 +517,6 @@ static void amdxdna_imported_obj_free(struct 
>> amdxdna_gem_obj *abo)
>>   static inline bool
>>   amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
>>   {
>> -    /* Do not count imported BOs since the buffer is not allocated 
>> by us. */
>> -    if (is_import_bo(abo))
>> -        return true;
>> -
>>       /* Already counted as part of HEAP BO */
>>       if (abo->type == AMDXDNA_BO_DEV)
>>           return true;
>> @@ -571,9 +568,7 @@ static void amdxdna_gem_obj_free(struct 
>> drm_gem_object *gobj)
>>       if (abo->type == AMDXDNA_BO_DEV_HEAP)
>>           drm_mm_takedown(&abo->mm);
>>   -    if (amdxdna_iova_on(xdna))
>> -        amdxdna_iommu_unmap_bo(xdna, abo);
>> -
>> +    amdxdna_dma_unmap_bo(xdna, abo);
>>       amdxdna_gem_vunmap(abo);
>>       mutex_destroy(&abo->lock);
>>   @@ -591,18 +586,20 @@ static int amdxdna_gem_obj_open(struct 
>> drm_gem_object *gobj, struct drm_file *fi
>>         guard(mutex)(&abo->lock);
>>       abo->open_ref++;
>> +    if (abo->open_ref > 1)
>> +        return 0;
>>   -    if (abo->open_ref == 1) {
>> -        /* Attached to the client when first opened by it. */
>> -        abo->client = filp->driver_priv;
>> -        amdxdna_gem_add_bo_usage(abo);
>> -    }
>> -    if (amdxdna_iova_on(xdna)) {
>> -        ret = amdxdna_iommu_map_bo(xdna, abo);
>> +    /* Attached to the client when first opened by it. */
>> +    abo->client = filp->driver_priv;
>> +
>> +    /* No need to set up dma addr mapping in PASID mode. */
>> +    if (!amdxdna_pasid_on(abo->client)) {
>> +        ret = amdxdna_dma_map_bo(xdna, abo);
>>           if (ret)
>>               return ret;
>>       }
>>   +    amdxdna_gem_add_bo_usage(abo);
>>       return 0;
>>   }
>>   @@ -620,6 +617,39 @@ static void amdxdna_gem_obj_close(struct 
>> drm_gem_object *gobj, struct drm_file *
>>       }
>>   }
>>   +static int amdxdna_gem_obj_vmap(struct drm_gem_object *obj, struct 
>> iosys_map *map)
>> +{
>> +    struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
>> +    int ret;
>> +
>> +    iosys_map_clear(map);
>> +
>> +    dma_resv_assert_held(obj->resv);
>> +
>> +    if (is_import_bo(abo))
>> +        ret = dma_buf_vmap(abo->dma_buf, map);
>> +    else
>> +        ret = drm_gem_shmem_object_vmap(obj, map);
>> +    if (ret)
>> +        return ret;
>> +    if (!map->vaddr)
>> +        return -ENOMEM;
>> +
>> +    return 0;
>> +}
>> +
>> +static void amdxdna_gem_obj_vunmap(struct drm_gem_object *obj, 
>> struct iosys_map *map)
>> +{
>> +    struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
>> +
>> +    dma_resv_assert_held(obj->resv);
>> +
>> +    if (is_import_bo(abo))
>> +        dma_buf_vunmap(abo->dma_buf, map);
>> +    else
>> +        drm_gem_shmem_object_vunmap(obj, map);
>> +}
>> +
>>   static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, 
>> struct iosys_map *map)
>>   {
>>       struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
>> @@ -645,8 +675,8 @@ static const struct drm_gem_object_funcs 
>> amdxdna_gem_shmem_funcs = {
>>       .pin = drm_gem_shmem_object_pin,
>>       .unpin = drm_gem_shmem_object_unpin,
>>       .get_sg_table = drm_gem_shmem_object_get_sg_table,
>> -    .vmap = drm_gem_shmem_object_vmap,
>> -    .vunmap = drm_gem_shmem_object_vunmap,
>> +    .vmap = amdxdna_gem_obj_vmap,
>> +    .vunmap = amdxdna_gem_obj_vunmap,
>>       .mmap = amdxdna_gem_obj_mmap,
>>       .vm_ops = &drm_gem_shmem_vm_ops,
>>       .export = amdxdna_gem_prime_export,
>> @@ -714,6 +744,36 @@ amdxdna_gem_create_ubuf_object(struct drm_device 
>> *dev, struct amdxdna_drm_create
>>       return to_xdna_obj(gobj);
>>   }
>>   +static struct amdxdna_gem_obj *
>> +amdxdna_gem_create_cbuf_object(struct drm_device *dev, struct 
>> amdxdna_drm_create_bo *args)
>> +{
>> +    struct amdxdna_dev *xdna = to_xdna_dev(dev);
>> +    size_t size = PAGE_ALIGN(args->size);
>> +    struct drm_gem_object *gobj;
>> +    struct amdxdna_gem_obj *ret;
>> +    struct dma_buf *dma_buf;
>> +    u64 align;
>> +
>> +    if (!size) {
>> +        XDNA_ERR(xdna, "Invalid BO size 0x%llx", args->size);
>> +        return ERR_PTR(-EINVAL);
>> +    }
>> +
>> +    align = (args->type == AMDXDNA_BO_DEV_HEAP) ? xdna->dev_info->dev_mem_size : 0;
>> +    dma_buf = amdxdna_get_cbuf(dev, size, align);
>> +    if (IS_ERR(dma_buf))
>> +        return ERR_CAST(dma_buf);
>> +
>> +    gobj = amdxdna_gem_prime_import(dev, dma_buf);
>> +    if (IS_ERR(gobj))
>> +        ret = ERR_CAST(gobj);
>> +    else
>> +        ret = to_xdna_obj(gobj);
>> +
>> +    dma_buf_put(dma_buf);
>> +    return ret;
>> +}
>> +
>>   struct drm_gem_object *
>>   amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
>>   {
>> @@ -769,6 +829,8 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
>>         if (args->vaddr)
>>           abo = amdxdna_gem_create_ubuf_object(dev, args);
>> +    else if (amdxdna_use_carveout())
>> +        abo = amdxdna_gem_create_cbuf_object(dev, args);
>>       else
>>           abo = amdxdna_gem_create_shmem_object(dev, args);
>>       if (IS_ERR(abo))
>> @@ -884,7 +946,6 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device 
>> *dev, void *data, struct drm_f
>>            args->type, args->vaddr, args->size, args->flags);
>>       switch (args->type) {
>>       case AMDXDNA_BO_CMD:
>> -        fallthrough;
>>       case AMDXDNA_BO_SHARE:
>>           abo = amdxdna_drm_create_share_bo(dev, args, filp);
>>           break;
>> diff --git a/drivers/accel/amdxdna/amdxdna_iommu.c 
>> b/drivers/accel/amdxdna/amdxdna_iommu.c
>> index 5a9f06183487..eff00131d0f8 100644
>> --- a/drivers/accel/amdxdna/amdxdna_iommu.c
>> +++ b/drivers/accel/amdxdna/amdxdna_iommu.c
>> @@ -35,14 +35,15 @@ static struct iova 
>> *amdxdna_iommu_alloc_iova(struct amdxdna_dev *xdna,
>>       return iova;
>>   }
>>   -int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
>> +int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
>>   {
>> +    unsigned long contig_sz;
>>       struct sg_table *sgt;
>>       dma_addr_t dma_addr;
>>       struct iova *iova;
>>       ssize_t size;
>>   -    if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHMEM)
>> +    if (abo->type != AMDXDNA_BO_DEV_HEAP && abo->type != AMDXDNA_BO_SHARE)
>>           return 0;
>>         sgt = drm_gem_shmem_get_pages_sgt(&abo->base);
>> @@ -51,47 +52,63 @@ int amdxdna_iommu_map_bo(struct amdxdna_dev 
>> *xdna, struct amdxdna_gem_obj *abo)
>>           return PTR_ERR(sgt);
>>       }
>>   -    if (!sgt->orig_nents || !sg_page(sgt->sgl)) {
>> -        XDNA_ERR(xdna, "sgl is zero length or not page backed");
>> +    if (!sgt->orig_nents) {
>> +        XDNA_ERR(xdna, "sgl is zero length");
>>           return -EOPNOTSUPP;
>>       }
>>   -    iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
>> -                    (abo->type == AMDXDNA_BO_DEV_HEAP));
>> -    if (IS_ERR(iova)) {
>> -        XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
>> -        return PTR_ERR(iova);
>> +    if (amdxdna_iova_on(xdna)) {
>> +        if (!sg_page(sgt->sgl)) {
>> +            XDNA_ERR(xdna, "sgl is not page backed");
>> +            return -EOPNOTSUPP;
>> +        }
>> +
>> +        iova = amdxdna_iommu_alloc_iova(xdna, abo->mem.size, &dma_addr,
>> +                        (abo->type == AMDXDNA_BO_DEV_HEAP));
>> +        if (IS_ERR(iova)) {
>> +            XDNA_ERR(xdna, "Alloc iova failed, ret %ld", PTR_ERR(iova));
>> +            return PTR_ERR(iova);
>> +        }
>> +
>> +        size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
>> +                     IOMMU_READ | IOMMU_WRITE);
>> +        if (size < 0) {
>> +            XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
>> +            __free_iova(&xdna->iovad, iova);
>> +            return size;
>> +        }
>> +        if (size < abo->mem.size) {
>> +            iommu_unmap(xdna->domain, dma_addr, size);
>> +            __free_iova(&xdna->iovad, iova);
>> +            return -ENXIO;
>> +        }
>> +        abo->mem.dma_addr = dma_addr;
>> +    } else {
>> +        /* Device doesn't support scatter/gather list, fail non-contiguous mapping. */
>> +        contig_sz = drm_prime_get_contiguous_size(sgt);
>> +        if (contig_sz < abo->mem.size) {
>> +            XDNA_ERR(xdna,
>> +                 "noncontiguous dma addr, contig size:%ld, expected size:%ld",
>> +                 contig_sz, abo->mem.size);
>> +            return -EINVAL;
>> +        }
>> +        abo->mem.dma_addr = sg_dma_address(sgt->sgl);
>>       }
>> -
>> -    size = iommu_map_sgtable(xdna->domain, dma_addr, sgt,
>> -                 IOMMU_READ | IOMMU_WRITE);
>> -    if (size < 0) {
>> -        XDNA_ERR(xdna, "iommu_map_sgtable failed: %zd", size);
>> -        __free_iova(&xdna->iovad, iova);
>> -        return size;
>> -    }
>> -
>> -    if (size < abo->mem.size) {
>> -        iommu_unmap(xdna->domain, dma_addr, size);
>> -        __free_iova(&xdna->iovad, iova);
>> -        return -ENXIO;
>> -    }
>> -
>> -    abo->mem.dma_addr = dma_addr;
>> -
>>       return 0;
>>   }
>>   -void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
>> +void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo)
>>   {
>>       size_t size;
>>         if (abo->mem.dma_addr == AMDXDNA_INVALID_ADDR)
>>           return;
>>   -    size = iova_align(&xdna->iovad, abo->mem.size);
>> -    iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
>> -    free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
>> +    if (amdxdna_iova_on(xdna)) {
>> +        size = iova_align(&xdna->iovad, abo->mem.size);
>> +        iommu_unmap(xdna->domain, abo->mem.dma_addr, size);
>> +        free_iova(&xdna->iovad, iova_pfn(&xdna->iovad, abo->mem.dma_addr));
>> +    }
>>       abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
>>   }
>>   diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> index 21eddfc538d0..b8c5dbc12489 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -14,6 +14,7 @@
>>   #include <linux/iommu.h>
>>   #include <linux/pci.h>
>>   +#include "amdxdna_cbuf.h"
>>   #include "amdxdna_ctx.h"
>>   #include "amdxdna_gem.h"
>>   #include "amdxdna_pci_drv.h"
>> @@ -67,11 +68,40 @@ static const struct amdxdna_device_id 
>> amdxdna_ids[] = {
>>       {0}
>>   };
>>   +static int amdxdna_sva_init(struct amdxdna_client *client)
>> +{
>> +    struct amdxdna_dev *xdna = client->xdna;
>> +
>> +    client->sva = iommu_sva_bind_device(xdna->ddev.dev, client->mm);
>> +    if (IS_ERR(client->sva)) {
>> +        XDNA_ERR(xdna, "SVA bind device failed, ret %ld", PTR_ERR(client->sva));
>> +        return PTR_ERR(client->sva);
>> +    }
>> +
>> +    client->pasid = iommu_sva_get_pasid(client->sva);
>> +    if (client->pasid == IOMMU_PASID_INVALID) {
>> +        iommu_sva_unbind_device(client->sva);
>> +        XDNA_ERR(xdna, "SVA get pasid failed");
>> +        return -ENODEV;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void amdxdna_sva_fini(struct amdxdna_client *client)
>> +{
>> +    if (IS_ERR_OR_NULL(client->sva))
>> +        return;
>> +
>> +    iommu_sva_unbind_device(client->sva);
>> +    client->sva = NULL;
>> +    client->pasid = IOMMU_PASID_INVALID;
>> +}
>> +
>>   static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
>>   {
>>       struct amdxdna_dev *xdna = to_xdna_dev(ddev);
>>       struct amdxdna_client *client;
>> -    int ret;
>>         client = kzalloc_obj(*client);
>>       if (!client)
>> @@ -80,22 +110,13 @@ static int amdxdna_drm_open(struct drm_device 
>> *ddev, struct drm_file *filp)
>>       client->pid = pid_nr(rcu_access_pointer(filp->pid));
>>       client->xdna = xdna;
>>       client->pasid = IOMMU_PASID_INVALID;
>> +    client->mm = current->mm;
>>         if (!amdxdna_iova_on(xdna)) {
>> -        client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm);
>> -        if (IS_ERR(client->sva)) {
>> -            ret = PTR_ERR(client->sva);
>> -            XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
>> -            goto failed;
>> -        }
>> -        client->pasid = iommu_sva_get_pasid(client->sva);
>> -        if (client->pasid == IOMMU_PASID_INVALID) {
>> -            XDNA_ERR(xdna, "SVA get pasid failed");
>> -            ret = -ENODEV;
>> -            goto unbind_sva;
>> -        }
>> +        /* No need to fail open since user may use pa + carveout later. */
>> +        if (amdxdna_sva_init(client))
>> +            XDNA_WARN(xdna, "PASID not available for pid %d", client->pid);
>>       }
>> -    client->mm = current->mm;
>>       mmgrab(client->mm);
>>       init_srcu_struct(&client->hwctx_srcu);
>>       xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
>> @@ -110,14 +131,6 @@ static int amdxdna_drm_open(struct drm_device 
>> *ddev, struct drm_file *filp)
>>         XDNA_DBG(xdna, "pid %d opened", client->pid);
>>       return 0;
>> -
>> -unbind_sva:
>> -    if (!IS_ERR_OR_NULL(client->sva))
>> -        iommu_sva_unbind_device(client->sva);
>> -failed:
>> -    kfree(client);
>> -
>> -    return ret;
>>   }
>>     static void amdxdna_client_cleanup(struct amdxdna_client *client)
>> @@ -131,11 +144,8 @@ static void amdxdna_client_cleanup(struct 
>> amdxdna_client *client)
>>           drm_gem_object_put(to_gobj(client->dev_heap));
>>         mutex_destroy(&client->mm_lock);
>> -
>> -    if (!IS_ERR_OR_NULL(client->sva))
>> -        iommu_sva_unbind_device(client->sva);
>>       mmdrop(client->mm);
>> -
>> +    amdxdna_sva_fini(client);
>>       kfree(client);
>>   }
>>   @@ -242,15 +252,17 @@ static void amdxdna_show_fdinfo(struct 
>> drm_printer *p, struct drm_file *filp)
>>         /*
>>        * Note for driver specific BO memory usage stat.
>> -     * Total memory alloc = amdxdna-internal-alloc + amdxdna-external-alloc
>> +     * Total memory in use = amdxdna-internal-alloc + amdxdna-external-alloc, which
>> +     * includes both imported and created BOs. To avoid double counts, it includes
>> +     * HEAP BO, but not DEV BO. DEV BO is counted by amdxdna-heap-alloc.
>>        */
>>       drm_fdinfo_print_size(p, drv_name, "heap", "alloc", heap_usage);
>>       drm_fdinfo_print_size(p, drv_name, "internal", "alloc", internal_usage);
>>       drm_fdinfo_print_size(p, drv_name, "external", "alloc", external_usage);
>>       /*
>>        * Note for DRM standard BO memory stat.
>> -     * drm-total-memory counts both DEV BO and HEAP BO
>> -     * drm-shared-memory counts BO imported
>> +     * drm-total-memory counts both DEV BO and HEAP BO. The DEV BO size is double counted.
>> +     * drm-shared-memory counts BO shared with other processes/devices.
>>        */
>>       drm_show_memory_stats(p, filp);
>>   }
>> @@ -420,7 +432,26 @@ static struct pci_driver amdxdna_pci_driver = {
>>       .sriov_configure = amdxdna_sriov_configure,
>>   };
>>   -module_pci_driver(amdxdna_pci_driver);
>> +static int __init amdxdna_mod_init(void)
>> +{
>> +    int ret;
>> +
>> +    amdxdna_carveout_init();
>> +    ret = pci_register_driver(&amdxdna_pci_driver);
>> +    if (ret)
>> +        amdxdna_carveout_fini();
>> +
>> +    return ret;
>> +}
>> +
>> +static void __exit amdxdna_mod_exit(void)
>> +{
>> +    pci_unregister_driver(&amdxdna_pci_driver);
>> +    amdxdna_carveout_fini();
>> +}
>> +
>> +module_init(amdxdna_mod_init);
>> +module_exit(amdxdna_mod_exit);
>>     MODULE_LICENSE("GPL");
>>   MODULE_IMPORT_NS("AMD_PMF");
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> index bdd0dc83f92e..07bd38281452 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> @@ -172,11 +172,11 @@ void amdxdna_sysfs_fini(struct amdxdna_dev *xdna);
>>     int amdxdna_iommu_init(struct amdxdna_dev *xdna);
>>   void amdxdna_iommu_fini(struct amdxdna_dev *xdna);
>> -int amdxdna_iommu_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
>> -void amdxdna_iommu_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
>>   void *amdxdna_iommu_alloc(struct amdxdna_dev *xdna, size_t size, dma_addr_t *dma_addr);
>>   void amdxdna_iommu_free(struct amdxdna_dev *xdna, size_t size,
>>               void *cpu_addr, dma_addr_t dma_addr);
>> +int amdxdna_dma_map_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
>> +void amdxdna_dma_unmap_bo(struct amdxdna_dev *xdna, struct amdxdna_gem_obj *abo);
>>     static inline bool amdxdna_iova_on(struct amdxdna_dev *xdna)
>>   {
>