[PATCH V1] accel/amdxdna: Add per-process BO memory usage query support

Lizhi Hou posted 1 patch 1 week, 2 days ago
drivers/accel/amdxdna/aie2_pci.c        |   4 +
drivers/accel/amdxdna/amdxdna_gem.c     | 134 ++++++++++++++++++++++--
drivers/accel/amdxdna/amdxdna_gem.h     |   7 +-
drivers/accel/amdxdna/amdxdna_pci_drv.c |   6 +-
drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +
include/uapi/drm/amdxdna_accel.h        |  35 +++++++
6 files changed, 177 insertions(+), 13 deletions(-)
[PATCH V1] accel/amdxdna: Add per-process BO memory usage query support
Posted by Lizhi Hou 1 week, 2 days ago
From: Max Zhen <max.zhen@amd.com>

Add support for querying per-process buffer object (BO) memory
usage through the amdxdna GET_ARRAY UAPI.

Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
struct amdxdna_drm_bo_usage to report BO memory usage statistics,
including heap, total, and internal usage.

Track BO memory usage on a per-client basis by maintaining counters
in GEM open/close and heap allocation/free paths. This ensures the
reported statistics reflect the current memory footprint of each
process.

Wire the new query into the GET_ARRAY implementation to expose
the usage information to userspace.

Signed-off-by: Max Zhen <max.zhen@amd.com>
Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_pci.c        |   4 +
 drivers/accel/amdxdna/amdxdna_gem.c     | 134 ++++++++++++++++++++++--
 drivers/accel/amdxdna/amdxdna_gem.h     |   7 +-
 drivers/accel/amdxdna/amdxdna_pci_drv.c |   6 +-
 drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +
 include/uapi/drm/amdxdna_accel.h        |  35 +++++++
 6 files changed, 177 insertions(+), 13 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 9e39bfe75971..f1ac4e00bd9f 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -865,6 +865,7 @@ static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg)
 	tmp->command_submissions = hwctx->priv->seq;
 	tmp->command_completions = hwctx->priv->completed;
 	tmp->pasid = hwctx->client->pasid;
+	tmp->heap_usage = hwctx->client->heap_usage;
 	tmp->priority = hwctx->qos.priority;
 	tmp->gops = hwctx->qos.gops;
 	tmp->fps = hwctx->qos.fps;
@@ -1148,6 +1149,9 @@ static int aie2_get_array(struct amdxdna_client *client,
 	case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
 		ret = aie2_get_array_async_error(xdna->dev_handle, args);
 		break;
+	case DRM_AMDXDNA_BO_USAGE:
+		ret = amdxdna_drm_get_bo_usage(&xdna->ddev, args);
+		break;
 	default:
 		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
 		ret = -EOPNOTSUPP;
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 27712704e42d..238ee244d4a6 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -63,6 +63,8 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
 		goto unlock_out;
 	}
 
+	client->heap_usage += mem->size;
+
 	drm_gem_object_get(to_gobj(heap));
 
 unlock_out:
@@ -74,16 +76,17 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
 static void
 amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
 {
+	struct amdxdna_client *client = abo->client;
 	struct amdxdna_gem_obj *heap;
 
-	mutex_lock(&abo->client->mm_lock);
+	mutex_lock(&client->mm_lock);
 
 	drm_mm_remove_node(&abo->mm_node);
-
-	heap = abo->client->dev_heap;
+	client->heap_usage -= abo->mem.size;
+	heap = client->dev_heap;
 	drm_gem_object_put(to_gobj(heap));
 
-	mutex_unlock(&abo->client->mm_lock);
+	mutex_unlock(&client->mm_lock);
 }
 
 static struct amdxdna_gem_obj *
@@ -102,6 +105,8 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
 	abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
 	abo->mem.uva = AMDXDNA_INVALID_ADDR;
 	abo->mem.size = size;
+	abo->open_ref = 0;
+	abo->internal = false;
 	INIT_LIST_HEAD(&abo->mem.umap_list);
 
 	return abo;
@@ -508,13 +513,55 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
 	kfree(abo);
 }
 
+static inline bool
+amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
+{
+	/* Do not count imported BOs since the buffer is not allocated by us. */
+	if (is_import_bo(abo))
+		return true;
+
+	/* Already counted as part of HEAP BO */
+	if (abo->type == AMDXDNA_BO_DEV)
+		return true;
+
+	return false;
+}
+
+static void
+amdxdna_gem_add_bo_usage(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_client *client = abo->client;
+
+	if (amdxdna_gem_skip_bo_usage(abo))
+		return;
+
+	guard(mutex)(&client->mm_lock);
+
+	client->total_bo_usage += abo->mem.size;
+	if (abo->internal)
+		client->total_int_bo_usage += abo->mem.size;
+}
+
+static void
+amdxdna_gem_del_bo_usage(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_client *client = abo->client;
+
+	if (amdxdna_gem_skip_bo_usage(abo))
+		return;
+
+	guard(mutex)(&client->mm_lock);
+
+	client->total_bo_usage -= abo->mem.size;
+	if (abo->internal)
+		client->total_int_bo_usage -= abo->mem.size;
+}
+
 static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
 {
 	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
 	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
 
-	XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, amdxdna_gem_dev_addr(abo));
-
 	amdxdna_hmm_unregister(abo, NULL);
 	flush_workqueue(xdna->notifier_wq);
 
@@ -543,9 +590,13 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
 	int ret;
 
 	guard(mutex)(&abo->lock);
+	abo->open_ref++;
 
-	if (!abo->client)
+	if (abo->open_ref == 1) {
+		/* Attach to the client when first opened by it. */
 		abo->client = filp->driver_priv;
+		amdxdna_gem_add_bo_usage(abo);
+	}
 	if (amdxdna_iova_on(xdna)) {
 		ret = amdxdna_iommu_map_bo(xdna, abo);
 		if (ret)
@@ -555,6 +606,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
 	return 0;
 }
 
+static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *filp)
+{
+	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+
+	guard(mutex)(&abo->lock);
+	abo->open_ref--;
+
+	if (abo->open_ref == 0) {
+		amdxdna_gem_del_bo_usage(abo);
+		/* Detach from the client when last closed by it. */
+		abo->client = NULL;
+	}
+}
+
 static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
 {
 	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
@@ -575,6 +640,7 @@ static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
 static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
 	.free = amdxdna_gem_obj_free,
 	.open = amdxdna_gem_obj_open,
+	.close = amdxdna_gem_obj_close,
 	.print_info = drm_gem_shmem_object_print_info,
 	.pin = drm_gem_shmem_object_pin,
 	.unpin = drm_gem_shmem_object_unpin,
@@ -708,10 +774,13 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
 	if (IS_ERR(abo))
 		return ERR_CAST(abo);
 
-	if (args->type == AMDXDNA_BO_DEV_HEAP)
+	if (args->type == AMDXDNA_BO_DEV_HEAP) {
 		abo->type = AMDXDNA_BO_DEV_HEAP;
-	else
+		abo->internal = true;
+	} else {
 		abo->type = AMDXDNA_BO_SHARE;
+		abo->internal = args->type == AMDXDNA_BO_CMD;
+	}
 
 	return abo;
 }
@@ -783,6 +852,11 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
 	gobj = to_gobj(abo);
 	gobj->funcs = &amdxdna_gem_dev_obj_funcs;
 	abo->type = AMDXDNA_BO_DEV;
+	abo->internal = true;
+	/*
+	 * DEV BOs cannot outlive the client, so it's OK to
+	 * always establish the connection.
+	 */
 	abo->client = client;
 
 	ret = amdxdna_gem_heap_alloc(abo);
@@ -826,7 +900,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
 	if (IS_ERR(abo))
 		return PTR_ERR(abo);
 
-	/* ready to publish object to userspace */
+	/* Ready to publish object to userspace and count for BO usage. */
 	ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
 	if (ret) {
 		XDNA_ERR(xdna, "Create handle failed");
@@ -986,3 +1060,43 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
 	drm_gem_object_put(gobj);
 	return ret;
 }
+
+int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args)
+{
+	size_t min_sz = min(args->element_size, sizeof(struct amdxdna_drm_bo_usage));
+	char __user *buf = u64_to_user_ptr(args->buffer);
+	struct amdxdna_dev *xdna = to_xdna_dev(dev);
+	struct amdxdna_client *tmp_client;
+	struct amdxdna_drm_bo_usage tmp;
+
+	drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock));
+
+	if (args->num_element != 1)
+		return -EINVAL;
+
+	if (copy_from_user(&tmp, buf, min_sz))
+		return -EFAULT;
+
+	if (!tmp.pid)
+		return -EINVAL;
+
+	tmp.total_usage = 0;
+	tmp.internal_usage = 0;
+	tmp.heap_usage = 0;
+
+	list_for_each_entry(tmp_client, &xdna->client_list, node) {
+		if (tmp.pid != tmp_client->pid)
+			continue;
+
+		mutex_lock(&tmp_client->mm_lock);
+		tmp.total_usage += tmp_client->total_bo_usage;
+		tmp.internal_usage += tmp_client->total_int_bo_usage;
+		tmp.heap_usage += tmp_client->heap_usage;
+		mutex_unlock(&tmp_client->mm_lock);
+	}
+
+	if (copy_to_user(buf, &tmp, min_sz))
+		return -EFAULT;
+
+	return 0;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
index a77d9344f8a4..4fc48a1189d2 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -41,8 +41,9 @@ struct amdxdna_gem_obj {
 	struct amdxdna_client		*client;
 	u8				type;
 	bool				pinned;
-	struct mutex			lock; /* Protects: pinned, mem.kva */
+	struct mutex			lock; /* Protects: pinned, mem.kva, open_ref */
 	struct amdxdna_mem		mem;
+	int				open_ref;
 
 	/* Below members are initialized when needed */
 	struct drm_mm			mm; /* For AMDXDNA_BO_DEV_HEAP */
@@ -50,6 +51,9 @@ struct amdxdna_gem_obj {
 	u32				assigned_hwctx;
 	struct dma_buf			*dma_buf;
 	struct dma_buf_attachment	*attach;
+
+	/* True if BO is managed by XRT, not by the application */
+	bool				internal;
 };
 
 #define to_gobj(obj)    (&(obj)->base.base)
@@ -98,5 +102,6 @@ void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
 int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args);
 
 #endif /* _AMDXDNA_GEM_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index d83be00daf2b..b50a7d1f8a11 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -36,9 +36,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
  * 0.5: Support getting telemetry data
  * 0.6: Support preemption
  * 0.7: Support getting power and utilization data
+ * 0.8: Support BO usage query
  */
 #define AMDXDNA_DRIVER_MAJOR		0
-#define AMDXDNA_DRIVER_MINOR		7
+#define AMDXDNA_DRIVER_MINOR		8
 
 /*
  * Bind the driver base on (vendor_id, device_id) pair and later use the
@@ -120,11 +121,12 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
 	amdxdna_hwctx_remove_all(client);
 	xa_destroy(&client->hwctx_xa);
 	cleanup_srcu_struct(&client->hwctx_srcu);
-	mutex_destroy(&client->mm_lock);
 
 	if (client->dev_heap)
 		drm_gem_object_put(to_gobj(client->dev_heap));
 
+	mutex_destroy(&client->mm_lock);
+
 	if (!IS_ERR_OR_NULL(client->sva))
 		iommu_sva_unbind_device(client->sva);
 	mmdrop(client->mm);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index e91d14ae5190..0661749917d6 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -138,6 +138,10 @@ struct amdxdna_client {
 	struct iommu_sva		*sva;
 	int				pasid;
 	struct mm_struct		*mm;
+
+	size_t				heap_usage;
+	size_t				total_bo_usage;
+	size_t				total_int_bo_usage;
 };
 
 #define amdxdna_for_each_hwctx(client, hwctx_id, entry)		\
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index bddaaaf945cf..61d3686fa3b1 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -591,8 +591,37 @@ struct amdxdna_async_error {
 	__u64 ex_err_code;
 };
 
+/**
+ * struct amdxdna_drm_bo_usage - all types of BO usage
+ * BOs managed by XRT/SHIM/driver are counted as internal.
+ * Others, which are managed by applications, are counted as external.
+ *
+ * Among all types of BOs:
+ *   AMDXDNA_BO_DEV_HEAP - is counted for internal.
+ *   AMDXDNA_BO_SHARE    - is counted for external.
+ *   AMDXDNA_BO_CMD      - is counted for internal.
+ *   AMDXDNA_BO_DEV      - is counted by heap_usage only, not internal
+ *                         or external. It does not add to the total memory
+ *                         footprint since its mem comes from heap which is
+ *                         already counted as internal.
+ */
+struct amdxdna_drm_bo_usage {
+	/** @pid: The ID of the process to query from. */
+	__s64 pid;
+	/** @total_usage: Total BO size used by process. */
+	__u64 total_usage;
+	/** @internal_usage: Total internal BO size used by process. */
+	__u64 internal_usage;
+	/** @heap_usage: Total device BO size used by process. */
+	__u64 heap_usage;
+};
+
+/*
+ * Supported params in struct amdxdna_drm_get_array
+ */
 #define DRM_AMDXDNA_HW_CONTEXT_ALL	0
 #define DRM_AMDXDNA_HW_LAST_ASYNC_ERR	2
+#define DRM_AMDXDNA_BO_USAGE		6
 
 /**
  * struct amdxdna_drm_get_array - Get information array.
@@ -605,6 +634,12 @@ struct amdxdna_drm_get_array {
 	 *
 	 * %DRM_AMDXDNA_HW_CONTEXT_ALL:
 	 * Returns all created hardware contexts.
+	 *
+	 * %DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
+	 * Returns last async error.
+	 *
+	 * %DRM_AMDXDNA_BO_USAGE:
+	 * Returns usage of heap/internal/external BOs.
 	 */
 	__u32 param;
 	/**
-- 
2.34.1
Re: [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support
Posted by Mario Limonciello 1 week, 2 days ago

On 3/24/26 11:31, Lizhi Hou wrote:
> From: Max Zhen <max.zhen@amd.com>
> 
> Add support for querying per-process buffer object (BO) memory
> usage through the amdxdna GET_ARRAY UAPI.
> 
> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
> struct amdxdna_drm_bo_usage to report BO memory usage statistics,
> including heap, total, and internal usage.
> 
> Track BO memory usage on a per-client basis by maintaining counters
> in GEM open/close and heap allocation/free paths. This ensures the
> reported statistics reflect the current memory footprint of each
> process.
> 
> Wire the new query into the GET_ARRAY implementation to expose
> the usage information to userspace.
> 
> Signed-off-by: Max Zhen <max.zhen@amd.com>
> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
I'm assuming you also have userspace side ready for this too right?
If you have a link handy can you please include it when committing.

Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>

> ---
>   drivers/accel/amdxdna/aie2_pci.c        |   4 +
>   drivers/accel/amdxdna/amdxdna_gem.c     | 134 ++++++++++++++++++++++--
>   drivers/accel/amdxdna/amdxdna_gem.h     |   7 +-
>   drivers/accel/amdxdna/amdxdna_pci_drv.c |   6 +-
>   drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +
>   include/uapi/drm/amdxdna_accel.h        |  35 +++++++
>   6 files changed, 177 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 9e39bfe75971..f1ac4e00bd9f 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -865,6 +865,7 @@ static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg)
>   	tmp->command_submissions = hwctx->priv->seq;
>   	tmp->command_completions = hwctx->priv->completed;
>   	tmp->pasid = hwctx->client->pasid;
> +	tmp->heap_usage = hwctx->client->heap_usage;
>   	tmp->priority = hwctx->qos.priority;
>   	tmp->gops = hwctx->qos.gops;
>   	tmp->fps = hwctx->qos.fps;
> @@ -1148,6 +1149,9 @@ static int aie2_get_array(struct amdxdna_client *client,
>   	case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
>   		ret = aie2_get_array_async_error(xdna->dev_handle, args);
>   		break;
> +	case DRM_AMDXDNA_BO_USAGE:
> +		ret = amdxdna_drm_get_bo_usage(&xdna->ddev, args);
> +		break;
>   	default:
>   		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
>   		ret = -EOPNOTSUPP;
> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
> index 27712704e42d..238ee244d4a6 100644
> --- a/drivers/accel/amdxdna/amdxdna_gem.c
> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
> @@ -63,6 +63,8 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>   		goto unlock_out;
>   	}
>   
> +	client->heap_usage += mem->size;
> +
>   	drm_gem_object_get(to_gobj(heap));
>   
>   unlock_out:
> @@ -74,16 +76,17 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>   static void
>   amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
>   {
> +	struct amdxdna_client *client = abo->client;
>   	struct amdxdna_gem_obj *heap;
>   
> -	mutex_lock(&abo->client->mm_lock);
> +	mutex_lock(&client->mm_lock);
>   
>   	drm_mm_remove_node(&abo->mm_node);
> -
> -	heap = abo->client->dev_heap;
> +	client->heap_usage -= abo->mem.size;
> +	heap = client->dev_heap;
>   	drm_gem_object_put(to_gobj(heap));
>   
> -	mutex_unlock(&abo->client->mm_lock);
> +	mutex_unlock(&client->mm_lock);
>   }
>   
>   static struct amdxdna_gem_obj *
> @@ -102,6 +105,8 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
>   	abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
>   	abo->mem.uva = AMDXDNA_INVALID_ADDR;
>   	abo->mem.size = size;
> +	abo->open_ref = 0;
> +	abo->internal = false;
>   	INIT_LIST_HEAD(&abo->mem.umap_list);
>   
>   	return abo;
> @@ -508,13 +513,55 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
>   	kfree(abo);
>   }
>   
> +static inline bool
> +amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
> +{
> +	/* Do not count imported BOs since the buffer is not allocated by us. */
> +	if (is_import_bo(abo))
> +		return true;
> +
> +	/* Already counted as part of HEAP BO */
> +	if (abo->type == AMDXDNA_BO_DEV)
> +		return true;
> +
> +	return false;
> +}
> +
> +static void
> +amdxdna_gem_add_bo_usage(struct amdxdna_gem_obj *abo)
> +{
> +	struct amdxdna_client *client = abo->client;
> +
> +	if (amdxdna_gem_skip_bo_usage(abo))
> +		return;
> +
> +	guard(mutex)(&client->mm_lock);
> +
> +	client->total_bo_usage += abo->mem.size;
> +	if (abo->internal)
> +		client->total_int_bo_usage += abo->mem.size;
> +}
> +
> +static void
> +amdxdna_gem_del_bo_usage(struct amdxdna_gem_obj *abo)
> +{
> +	struct amdxdna_client *client = abo->client;
> +
> +	if (amdxdna_gem_skip_bo_usage(abo))
> +		return;
> +
> +	guard(mutex)(&client->mm_lock);
> +
> +	client->total_bo_usage -= abo->mem.size;
> +	if (abo->internal)
> +		client->total_int_bo_usage -= abo->mem.size;
> +}
> +
>   static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
>   {
>   	struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
>   	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
>   
> -	XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, amdxdna_gem_dev_addr(abo));
> -
>   	amdxdna_hmm_unregister(abo, NULL);
>   	flush_workqueue(xdna->notifier_wq);
>   
> @@ -543,9 +590,13 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
>   	int ret;
>   
>   	guard(mutex)(&abo->lock);
> +	abo->open_ref++;
>   
> -	if (!abo->client)
> +	if (abo->open_ref == 1) {
> +		/* Attached to the client when first opened by it. */
>   		abo->client = filp->driver_priv;
> +		amdxdna_gem_add_bo_usage(abo);
> +	}
>   	if (amdxdna_iova_on(xdna)) {
>   		ret = amdxdna_iommu_map_bo(xdna, abo);
>   		if (ret)
> @@ -555,6 +606,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
>   	return 0;
>   }
>   
> +static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *filp)
> +{
> +	struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
> +
> +	guard(mutex)(&abo->lock);
> +	abo->open_ref--;
> +
> +	if (abo->open_ref == 0) {
> +		amdxdna_gem_del_bo_usage(abo);
> +		/* Detach from the client when last closed by it. */
> +		abo->client = NULL;
> +	}
> +}
> +
>   static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
>   {
>   	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
> @@ -575,6 +640,7 @@ static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
>   static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
>   	.free = amdxdna_gem_obj_free,
>   	.open = amdxdna_gem_obj_open,
> +	.close = amdxdna_gem_obj_close,
>   	.print_info = drm_gem_shmem_object_print_info,
>   	.pin = drm_gem_shmem_object_pin,
>   	.unpin = drm_gem_shmem_object_unpin,
> @@ -708,10 +774,13 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
>   	if (IS_ERR(abo))
>   		return ERR_CAST(abo);
>   
> -	if (args->type == AMDXDNA_BO_DEV_HEAP)
> +	if (args->type == AMDXDNA_BO_DEV_HEAP) {
>   		abo->type = AMDXDNA_BO_DEV_HEAP;
> -	else
> +		abo->internal = true;
> +	} else {
>   		abo->type = AMDXDNA_BO_SHARE;
> +		abo->internal = args->type == AMDXDNA_BO_CMD;
> +	}
>   
>   	return abo;
>   }
> @@ -783,6 +852,11 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
>   	gobj = to_gobj(abo);
>   	gobj->funcs = &amdxdna_gem_dev_obj_funcs;
>   	abo->type = AMDXDNA_BO_DEV;
> +	abo->internal = true;
> +	/*
> +	 * DEV BOs cannot be alive when client is gone, it's OK to
> +	 * always establish the connection.
> +	 */
>   	abo->client = client;
>   
>   	ret = amdxdna_gem_heap_alloc(abo);
> @@ -826,7 +900,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
>   	if (IS_ERR(abo))
>   		return PTR_ERR(abo);
>   
> -	/* ready to publish object to userspace */
> +	/* Ready to publish object to userspace and count for BO usage. */
>   	ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
>   	if (ret) {
>   		XDNA_ERR(xdna, "Create handle failed");
> @@ -986,3 +1060,43 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
>   	drm_gem_object_put(gobj);
>   	return ret;
>   }
> +
> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args)
> +{
> +	size_t min_sz = min(args->element_size, sizeof(struct amdxdna_drm_bo_usage));
> +	char __user *buf = u64_to_user_ptr(args->buffer);
> +	struct amdxdna_dev *xdna = to_xdna_dev(dev);
> +	struct amdxdna_client *tmp_client;
> +	struct amdxdna_drm_bo_usage tmp;
> +
> +	drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock));
> +
> +	if (args->num_element != 1)
> +		return -EINVAL;
> +
> +	if (copy_from_user(&tmp, buf, min_sz))
> +		return -EFAULT;
> +
> +	if (!tmp.pid)
> +		return -EINVAL;
> +
> +	tmp.total_usage = 0;
> +	tmp.internal_usage = 0;
> +	tmp.heap_usage = 0;
> +
> +	list_for_each_entry(tmp_client, &xdna->client_list, node) {
> +		if (tmp.pid != tmp_client->pid)
> +			continue;
> +
> +		mutex_lock(&tmp_client->mm_lock);
> +		tmp.total_usage += tmp_client->total_bo_usage;
> +		tmp.internal_usage += tmp_client->total_int_bo_usage;
> +		tmp.heap_usage += tmp_client->heap_usage;
> +		mutex_unlock(&tmp_client->mm_lock);
> +	}
> +
> +	if (copy_to_user(buf, &tmp, min_sz))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
> index a77d9344f8a4..4fc48a1189d2 100644
> --- a/drivers/accel/amdxdna/amdxdna_gem.h
> +++ b/drivers/accel/amdxdna/amdxdna_gem.h
> @@ -41,8 +41,9 @@ struct amdxdna_gem_obj {
>   	struct amdxdna_client		*client;
>   	u8				type;
>   	bool				pinned;
> -	struct mutex			lock; /* Protects: pinned, mem.kva */
> +	struct mutex			lock; /* Protects: pinned, mem.kva, open_ref */
>   	struct amdxdna_mem		mem;
> +	int				open_ref;
>   
>   	/* Below members are initialized when needed */
>   	struct drm_mm			mm; /* For AMDXDNA_BO_DEV_HEAP */
> @@ -50,6 +51,9 @@ struct amdxdna_gem_obj {
>   	u32				assigned_hwctx;
>   	struct dma_buf			*dma_buf;
>   	struct dma_buf_attachment	*attach;
> +
> +	/* True, if BO is managed by XRT, not application */
> +	bool				internal;
>   };
>   
>   #define to_gobj(obj)    (&(obj)->base.base)
> @@ -98,5 +102,6 @@ void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
>   int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>   int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
>   int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args);
>   
>   #endif /* _AMDXDNA_GEM_H_ */
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index d83be00daf2b..b50a7d1f8a11 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -36,9 +36,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
>    * 0.5: Support getting telemetry data
>    * 0.6: Support preemption
>    * 0.7: Support getting power and utilization data
> + * 0.8: Support BO usage query
>    */
>   #define AMDXDNA_DRIVER_MAJOR		0
> -#define AMDXDNA_DRIVER_MINOR		7
> +#define AMDXDNA_DRIVER_MINOR		8
>   
>   /*
>    * Bind the driver base on (vendor_id, device_id) pair and later use the
> @@ -120,11 +121,12 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
>   	amdxdna_hwctx_remove_all(client);
>   	xa_destroy(&client->hwctx_xa);
>   	cleanup_srcu_struct(&client->hwctx_srcu);
> -	mutex_destroy(&client->mm_lock);
>   
>   	if (client->dev_heap)
>   		drm_gem_object_put(to_gobj(client->dev_heap));
>   
> +	mutex_destroy(&client->mm_lock);
> +
>   	if (!IS_ERR_OR_NULL(client->sva))
>   		iommu_sva_unbind_device(client->sva);
>   	mmdrop(client->mm);
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> index e91d14ae5190..0661749917d6 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> @@ -138,6 +138,10 @@ struct amdxdna_client {
>   	struct iommu_sva		*sva;
>   	int				pasid;
>   	struct mm_struct		*mm;
> +
> +	size_t				heap_usage;
> +	size_t				total_bo_usage;
> +	size_t				total_int_bo_usage;
>   };
>   
>   #define amdxdna_for_each_hwctx(client, hwctx_id, entry)		\
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index bddaaaf945cf..61d3686fa3b1 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -591,8 +591,37 @@ struct amdxdna_async_error {
>   	__u64 ex_err_code;
>   };
>   
> +/**
> + * struct amdxdna_drm_bo_usage - all types of BO usage
> + * BOs managed by XRT/SHIM/driver is counted as internal.
> + * Others are counted as external which are managed by applications.
> + *
> + * Among all types of BOs:
> + *   AMDXDNA_BO_DEV_HEAP - is counted for internal.
> + *   AMDXDNA_BO_SHARE    - is counted for external.
> + *   AMDXDNA_BO_CMD      - is counted for internal.
> + *   AMDXDNA_BO_DEV      - is counted by heap_usage only, not internal
> + *                         or external. It does not add to the total memory
> + *                         footprint since its mem comes from heap which is
> + *                         already counted as internal.
> + */
> +struct amdxdna_drm_bo_usage {
> +	/** @pid: The ID of the process to query from. */
> +	__s64 pid;
> +	/** @total_usage: Total BO size used by process. */
> +	__u64 total_usage;
> +	/** @internal_usage: Total internal BO size used by process. */
> +	__u64 internal_usage;
> +	/** @heap_usage: Total device BO size used by process. */
> +	__u64 heap_usage;
> +};
> +
> +/*
> + * Supported params in struct amdxdna_drm_get_array
> + */
>   #define DRM_AMDXDNA_HW_CONTEXT_ALL	0
>   #define DRM_AMDXDNA_HW_LAST_ASYNC_ERR	2
> +#define DRM_AMDXDNA_BO_USAGE		6
>   
>   /**
>    * struct amdxdna_drm_get_array - Get information array.
> @@ -605,6 +634,12 @@ struct amdxdna_drm_get_array {
>   	 *
>   	 * %DRM_AMDXDNA_HW_CONTEXT_ALL:
>   	 * Returns all created hardware contexts.
> +	 *
> +	 * %DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
> +	 * Returns last async error.
> +	 *
> +	 * %DRM_AMDXDNA_BO_USAGE:
> +	 * Returns usage of heap/internal/external BOs.
>   	 */
>   	__u32 param;
>   	/**
Re: [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support
Posted by Lizhi Hou 1 week, 1 day ago
Applied to drm-misc-next.

On 3/24/26 10:01, Mario Limonciello wrote:
>
>
> On 3/24/26 11:31, Lizhi Hou wrote:
>> From: Max Zhen <max.zhen@amd.com>
>>
>> Add support for querying per-process buffer object (BO) memory
>> usage through the amdxdna GET_ARRAY UAPI.
>>
>> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
>> struct amdxdna_drm_bo_usage to report BO memory usage statistics,
>> including heap, total, and internal usage.
>>
>> Track BO memory usage on a per-client basis by maintaining counters
>> in GEM open/close and heap allocation/free paths. This ensures the
>> reported statistics reflect the current memory footprint of each
>> process.
>>
>> Wire the new query into the GET_ARRAY implementation to expose
>> the usage information to userspace.
>>
>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> I'm assuming you also have userspace side ready for this too right?
> If you have a link handy can you please include it when committing.
>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>
>> ---
>>   drivers/accel/amdxdna/aie2_pci.c        |   4 +
>>   drivers/accel/amdxdna/amdxdna_gem.c     | 134 ++++++++++++++++++++++--
>>   drivers/accel/amdxdna/amdxdna_gem.h     |   7 +-
>>   drivers/accel/amdxdna/amdxdna_pci_drv.c |   6 +-
>>   drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +
>>   include/uapi/drm/amdxdna_accel.h        |  35 +++++++
>>   6 files changed, 177 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index 9e39bfe75971..f1ac4e00bd9f 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -865,6 +865,7 @@ static int aie2_hwctx_status_cb(struct 
>> amdxdna_hwctx *hwctx, void *arg)
>>       tmp->command_submissions = hwctx->priv->seq;
>>       tmp->command_completions = hwctx->priv->completed;
>>       tmp->pasid = hwctx->client->pasid;
>> +    tmp->heap_usage = hwctx->client->heap_usage;
>>       tmp->priority = hwctx->qos.priority;
>>       tmp->gops = hwctx->qos.gops;
>>       tmp->fps = hwctx->qos.fps;
>> @@ -1148,6 +1149,9 @@ static int aie2_get_array(struct amdxdna_client 
>> *client,
>>       case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
>>           ret = aie2_get_array_async_error(xdna->dev_handle, args);
>>           break;
>> +    case DRM_AMDXDNA_BO_USAGE:
>> +        ret = amdxdna_drm_get_bo_usage(&xdna->ddev, args);
>> +        break;
>>       default:
>>           XDNA_ERR(xdna, "Not supported request parameter %u", 
>> args->param);
>>           ret = -EOPNOTSUPP;
>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c 
>> b/drivers/accel/amdxdna/amdxdna_gem.c
>> index 27712704e42d..238ee244d4a6 100644
>> --- a/drivers/accel/amdxdna/amdxdna_gem.c
>> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
>> @@ -63,6 +63,8 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>>           goto unlock_out;
>>       }
>>   +    client->heap_usage += mem->size;
>> +
>>       drm_gem_object_get(to_gobj(heap));
>>     unlock_out:
>> @@ -74,16 +76,17 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>>   static void
>>   amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
>>   {
>> +    struct amdxdna_client *client = abo->client;
>>       struct amdxdna_gem_obj *heap;
>>   -    mutex_lock(&abo->client->mm_lock);
>> +    mutex_lock(&client->mm_lock);
>>         drm_mm_remove_node(&abo->mm_node);
>> -
>> -    heap = abo->client->dev_heap;
>> +    client->heap_usage -= abo->mem.size;
>> +    heap = client->dev_heap;
>>       drm_gem_object_put(to_gobj(heap));
>>   -    mutex_unlock(&abo->client->mm_lock);
>> +    mutex_unlock(&client->mm_lock);
>>   }
>>     static struct amdxdna_gem_obj *
>> @@ -102,6 +105,8 @@ amdxdna_gem_create_obj(struct drm_device *dev, 
>> size_t size)
>>       abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
>>       abo->mem.uva = AMDXDNA_INVALID_ADDR;
>>       abo->mem.size = size;
>> +    abo->open_ref = 0;
>> +    abo->internal = false;
>>       INIT_LIST_HEAD(&abo->mem.umap_list);
>>         return abo;
>> @@ -508,13 +513,55 @@ static void amdxdna_imported_obj_free(struct 
>> amdxdna_gem_obj *abo)
>>       kfree(abo);
>>   }
>>   +static inline bool
>> +amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> +    /* Do not count imported BOs since the buffer is not allocated 
>> by us. */
>> +    if (is_import_bo(abo))
>> +        return true;
>> +
>> +    /* Already counted as part of HEAP BO */
>> +    if (abo->type == AMDXDNA_BO_DEV)
>> +        return true;
>> +
>> +    return false;
>> +}
>> +
>> +static void
>> +amdxdna_gem_add_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> +    struct amdxdna_client *client = abo->client;
>> +
>> +    if (amdxdna_gem_skip_bo_usage(abo))
>> +        return;
>> +
>> +    guard(mutex)(&client->mm_lock);
>> +
>> +    client->total_bo_usage += abo->mem.size;
>> +    if (abo->internal)
>> +        client->total_int_bo_usage += abo->mem.size;
>> +}
>> +
>> +static void
>> +amdxdna_gem_del_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> +    struct amdxdna_client *client = abo->client;
>> +
>> +    if (amdxdna_gem_skip_bo_usage(abo))
>> +        return;
>> +
>> +    guard(mutex)(&client->mm_lock);
>> +
>> +    client->total_bo_usage -= abo->mem.size;
>> +    if (abo->internal)
>> +        client->total_int_bo_usage -= abo->mem.size;
>> +}
>> +
>>   static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
>>   {
>>       struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
>>       struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
>>   -    XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, 
>> amdxdna_gem_dev_addr(abo));
>> -
>>       amdxdna_hmm_unregister(abo, NULL);
>>       flush_workqueue(xdna->notifier_wq);
>>   @@ -543,9 +590,13 @@ static int amdxdna_gem_obj_open(struct 
>> drm_gem_object *gobj, struct drm_file *fi
>>       int ret;
>>         guard(mutex)(&abo->lock);
>> +    abo->open_ref++;
>>   -    if (!abo->client)
>> +    if (abo->open_ref == 1) {
>> +        /* Attached to the client when first opened by it. */
>>           abo->client = filp->driver_priv;
>> +        amdxdna_gem_add_bo_usage(abo);
>> +    }
>>       if (amdxdna_iova_on(xdna)) {
>>           ret = amdxdna_iommu_map_bo(xdna, abo);
>>           if (ret)
>> @@ -555,6 +606,20 @@ static int amdxdna_gem_obj_open(struct 
>> drm_gem_object *gobj, struct drm_file *fi
>>       return 0;
>>   }
>>   +static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, 
>> struct drm_file *filp)
>> +{
>> +    struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
>> +
>> +    guard(mutex)(&abo->lock);
>> +    abo->open_ref--;
>> +
>> +    if (abo->open_ref == 0) {
>> +        amdxdna_gem_del_bo_usage(abo);
>> +        /* Detach from the client when last closed by it. */
>> +        abo->client = NULL;
>> +    }
>> +}
>> +
>>   static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, 
>> struct iosys_map *map)
>>   {
>>       struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
>> @@ -575,6 +640,7 @@ static const struct drm_gem_object_funcs 
>> amdxdna_gem_dev_obj_funcs = {
>>   static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
>>       .free = amdxdna_gem_obj_free,
>>       .open = amdxdna_gem_obj_open,
>> +    .close = amdxdna_gem_obj_close,
>>       .print_info = drm_gem_shmem_object_print_info,
>>       .pin = drm_gem_shmem_object_pin,
>>       .unpin = drm_gem_shmem_object_unpin,
>> @@ -708,10 +774,13 @@ amdxdna_drm_create_share_bo(struct drm_device 
>> *dev,
>>       if (IS_ERR(abo))
>>           return ERR_CAST(abo);
>>   -    if (args->type == AMDXDNA_BO_DEV_HEAP)
>> +    if (args->type == AMDXDNA_BO_DEV_HEAP) {
>>           abo->type = AMDXDNA_BO_DEV_HEAP;
>> -    else
>> +        abo->internal = true;
>> +    } else {
>>           abo->type = AMDXDNA_BO_SHARE;
>> +        abo->internal = args->type == AMDXDNA_BO_CMD;
>> +    }
>>         return abo;
>>   }
>> @@ -783,6 +852,11 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
>>       gobj = to_gobj(abo);
>>       gobj->funcs = &amdxdna_gem_dev_obj_funcs;
>>       abo->type = AMDXDNA_BO_DEV;
>> +    abo->internal = true;
>> +    /*
>> +     * DEV BOs cannot be alive when client is gone, it's OK to
>> +     * always establish the connection.
>> +     */
>>       abo->client = client;
>>         ret = amdxdna_gem_heap_alloc(abo);
>> @@ -826,7 +900,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device 
>> *dev, void *data, struct drm_f
>>       if (IS_ERR(abo))
>>           return PTR_ERR(abo);
>>   -    /* ready to publish object to userspace */
>> +    /* Ready to publish object to userspace and count for BO usage. */
>>       ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
>>       if (ret) {
>>           XDNA_ERR(xdna, "Create handle failed");
>> @@ -986,3 +1060,43 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device 
>> *dev,
>>       drm_gem_object_put(gobj);
>>       return ret;
>>   }
>> +
>> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct 
>> amdxdna_drm_get_array *args)
>> +{
>> +    size_t min_sz = min(args->element_size, sizeof(struct 
>> amdxdna_drm_bo_usage));
>> +    char __user *buf = u64_to_user_ptr(args->buffer);
>> +    struct amdxdna_dev *xdna = to_xdna_dev(dev);
>> +    struct amdxdna_client *tmp_client;
>> +    struct amdxdna_drm_bo_usage tmp;
>> +
>> +    drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock));
>> +
>> +    if (args->num_element != 1)
>> +        return -EINVAL;
>> +
>> +    if (copy_from_user(&tmp, buf, min_sz))
>> +        return -EFAULT;
>> +
>> +    if (!tmp.pid)
>> +        return -EINVAL;
>> +
>> +    tmp.total_usage = 0;
>> +    tmp.internal_usage = 0;
>> +    tmp.heap_usage = 0;
>> +
>> +    list_for_each_entry(tmp_client, &xdna->client_list, node) {
>> +        if (tmp.pid != tmp_client->pid)
>> +            continue;
>> +
>> +        mutex_lock(&tmp_client->mm_lock);
>> +        tmp.total_usage += tmp_client->total_bo_usage;
>> +        tmp.internal_usage += tmp_client->total_int_bo_usage;
>> +        tmp.heap_usage += tmp_client->heap_usage;
>> +        mutex_unlock(&tmp_client->mm_lock);
>> +    }
>> +
>> +    if (copy_to_user(buf, &tmp, min_sz))
>> +        return -EFAULT;
>> +
>> +    return 0;
>> +}
>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.h 
>> b/drivers/accel/amdxdna/amdxdna_gem.h
>> index a77d9344f8a4..4fc48a1189d2 100644
>> --- a/drivers/accel/amdxdna/amdxdna_gem.h
>> +++ b/drivers/accel/amdxdna/amdxdna_gem.h
>> @@ -41,8 +41,9 @@ struct amdxdna_gem_obj {
>>       struct amdxdna_client        *client;
>>       u8                type;
>>       bool                pinned;
>> -    struct mutex            lock; /* Protects: pinned, mem.kva */
>> +    struct mutex            lock; /* Protects: pinned, mem.kva, 
>> open_ref */
>>       struct amdxdna_mem        mem;
>> +    int                open_ref;
>>         /* Below members are initialized when needed */
>>       struct drm_mm            mm; /* For AMDXDNA_BO_DEV_HEAP */
>> @@ -50,6 +51,9 @@ struct amdxdna_gem_obj {
>>       u32                assigned_hwctx;
>>       struct dma_buf            *dma_buf;
>>       struct dma_buf_attachment    *attach;
>> +
>> +    /* True, if BO is managed by XRT, not application */
>> +    bool                internal;
>>   };
>>     #define to_gobj(obj)    (&(obj)->base.base)
>> @@ -98,5 +102,6 @@ void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
>>   int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, 
>> struct drm_file *filp);
>>   int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void 
>> *data, struct drm_file *filp);
>>   int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, 
>> struct drm_file *filp);
>> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct 
>> amdxdna_drm_get_array *args);
>>     #endif /* _AMDXDNA_GEM_H_ */
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> index d83be00daf2b..b50a7d1f8a11 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -36,9 +36,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
>>    * 0.5: Support getting telemetry data
>>    * 0.6: Support preemption
>>    * 0.7: Support getting power and utilization data
>> + * 0.8: Support BO usage query
>>    */
>>   #define AMDXDNA_DRIVER_MAJOR        0
>> -#define AMDXDNA_DRIVER_MINOR        7
>> +#define AMDXDNA_DRIVER_MINOR        8
>>     /*
>>    * Bind the driver base on (vendor_id, device_id) pair and later 
>> use the
>> @@ -120,11 +121,12 @@ static void amdxdna_client_cleanup(struct 
>> amdxdna_client *client)
>>       amdxdna_hwctx_remove_all(client);
>>       xa_destroy(&client->hwctx_xa);
>>       cleanup_srcu_struct(&client->hwctx_srcu);
>> -    mutex_destroy(&client->mm_lock);
>>         if (client->dev_heap)
>>           drm_gem_object_put(to_gobj(client->dev_heap));
>>   +    mutex_destroy(&client->mm_lock);
>> +
>>       if (!IS_ERR_OR_NULL(client->sva))
>>           iommu_sva_unbind_device(client->sva);
>>       mmdrop(client->mm);
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> index e91d14ae5190..0661749917d6 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> @@ -138,6 +138,10 @@ struct amdxdna_client {
>>       struct iommu_sva        *sva;
>>       int                pasid;
>>       struct mm_struct        *mm;
>> +
>> +    size_t                heap_usage;
>> +    size_t                total_bo_usage;
>> +    size_t                total_int_bo_usage;
>>   };
>>     #define amdxdna_for_each_hwctx(client, hwctx_id, entry)        \
>> diff --git a/include/uapi/drm/amdxdna_accel.h 
>> b/include/uapi/drm/amdxdna_accel.h
>> index bddaaaf945cf..61d3686fa3b1 100644
>> --- a/include/uapi/drm/amdxdna_accel.h
>> +++ b/include/uapi/drm/amdxdna_accel.h
>> @@ -591,8 +591,37 @@ struct amdxdna_async_error {
>>       __u64 ex_err_code;
>>   };
>>   +/**
>> + * struct amdxdna_drm_bo_usage - all types of BO usage
>> + * BOs managed by XRT/SHIM/driver is counted as internal.
>> + * Others are counted as external which are managed by applications.
>> + *
>> + * Among all types of BOs:
>> + *   AMDXDNA_BO_DEV_HEAP - is counted for internal.
>> + *   AMDXDNA_BO_SHARE    - is counted for external.
>> + *   AMDXDNA_BO_CMD      - is counted for internal.
>> + *   AMDXDNA_BO_DEV      - is counted by heap_usage only, not internal
>> + *                         or external. It does not add to the total 
>> memory
>> + *                         footprint since its mem comes from heap 
>> which is
>> + *                         already counted as internal.
>> + */
>> +struct amdxdna_drm_bo_usage {
>> +    /** @pid: The ID of the process to query from. */
>> +    __s64 pid;
>> +    /** @total_usage: Total BO size used by process. */
>> +    __u64 total_usage;
>> +    /** @internal_usage: Total internal BO size used by process. */
>> +    __u64 internal_usage;
>> +    /** @heap_usage: Total device BO size used by process. */
>> +    __u64 heap_usage;
>> +};
>> +
>> +/*
>> + * Supported params in struct amdxdna_drm_get_array
>> + */
>>   #define DRM_AMDXDNA_HW_CONTEXT_ALL    0
>>   #define DRM_AMDXDNA_HW_LAST_ASYNC_ERR    2
>> +#define DRM_AMDXDNA_BO_USAGE        6
>>     /**
>>    * struct amdxdna_drm_get_array - Get information array.
>> @@ -605,6 +634,12 @@ struct amdxdna_drm_get_array {
>>        *
>>        * %DRM_AMDXDNA_HW_CONTEXT_ALL:
>>        * Returns all created hardware contexts.
>> +     *
>> +     * %DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
>> +     * Returns last async error.
>> +     *
>> +     * %DRM_AMDXDNA_BO_USAGE:
>> +     * Returns usage of heap/internal/external BOs.
>>        */
>>       __u32 param;
>>       /**
>
Re: [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support
Posted by Ian Rogers 3 days, 6 hours ago
On Wed, Mar 25, 2026 at 11:47 AM Lizhi Hou <lizhi.hou@amd.com> wrote:
>
> Applied to drm-misc-next.
>
> On 3/24/26 10:01, Mario Limonciello wrote:
> >
> >
> > On 3/24/26 11:31, Lizhi Hou wrote:
> >> From: Max Zhen <max.zhen@amd.com>
> >>
> >> Add support for querying per-process buffer object (BO) memory
> >> usage through the amdxdna GET_ARRAY UAPI.
> >>
> >> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
> >> struct amdxdna_drm_bo_usage to report BO memory usage statistics,
> >> including heap, total, and internal usage.
> >>
> >> Track BO memory usage on a per-client basis by maintaining counters
> >> in GEM open/close and heap allocation/free paths. This ensures the
> >> reported statistics reflect the current memory footprint of each
> >> process.
> >>
> >> Wire the new query into the GET_ARRAY implementation to expose
> >> the usage information to userspace.
> >>
> >> Signed-off-by: Max Zhen <max.zhen@amd.com>
> >> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
> >> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> > I'm assuming you also have userspace side ready for this too right?
> > If you have a link handy can you please include it when committing.

Sorry for a naive question, would support in procfs be possible?
There's support in perf for displaying DRM usage stat data from there
[1], for example:
```
$ perf list drm-

List of pre-defined events (to be used in -e or -M):


drm:
 drm-active-stolen-system0
      [Total memory active in one or more engines. Unit: drm_i915]
 drm-active-system0
      [Total memory active in one or more engines. Unit: drm_i915]
 drm-engine-capacity-video
      [Engine capacity. Unit: drm_i915]
 drm-engine-copy
      [Utilization in ns. Unit: drm_i915]
 drm-engine-render
      [Utilization in ns. Unit: drm_i915]
 drm-engine-video
      [Utilization in ns. Unit: drm_i915]
 drm-engine-video-enhance
      [Utilization in ns. Unit: drm_i915]
 drm-purgeable-stolen-system0
      [Size of resident and purgeable memory buffers. Unit: drm_i915]
 drm-purgeable-system0
      [Size of resident and purgeable memory buffers. Unit: drm_i915]
 drm-resident-stolen-system0
      [Size of resident memory buffers. Unit: drm_i915]
 drm-resident-system0
      [Size of resident memory buffers. Unit: drm_i915]
 drm-shared-stolen-system0
      [Size of shared memory buffers. Unit: drm_i915]
 drm-shared-system0
      [Size of shared memory buffers. Unit: drm_i915]
 drm-total-stolen-system0
      [Size of shared and private memory. Unit: drm_i915]
 drm-total-system0
      [Size of shared and private memory. Unit: drm_i915]


$ perf stat -e drm-engine-render -a sleep 1

Performance counter stats for 'system wide':

  557,542,732,344 ns   drm-engine-render

      1.001575975 seconds time elapsed
```

Thanks,
Ian

[1] https://lore.kernel.org/lkml/20250403202439.57791-1-irogers@google.com/
Re: [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support
Posted by Lizhi Hou 2 days, 16 hours ago
On 3/30/26 19:30, Ian Rogers wrote:
> On Wed, Mar 25, 2026 at 11:47 AM Lizhi Hou <lizhi.hou@amd.com> wrote:
>> Applied to drm-misc-next.
>>
>> On 3/24/26 10:01, Mario Limonciello wrote:
>>>
>>> On 3/24/26 11:31, Lizhi Hou wrote:
>>>> From: Max Zhen <max.zhen@amd.com>
>>>>
>>>> Add support for querying per-process buffer object (BO) memory
>>>> usage through the amdxdna GET_ARRAY UAPI.
>>>>
>>>> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
>>>> struct amdxdna_drm_bo_usage to report BO memory usage statistics,
>>>> including heap, total, and internal usage.
>>>>
>>>> Track BO memory usage on a per-client basis by maintaining counters
>>>> in GEM open/close and heap allocation/free paths. This ensures the
>>>> reported statistics reflect the current memory footprint of each
>>>> process.
>>>>
>>>> Wire the new query into the GET_ARRAY implementation to expose
>>>> the usage information to userspace.
>>>>
>>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>>>> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>> I'm assuming you also have userspace side ready for this too right?
>>> If you have a link handy can you please include it when committing.
> Sorry for a naive question, would support in procfs be possible?

Do you mean fdinfo? And yes, fdinfo support is in our upstream stack. I will
post the patch later.

Lizhi

> There's support in perf for displaying DRM usage stat data from there
> [1], for example:
> ```
> $ perf list drm-
>
> List of pre-defined events (to be used in -e or -M):
>
>
> drm:
>   drm-active-stolen-system0
>        [Total memory active in one or more engines. Unit: drm_i915]
>   drm-active-system0
>        [Total memory active in one or more engines. Unit: drm_i915]
>   drm-engine-capacity-video
>        [Engine capacity. Unit: drm_i915]
>   drm-engine-copy
>        [Utilization in ns. Unit: drm_i915]
>   drm-engine-render
>        [Utilization in ns. Unit: drm_i915]
>   drm-engine-video
>        [Utilization in ns. Unit: drm_i915]
>   drm-engine-video-enhance
>        [Utilization in ns. Unit: drm_i915]
>   drm-purgeable-stolen-system0
>        [Size of resident and purgeable memory buffers. Unit: drm_i915]
>   drm-purgeable-system0
>        [Size of resident and purgeable memory buffers. Unit: drm_i915]
>   drm-resident-stolen-system0
>        [Size of resident memory buffers. Unit: drm_i915]
>   drm-resident-system0
>        [Size of resident memory buffers. Unit: drm_i915]
>   drm-shared-stolen-system0
>        [Size of shared memory buffers. Unit: drm_i915]
>   drm-shared-system0
>        [Size of shared memory buffers. Unit: drm_i915]
>   drm-total-stolen-system0
>        [Size of shared and private memory. Unit: drm_i915]
>   drm-total-system0
>        [Size of shared and private memory. Unit: drm_i915]
>
>
> $ perf stat -e drm-engine-render -a sleep 1
>
> Performance counter stats for 'system wide':
>
>    557,542,732,344 ns   drm-engine-render
>
>        1.001575975 seconds time elapsed
> ```
>
> Thanks,
> Ian
>
> [1] https://lore.kernel.org/lkml/20250403202439.57791-1-irogers@google.com/
Re: [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support
Posted by Ian Rogers 2 days, 16 hours ago
On Tue, Mar 31, 2026 at 8:53 AM Lizhi Hou <lizhi.hou@amd.com> wrote:
>
>
> On 3/30/26 19:30, Ian Rogers wrote:
> > On Wed, Mar 25, 2026 at 11:47 AM Lizhi Hou <lizhi.hou@amd.com> wrote:
> >> Applied to drm-misc-next.
> >>
> >> On 3/24/26 10:01, Mario Limonciello wrote:
> >>>
> >>> On 3/24/26 11:31, Lizhi Hou wrote:
> >>>> From: Max Zhen <max.zhen@amd.com>
> >>>>
> >>>> Add support for querying per-process buffer object (BO) memory
> >>>> usage through the amdxdna GET_ARRAY UAPI.
> >>>>
> >>>> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
> >>>> struct amdxdna_drm_bo_usage to report BO memory usage statistics,
> >>>> including heap, total, and internal usage.
> >>>>
> >>>> Track BO memory usage on a per-client basis by maintaining counters
> >>>> in GEM open/close and heap allocation/free paths. This ensures the
> >>>> reported statistics reflect the current memory footprint of each
> >>>> process.
> >>>>
> >>>> Wire the new query into the GET_ARRAY implementation to expose
> >>>> the usage information to userspace.
> >>>>
> >>>> Signed-off-by: Max Zhen <max.zhen@amd.com>
> >>>> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
> >>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> >>> I'm assuming you also have userspace side ready for this too right?
> >>> If you have a link handy can you please include it when committing.
> > Sorry for a naive question, would support in procfs be possible?
>
> Do you mean fdinfo? And yes, the fdinfo is in our upstream stack. I will
> post the patch later.

That's great! I did mean fdinfo. I'd be interested to see the patch,
and possibly the strings matched in perf need updating:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/drm_pmu.c?h=perf-tools-next#n183

Thanks,
Ian

> Lizhi
>
> > There's support in perf for displaying DRM usage stat data from there
> > [1], for example:
> > ```
> > $ perf list drm-
> >
> > List of pre-defined events (to be used in -e or -M):
> >
> >
> > drm:
> >   drm-active-stolen-system0
> >        [Total memory active in one or more engines. Unit: drm_i915]
> >   drm-active-system0
> >        [Total memory active in one or more engines. Unit: drm_i915]
> >   drm-engine-capacity-video
> >        [Engine capacity. Unit: drm_i915]
> >   drm-engine-copy
> >        [Utilization in ns. Unit: drm_i915]
> >   drm-engine-render
> >        [Utilization in ns. Unit: drm_i915]
> >   drm-engine-video
> >        [Utilization in ns. Unit: drm_i915]
> >   drm-engine-video-enhance
> >        [Utilization in ns. Unit: drm_i915]
> >   drm-purgeable-stolen-system0
> >        [Size of resident and purgeable memory buffers. Unit: drm_i915]
> >   drm-purgeable-system0
> >        [Size of resident and purgeable memory buffers. Unit: drm_i915]
> >   drm-resident-stolen-system0
> >        [Size of resident memory buffers. Unit: drm_i915]
> >   drm-resident-system0
> >        [Size of resident memory buffers. Unit: drm_i915]
> >   drm-shared-stolen-system0
> >        [Size of shared memory buffers. Unit: drm_i915]
> >   drm-shared-system0
> >        [Size of shared memory buffers. Unit: drm_i915]
> >   drm-total-stolen-system0
> >        [Size of shared and private memory. Unit: drm_i915]
> >   drm-total-system0
> >        [Size of shared and private memory. Unit: drm_i915]
> >
> >
> > $ perf stat -e drm-engine-render -a sleep 1
> >
> > Performance counter stats for 'system wide':
> >
> >    557,542,732,344 ns   drm-engine-render
> >
> >        1.001575975 seconds time elapsed
> > ```
> >
> > Thanks,
> > Ian
> >
> > [1] https://lore.kernel.org/lkml/20250403202439.57791-1-irogers@google.com/
Re: [PATCH V1] accel/amdxdna: Add per-process BO memory usage query support
Posted by Lizhi Hou 1 week, 2 days ago
On 3/24/26 10:01, Mario Limonciello wrote:
>
>
> On 3/24/26 11:31, Lizhi Hou wrote:
>> From: Max Zhen <max.zhen@amd.com>
>>
>> Add support for querying per-process buffer object (BO) memory
>> usage through the amdxdna GET_ARRAY UAPI.
>>
>> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
>> struct amdxdna_drm_bo_usage to report BO memory usage statistics,
>> including heap, total, and internal usage.
>>
>> Track BO memory usage on a per-client basis by maintaining counters
>> in GEM open/close and heap allocation/free paths. This ensures the
>> reported statistics reflect the current memory footprint of each
>> process.
>>
>> Wire the new query into the GET_ARRAY implementation to expose
>> the usage information to userspace.
>>
>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> I'm assuming you also have userspace side ready for this too right?

Our test case is ready at: 
https://github.com/amd/xdna-driver/blob/main/test/shim_test/shim_test.cpp#L312

And the tooling support is still in progress.

I will put the test code link in the commit message.


Lizhi

> If you have a link handy can you please include it when committing.
>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>
>> ---
>>   drivers/accel/amdxdna/aie2_pci.c        |   4 +
>>   drivers/accel/amdxdna/amdxdna_gem.c     | 134 ++++++++++++++++++++++--
>>   drivers/accel/amdxdna/amdxdna_gem.h     |   7 +-
>>   drivers/accel/amdxdna/amdxdna_pci_drv.c |   6 +-
>>   drivers/accel/amdxdna/amdxdna_pci_drv.h |   4 +
>>   include/uapi/drm/amdxdna_accel.h        |  35 +++++++
>>   6 files changed, 177 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index 9e39bfe75971..f1ac4e00bd9f 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -865,6 +865,7 @@ static int aie2_hwctx_status_cb(struct 
>> amdxdna_hwctx *hwctx, void *arg)
>>       tmp->command_submissions = hwctx->priv->seq;
>>       tmp->command_completions = hwctx->priv->completed;
>>       tmp->pasid = hwctx->client->pasid;
>> +    tmp->heap_usage = hwctx->client->heap_usage;
>>       tmp->priority = hwctx->qos.priority;
>>       tmp->gops = hwctx->qos.gops;
>>       tmp->fps = hwctx->qos.fps;
>> @@ -1148,6 +1149,9 @@ static int aie2_get_array(struct amdxdna_client 
>> *client,
>>       case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
>>           ret = aie2_get_array_async_error(xdna->dev_handle, args);
>>           break;
>> +    case DRM_AMDXDNA_BO_USAGE:
>> +        ret = amdxdna_drm_get_bo_usage(&xdna->ddev, args);
>> +        break;
>>       default:
>>           XDNA_ERR(xdna, "Not supported request parameter %u", 
>> args->param);
>>           ret = -EOPNOTSUPP;
>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c 
>> b/drivers/accel/amdxdna/amdxdna_gem.c
>> index 27712704e42d..238ee244d4a6 100644
>> --- a/drivers/accel/amdxdna/amdxdna_gem.c
>> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
>> @@ -63,6 +63,8 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>>           goto unlock_out;
>>       }
>>   +    client->heap_usage += mem->size;
>> +
>>       drm_gem_object_get(to_gobj(heap));
>>     unlock_out:
>> @@ -74,16 +76,17 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>>   static void
>>   amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
>>   {
>> +    struct amdxdna_client *client = abo->client;
>>       struct amdxdna_gem_obj *heap;
>>   -    mutex_lock(&abo->client->mm_lock);
>> +    mutex_lock(&client->mm_lock);
>>         drm_mm_remove_node(&abo->mm_node);
>> -
>> -    heap = abo->client->dev_heap;
>> +    client->heap_usage -= abo->mem.size;
>> +    heap = client->dev_heap;
>>       drm_gem_object_put(to_gobj(heap));
>>   -    mutex_unlock(&abo->client->mm_lock);
>> +    mutex_unlock(&client->mm_lock);
>>   }
>>     static struct amdxdna_gem_obj *
>> @@ -102,6 +105,8 @@ amdxdna_gem_create_obj(struct drm_device *dev, 
>> size_t size)
>>       abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
>>       abo->mem.uva = AMDXDNA_INVALID_ADDR;
>>       abo->mem.size = size;
>> +    abo->open_ref = 0;
>> +    abo->internal = false;
>>       INIT_LIST_HEAD(&abo->mem.umap_list);
>>         return abo;
>> @@ -508,13 +513,55 @@ static void amdxdna_imported_obj_free(struct 
>> amdxdna_gem_obj *abo)
>>       kfree(abo);
>>   }
>>   +static inline bool
>> +amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> +    /* Do not count imported BOs since the buffer is not allocated 
>> by us. */
>> +    if (is_import_bo(abo))
>> +        return true;
>> +
>> +    /* Already counted as part of HEAP BO */
>> +    if (abo->type == AMDXDNA_BO_DEV)
>> +        return true;
>> +
>> +    return false;
>> +}
>> +
>> +static void
>> +amdxdna_gem_add_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> +    struct amdxdna_client *client = abo->client;
>> +
>> +    if (amdxdna_gem_skip_bo_usage(abo))
>> +        return;
>> +
>> +    guard(mutex)(&client->mm_lock);
>> +
>> +    client->total_bo_usage += abo->mem.size;
>> +    if (abo->internal)
>> +        client->total_int_bo_usage += abo->mem.size;
>> +}
>> +
>> +static void
>> +amdxdna_gem_del_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> +    struct amdxdna_client *client = abo->client;
>> +
>> +    if (amdxdna_gem_skip_bo_usage(abo))
>> +        return;
>> +
>> +    guard(mutex)(&client->mm_lock);
>> +
>> +    client->total_bo_usage -= abo->mem.size;
>> +    if (abo->internal)
>> +        client->total_int_bo_usage -= abo->mem.size;
>> +}
>> +
>>   static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
>>   {
>>       struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
>>       struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
>>   -    XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, 
>> amdxdna_gem_dev_addr(abo));
>> -
>>       amdxdna_hmm_unregister(abo, NULL);
>>       flush_workqueue(xdna->notifier_wq);
>>   @@ -543,9 +590,13 @@ static int amdxdna_gem_obj_open(struct 
>> drm_gem_object *gobj, struct drm_file *fi
>>       int ret;
>>         guard(mutex)(&abo->lock);
>> +    abo->open_ref++;
>>   -    if (!abo->client)
>> +    if (abo->open_ref == 1) {
>> +        /* Attached to the client when first opened by it. */
>>           abo->client = filp->driver_priv;
>> +        amdxdna_gem_add_bo_usage(abo);
>> +    }
>>       if (amdxdna_iova_on(xdna)) {
>>           ret = amdxdna_iommu_map_bo(xdna, abo);
>>           if (ret)
>> @@ -555,6 +606,20 @@ static int amdxdna_gem_obj_open(struct 
>> drm_gem_object *gobj, struct drm_file *fi
>>       return 0;
>>   }
>>   +static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, 
>> struct drm_file *filp)
>> +{
>> +    struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
>> +
>> +    guard(mutex)(&abo->lock);
>> +    abo->open_ref--;
>> +
>> +    if (abo->open_ref == 0) {
>> +        amdxdna_gem_del_bo_usage(abo);
>> +        /* Detach from the client when last closed by it. */
>> +        abo->client = NULL;
>> +    }
>> +}
>> +
>>   static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, 
>> struct iosys_map *map)
>>   {
>>       struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
>> @@ -575,6 +640,7 @@ static const struct drm_gem_object_funcs 
>> amdxdna_gem_dev_obj_funcs = {
>>   static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
>>       .free = amdxdna_gem_obj_free,
>>       .open = amdxdna_gem_obj_open,
>> +    .close = amdxdna_gem_obj_close,
>>       .print_info = drm_gem_shmem_object_print_info,
>>       .pin = drm_gem_shmem_object_pin,
>>       .unpin = drm_gem_shmem_object_unpin,
>> @@ -708,10 +774,13 @@ amdxdna_drm_create_share_bo(struct drm_device 
>> *dev,
>>       if (IS_ERR(abo))
>>           return ERR_CAST(abo);
>>   -    if (args->type == AMDXDNA_BO_DEV_HEAP)
>> +    if (args->type == AMDXDNA_BO_DEV_HEAP) {
>>           abo->type = AMDXDNA_BO_DEV_HEAP;
>> -    else
>> +        abo->internal = true;
>> +    } else {
>>           abo->type = AMDXDNA_BO_SHARE;
>> +        abo->internal = args->type == AMDXDNA_BO_CMD;
>> +    }
>>         return abo;
>>   }
>> @@ -783,6 +852,11 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
>>       gobj = to_gobj(abo);
>>       gobj->funcs = &amdxdna_gem_dev_obj_funcs;
>>       abo->type = AMDXDNA_BO_DEV;
>> +    abo->internal = true;
>> +    /*
>> +     * DEV BOs cannot be alive when client is gone, it's OK to
>> +     * always establish the connection.
>> +     */
>>       abo->client = client;
>>         ret = amdxdna_gem_heap_alloc(abo);
>> @@ -826,7 +900,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device 
>> *dev, void *data, struct drm_f
>>       if (IS_ERR(abo))
>>           return PTR_ERR(abo);
>>   -    /* ready to publish object to userspace */
>> +    /* Ready to publish object to userspace and count for BO usage. */
>>       ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
>>       if (ret) {
>>           XDNA_ERR(xdna, "Create handle failed");
>> @@ -986,3 +1060,43 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device 
>> *dev,
>>       drm_gem_object_put(gobj);
>>       return ret;
>>   }
>> +
>> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct 
>> amdxdna_drm_get_array *args)
>> +{
>> +    size_t min_sz = min(args->element_size, sizeof(struct 
>> amdxdna_drm_bo_usage));
>> +    char __user *buf = u64_to_user_ptr(args->buffer);
>> +    struct amdxdna_dev *xdna = to_xdna_dev(dev);
>> +    struct amdxdna_client *tmp_client;
>> +    struct amdxdna_drm_bo_usage tmp;
>> +
>> +    drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock));
>> +
>> +    if (args->num_element != 1)
>> +        return -EINVAL;
>> +
>> +    if (copy_from_user(&tmp, buf, min_sz))
>> +        return -EFAULT;
>> +
>> +    if (!tmp.pid)
>> +        return -EINVAL;
>> +
>> +    tmp.total_usage = 0;
>> +    tmp.internal_usage = 0;
>> +    tmp.heap_usage = 0;
>> +
>> +    list_for_each_entry(tmp_client, &xdna->client_list, node) {
>> +        if (tmp.pid != tmp_client->pid)
>> +            continue;
>> +
>> +        mutex_lock(&tmp_client->mm_lock);
>> +        tmp.total_usage += tmp_client->total_bo_usage;
>> +        tmp.internal_usage += tmp_client->total_int_bo_usage;
>> +        tmp.heap_usage += tmp_client->heap_usage;
>> +        mutex_unlock(&tmp_client->mm_lock);
>> +    }
>> +
>> +    if (copy_to_user(buf, &tmp, min_sz))
>> +        return -EFAULT;
>> +
>> +    return 0;
>> +}
>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.h 
>> b/drivers/accel/amdxdna/amdxdna_gem.h
>> index a77d9344f8a4..4fc48a1189d2 100644
>> --- a/drivers/accel/amdxdna/amdxdna_gem.h
>> +++ b/drivers/accel/amdxdna/amdxdna_gem.h
>> @@ -41,8 +41,9 @@ struct amdxdna_gem_obj {
>>       struct amdxdna_client        *client;
>>       u8                type;
>>       bool                pinned;
>> -    struct mutex            lock; /* Protects: pinned, mem.kva */
>> +    struct mutex            lock; /* Protects: pinned, mem.kva, 
>> open_ref */
>>       struct amdxdna_mem        mem;
>> +    int                open_ref;
>>         /* Below members are initialized when needed */
>>       struct drm_mm            mm; /* For AMDXDNA_BO_DEV_HEAP */
>> @@ -50,6 +51,9 @@ struct amdxdna_gem_obj {
>>       u32                assigned_hwctx;
>>       struct dma_buf            *dma_buf;
>>       struct dma_buf_attachment    *attach;
>> +
>> +    /* True if the BO is managed by XRT rather than the application */
>> +    bool                internal;
>>   };
>>     #define to_gobj(obj)    (&(obj)->base.base)
>> @@ -98,5 +102,6 @@ void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
>>   int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, 
>> struct drm_file *filp);
>>   int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void 
>> *data, struct drm_file *filp);
>>   int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, 
>> struct drm_file *filp);
>> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct 
>> amdxdna_drm_get_array *args);
>>     #endif /* _AMDXDNA_GEM_H_ */
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> index d83be00daf2b..b50a7d1f8a11 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -36,9 +36,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
>>    * 0.5: Support getting telemetry data
>>    * 0.6: Support preemption
>>    * 0.7: Support getting power and utilization data
>> + * 0.8: Support BO usage query
>>    */
>>   #define AMDXDNA_DRIVER_MAJOR        0
>> -#define AMDXDNA_DRIVER_MINOR        7
>> +#define AMDXDNA_DRIVER_MINOR        8
>>     /*
>>    * Bind the driver base on (vendor_id, device_id) pair and later 
>> use the
>> @@ -120,11 +121,12 @@ static void amdxdna_client_cleanup(struct 
>> amdxdna_client *client)
>>       amdxdna_hwctx_remove_all(client);
>>       xa_destroy(&client->hwctx_xa);
>>       cleanup_srcu_struct(&client->hwctx_srcu);
>> -    mutex_destroy(&client->mm_lock);
>>         if (client->dev_heap)
>>           drm_gem_object_put(to_gobj(client->dev_heap));
>>   +    mutex_destroy(&client->mm_lock);
>> +
>>       if (!IS_ERR_OR_NULL(client->sva))
>>           iommu_sva_unbind_device(client->sva);
>>       mmdrop(client->mm);
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> index e91d14ae5190..0661749917d6 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> @@ -138,6 +138,10 @@ struct amdxdna_client {
>>       struct iommu_sva        *sva;
>>       int                pasid;
>>       struct mm_struct        *mm;
>> +
>> +    size_t                heap_usage;
>> +    size_t                total_bo_usage;
>> +    size_t                total_int_bo_usage;
>>   };
>>     #define amdxdna_for_each_hwctx(client, hwctx_id, entry)        \
>> diff --git a/include/uapi/drm/amdxdna_accel.h 
>> b/include/uapi/drm/amdxdna_accel.h
>> index bddaaaf945cf..61d3686fa3b1 100644
>> --- a/include/uapi/drm/amdxdna_accel.h
>> +++ b/include/uapi/drm/amdxdna_accel.h
>> @@ -591,8 +591,37 @@ struct amdxdna_async_error {
>>       __u64 ex_err_code;
>>   };
>>   +/**
>> + * struct amdxdna_drm_bo_usage - all types of BO usage
>> + * BOs managed by XRT/SHIM/driver are counted as internal.
>> + * Others, which are managed by applications, are counted as external.
>> + *
>> + * Among all types of BOs:
>> + *   AMDXDNA_BO_DEV_HEAP - is counted for internal.
>> + *   AMDXDNA_BO_SHARE    - is counted for external.
>> + *   AMDXDNA_BO_CMD      - is counted for internal.
>> + *   AMDXDNA_BO_DEV      - is counted by heap_usage only, not internal
>> + *                         or external. It does not add to the total 
>> memory
>> + *                         footprint since its mem comes from heap 
>> which is
>> + *                         already counted as internal.
>> + */
>> +struct amdxdna_drm_bo_usage {
>> +    /** @pid: The ID of the process to query from. */
>> +    __s64 pid;
>> +    /** @total_usage: Total BO size used by process. */
>> +    __u64 total_usage;
>> +    /** @internal_usage: Total internal BO size used by process. */
>> +    __u64 internal_usage;
>> +    /** @heap_usage: Total device BO size used by process. */
>> +    __u64 heap_usage;
>> +};
>> +
>> +/*
>> + * Supported params in struct amdxdna_drm_get_array
>> + */
>>   #define DRM_AMDXDNA_HW_CONTEXT_ALL    0
>>   #define DRM_AMDXDNA_HW_LAST_ASYNC_ERR    2
>> +#define DRM_AMDXDNA_BO_USAGE        6
>>     /**
>>    * struct amdxdna_drm_get_array - Get information array.
>> @@ -605,6 +634,12 @@ struct amdxdna_drm_get_array {
>>        *
>>        * %DRM_AMDXDNA_HW_CONTEXT_ALL:
>>        * Returns all created hardware contexts.
>> +     *
>> +     * %DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
>> +     * Returns last async error.
>> +     *
>> +     * %DRM_AMDXDNA_BO_USAGE:
>> +     * Returns usage of heap/internal/external BOs.
>>        */
>>       __u32 param;
>>       /**
>