drivers/accel/amdxdna/aie2_pci.c | 4 + drivers/accel/amdxdna/amdxdna_gem.c | 134 ++++++++++++++++++++++-- drivers/accel/amdxdna/amdxdna_gem.h | 7 +- drivers/accel/amdxdna/amdxdna_pci_drv.c | 6 +- drivers/accel/amdxdna/amdxdna_pci_drv.h | 4 + include/uapi/drm/amdxdna_accel.h | 35 +++++++ 6 files changed, 177 insertions(+), 13 deletions(-)
From: Max Zhen <max.zhen@amd.com>
Add support for querying per-process buffer object (BO) memory
usage through the amdxdna GET_ARRAY UAPI.
Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
struct amdxdna_drm_bo_usage to report BO memory usage statistics,
including heap, total, and internal usage.
Track BO memory usage on a per-client basis by maintaining counters
in GEM open/close and heap allocation/free paths. This ensures the
reported statistics reflect the current memory footprint of each
process.
Wire the new query into the GET_ARRAY implementation to expose
the usage information to userspace.
Signed-off-by: Max Zhen <max.zhen@amd.com>
Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie2_pci.c | 4 +
drivers/accel/amdxdna/amdxdna_gem.c | 134 ++++++++++++++++++++++--
drivers/accel/amdxdna/amdxdna_gem.h | 7 +-
drivers/accel/amdxdna/amdxdna_pci_drv.c | 6 +-
drivers/accel/amdxdna/amdxdna_pci_drv.h | 4 +
include/uapi/drm/amdxdna_accel.h | 35 +++++++
6 files changed, 177 insertions(+), 13 deletions(-)
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 9e39bfe75971..f1ac4e00bd9f 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -865,6 +865,7 @@ static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg)
tmp->command_submissions = hwctx->priv->seq;
tmp->command_completions = hwctx->priv->completed;
tmp->pasid = hwctx->client->pasid;
+ tmp->heap_usage = hwctx->client->heap_usage;
tmp->priority = hwctx->qos.priority;
tmp->gops = hwctx->qos.gops;
tmp->fps = hwctx->qos.fps;
@@ -1148,6 +1149,9 @@ static int aie2_get_array(struct amdxdna_client *client,
case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
ret = aie2_get_array_async_error(xdna->dev_handle, args);
break;
+ case DRM_AMDXDNA_BO_USAGE:
+ ret = amdxdna_drm_get_bo_usage(&xdna->ddev, args);
+ break;
default:
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 27712704e42d..238ee244d4a6 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -63,6 +63,8 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
goto unlock_out;
}
+ client->heap_usage += mem->size;
+
drm_gem_object_get(to_gobj(heap));
unlock_out:
@@ -74,16 +76,17 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
static void
amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
{
+ struct amdxdna_client *client = abo->client;
struct amdxdna_gem_obj *heap;
- mutex_lock(&abo->client->mm_lock);
+ mutex_lock(&client->mm_lock);
drm_mm_remove_node(&abo->mm_node);
-
- heap = abo->client->dev_heap;
+ client->heap_usage -= abo->mem.size;
+ heap = client->dev_heap;
drm_gem_object_put(to_gobj(heap));
- mutex_unlock(&abo->client->mm_lock);
+ mutex_unlock(&client->mm_lock);
}
static struct amdxdna_gem_obj *
@@ -102,6 +105,8 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
abo->mem.uva = AMDXDNA_INVALID_ADDR;
abo->mem.size = size;
+ abo->open_ref = 0;
+ abo->internal = false;
INIT_LIST_HEAD(&abo->mem.umap_list);
return abo;
@@ -508,13 +513,55 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
kfree(abo);
}
+static inline bool
+amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
+{
+ /* Do not count imported BOs since the buffer is not allocated by us. */
+ if (is_import_bo(abo))
+ return true;
+
+ /* Already counted as part of HEAP BO */
+ if (abo->type == AMDXDNA_BO_DEV)
+ return true;
+
+ return false;
+}
+
+static void
+amdxdna_gem_add_bo_usage(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_client *client = abo->client;
+
+ if (amdxdna_gem_skip_bo_usage(abo))
+ return;
+
+ guard(mutex)(&client->mm_lock);
+
+ client->total_bo_usage += abo->mem.size;
+ if (abo->internal)
+ client->total_int_bo_usage += abo->mem.size;
+}
+
+static void
+amdxdna_gem_del_bo_usage(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_client *client = abo->client;
+
+ if (amdxdna_gem_skip_bo_usage(abo))
+ return;
+
+ guard(mutex)(&client->mm_lock);
+
+ client->total_bo_usage -= abo->mem.size;
+ if (abo->internal)
+ client->total_int_bo_usage -= abo->mem.size;
+}
+
static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
{
struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
- XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, amdxdna_gem_dev_addr(abo));
-
amdxdna_hmm_unregister(abo, NULL);
flush_workqueue(xdna->notifier_wq);
@@ -543,9 +590,13 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
int ret;
guard(mutex)(&abo->lock);
+ abo->open_ref++;
- if (!abo->client)
+ if (abo->open_ref == 1) {
+ /* Attach to the client when first opened by it. */
abo->client = filp->driver_priv;
+ amdxdna_gem_add_bo_usage(abo);
+ }
if (amdxdna_iova_on(xdna)) {
ret = amdxdna_iommu_map_bo(xdna, abo);
if (ret)
@@ -555,6 +606,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
return 0;
}
+static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *filp)
+{
+ struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
+
+ guard(mutex)(&abo->lock);
+ abo->open_ref--;
+
+ if (abo->open_ref == 0) {
+ amdxdna_gem_del_bo_usage(abo);
+ /* Detach from the client when last closed by it. */
+ abo->client = NULL;
+ }
+}
+
static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
{
struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
@@ -575,6 +640,7 @@ static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
.free = amdxdna_gem_obj_free,
.open = amdxdna_gem_obj_open,
+ .close = amdxdna_gem_obj_close,
.print_info = drm_gem_shmem_object_print_info,
.pin = drm_gem_shmem_object_pin,
.unpin = drm_gem_shmem_object_unpin,
@@ -708,10 +774,13 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
if (IS_ERR(abo))
return ERR_CAST(abo);
- if (args->type == AMDXDNA_BO_DEV_HEAP)
+ if (args->type == AMDXDNA_BO_DEV_HEAP) {
abo->type = AMDXDNA_BO_DEV_HEAP;
- else
+ abo->internal = true;
+ } else {
abo->type = AMDXDNA_BO_SHARE;
+ abo->internal = args->type == AMDXDNA_BO_CMD;
+ }
return abo;
}
@@ -783,6 +852,11 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
gobj = to_gobj(abo);
gobj->funcs = &amdxdna_gem_dev_obj_funcs;
abo->type = AMDXDNA_BO_DEV;
+ abo->internal = true;
+ /*
+ * DEV BOs cannot outlive the client, so it is OK to
+ * always establish the connection.
+ */
abo->client = client;
ret = amdxdna_gem_heap_alloc(abo);
@@ -826,7 +900,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
if (IS_ERR(abo))
return PTR_ERR(abo);
- /* ready to publish object to userspace */
+ /* Ready to publish object to userspace and count for BO usage. */
ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
if (ret) {
XDNA_ERR(xdna, "Create handle failed");
@@ -986,3 +1060,43 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
drm_gem_object_put(gobj);
return ret;
}
+
+int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args)
+{
+ size_t min_sz = min(args->element_size, sizeof(struct amdxdna_drm_bo_usage));
+ char __user *buf = u64_to_user_ptr(args->buffer);
+ struct amdxdna_dev *xdna = to_xdna_dev(dev);
+ struct amdxdna_client *tmp_client;
+ struct amdxdna_drm_bo_usage tmp;
+
+ drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock));
+
+ if (args->num_element != 1)
+ return -EINVAL;
+
+ if (copy_from_user(&tmp, buf, min_sz))
+ return -EFAULT;
+
+ if (!tmp.pid)
+ return -EINVAL;
+
+ tmp.total_usage = 0;
+ tmp.internal_usage = 0;
+ tmp.heap_usage = 0;
+
+ list_for_each_entry(tmp_client, &xdna->client_list, node) {
+ if (tmp.pid != tmp_client->pid)
+ continue;
+
+ mutex_lock(&tmp_client->mm_lock);
+ tmp.total_usage += tmp_client->total_bo_usage;
+ tmp.internal_usage += tmp_client->total_int_bo_usage;
+ tmp.heap_usage += tmp_client->heap_usage;
+ mutex_unlock(&tmp_client->mm_lock);
+ }
+
+ if (copy_to_user(buf, &tmp, min_sz))
+ return -EFAULT;
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
index a77d9344f8a4..4fc48a1189d2 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -41,8 +41,9 @@ struct amdxdna_gem_obj {
struct amdxdna_client *client;
u8 type;
bool pinned;
- struct mutex lock; /* Protects: pinned, mem.kva */
+ struct mutex lock; /* Protects: pinned, mem.kva, open_ref */
struct amdxdna_mem mem;
+ int open_ref;
/* Below members are initialized when needed */
struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */
@@ -50,6 +51,9 @@ struct amdxdna_gem_obj {
u32 assigned_hwctx;
struct dma_buf *dma_buf;
struct dma_buf_attachment *attach;
+
+ /* True if the BO is managed by XRT, not by the application */
+ bool internal;
};
#define to_gobj(obj) (&(obj)->base.base)
@@ -98,5 +102,6 @@ void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args);
#endif /* _AMDXDNA_GEM_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index d83be00daf2b..b50a7d1f8a11 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -36,9 +36,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
* 0.5: Support getting telemetry data
* 0.6: Support preemption
* 0.7: Support getting power and utilization data
+ * 0.8: Support BO usage query
*/
#define AMDXDNA_DRIVER_MAJOR 0
-#define AMDXDNA_DRIVER_MINOR 7
+#define AMDXDNA_DRIVER_MINOR 8
/*
* Bind the driver base on (vendor_id, device_id) pair and later use the
@@ -120,11 +121,12 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
amdxdna_hwctx_remove_all(client);
xa_destroy(&client->hwctx_xa);
cleanup_srcu_struct(&client->hwctx_srcu);
- mutex_destroy(&client->mm_lock);
if (client->dev_heap)
drm_gem_object_put(to_gobj(client->dev_heap));
+ mutex_destroy(&client->mm_lock);
+
if (!IS_ERR_OR_NULL(client->sva))
iommu_sva_unbind_device(client->sva);
mmdrop(client->mm);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index e91d14ae5190..0661749917d6 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -138,6 +138,10 @@ struct amdxdna_client {
struct iommu_sva *sva;
int pasid;
struct mm_struct *mm;
+
+ size_t heap_usage;
+ size_t total_bo_usage;
+ size_t total_int_bo_usage;
};
#define amdxdna_for_each_hwctx(client, hwctx_id, entry) \
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index bddaaaf945cf..61d3686fa3b1 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -591,8 +591,37 @@ struct amdxdna_async_error {
__u64 ex_err_code;
};
+/**
+ * struct amdxdna_drm_bo_usage - all types of BO usage
+ * BOs managed by XRT/SHIM/driver are counted as internal.
+ * Others, which are managed by applications, are counted as external.
+ *
+ * Among all types of BOs:
+ * AMDXDNA_BO_DEV_HEAP - is counted for internal.
+ * AMDXDNA_BO_SHARE - is counted for external.
+ * AMDXDNA_BO_CMD - is counted for internal.
+ * AMDXDNA_BO_DEV - is counted by heap_usage only, not internal
+ * or external. It does not add to the total memory
+ * footprint since its mem comes from heap which is
+ * already counted as internal.
+ */
+struct amdxdna_drm_bo_usage {
+ /** @pid: The ID of the process to query from. */
+ __s64 pid;
+ /** @total_usage: Total BO size used by process. */
+ __u64 total_usage;
+ /** @internal_usage: Total internal BO size used by process. */
+ __u64 internal_usage;
+ /** @heap_usage: Total device BO size used by process. */
+ __u64 heap_usage;
+};
+
+/*
+ * Supported params in struct amdxdna_drm_get_array
+ */
#define DRM_AMDXDNA_HW_CONTEXT_ALL 0
#define DRM_AMDXDNA_HW_LAST_ASYNC_ERR 2
+#define DRM_AMDXDNA_BO_USAGE 6
/**
* struct amdxdna_drm_get_array - Get information array.
@@ -605,6 +634,12 @@ struct amdxdna_drm_get_array {
*
* %DRM_AMDXDNA_HW_CONTEXT_ALL:
* Returns all created hardware contexts.
+ *
+ * %DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
+ * Returns last async error.
+ *
+ * %DRM_AMDXDNA_BO_USAGE:
+ * Returns usage of heap/internal/external BOs.
*/
__u32 param;
/**
--
2.34.1
On 3/24/26 11:31, Lizhi Hou wrote:
> From: Max Zhen <max.zhen@amd.com>
>
> Add support for querying per-process buffer object (BO) memory
> usage through the amdxdna GET_ARRAY UAPI.
>
> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
> struct amdxdna_drm_bo_usage to report BO memory usage statistics,
> including heap, total, and internal usage.
>
> Track BO memory usage on a per-client basis by maintaining counters
> in GEM open/close and heap allocation/free paths. This ensures the
> reported statistics reflect the current memory footprint of each
> process.
>
> Wire the new query into the GET_ARRAY implementation to expose
> the usage information to userspace.
>
> Signed-off-by: Max Zhen <max.zhen@amd.com>
> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
I'm assuming you also have the userspace side ready for this, right?
If you have a link handy, can you please include it when committing?
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
> drivers/accel/amdxdna/aie2_pci.c | 4 +
> drivers/accel/amdxdna/amdxdna_gem.c | 134 ++++++++++++++++++++++--
> drivers/accel/amdxdna/amdxdna_gem.h | 7 +-
> drivers/accel/amdxdna/amdxdna_pci_drv.c | 6 +-
> drivers/accel/amdxdna/amdxdna_pci_drv.h | 4 +
> include/uapi/drm/amdxdna_accel.h | 35 +++++++
> 6 files changed, 177 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 9e39bfe75971..f1ac4e00bd9f 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -865,6 +865,7 @@ static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg)
> tmp->command_submissions = hwctx->priv->seq;
> tmp->command_completions = hwctx->priv->completed;
> tmp->pasid = hwctx->client->pasid;
> + tmp->heap_usage = hwctx->client->heap_usage;
> tmp->priority = hwctx->qos.priority;
> tmp->gops = hwctx->qos.gops;
> tmp->fps = hwctx->qos.fps;
> @@ -1148,6 +1149,9 @@ static int aie2_get_array(struct amdxdna_client *client,
> case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
> ret = aie2_get_array_async_error(xdna->dev_handle, args);
> break;
> + case DRM_AMDXDNA_BO_USAGE:
> + ret = amdxdna_drm_get_bo_usage(&xdna->ddev, args);
> + break;
> default:
> XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
> ret = -EOPNOTSUPP;
> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
> index 27712704e42d..238ee244d4a6 100644
> --- a/drivers/accel/amdxdna/amdxdna_gem.c
> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
> @@ -63,6 +63,8 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
> goto unlock_out;
> }
>
> + client->heap_usage += mem->size;
> +
> drm_gem_object_get(to_gobj(heap));
>
> unlock_out:
> @@ -74,16 +76,17 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
> static void
> amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
> {
> + struct amdxdna_client *client = abo->client;
> struct amdxdna_gem_obj *heap;
>
> - mutex_lock(&abo->client->mm_lock);
> + mutex_lock(&client->mm_lock);
>
> drm_mm_remove_node(&abo->mm_node);
> -
> - heap = abo->client->dev_heap;
> + client->heap_usage -= abo->mem.size;
> + heap = client->dev_heap;
> drm_gem_object_put(to_gobj(heap));
>
> - mutex_unlock(&abo->client->mm_lock);
> + mutex_unlock(&client->mm_lock);
> }
>
> static struct amdxdna_gem_obj *
> @@ -102,6 +105,8 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
> abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
> abo->mem.uva = AMDXDNA_INVALID_ADDR;
> abo->mem.size = size;
> + abo->open_ref = 0;
> + abo->internal = false;
> INIT_LIST_HEAD(&abo->mem.umap_list);
>
> return abo;
> @@ -508,13 +513,55 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo)
> kfree(abo);
> }
>
> +static inline bool
> +amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
> +{
> + /* Do not count imported BOs since the buffer is not allocated by us. */
> + if (is_import_bo(abo))
> + return true;
> +
> + /* Already counted as part of HEAP BO */
> + if (abo->type == AMDXDNA_BO_DEV)
> + return true;
> +
> + return false;
> +}
> +
> +static void
> +amdxdna_gem_add_bo_usage(struct amdxdna_gem_obj *abo)
> +{
> + struct amdxdna_client *client = abo->client;
> +
> + if (amdxdna_gem_skip_bo_usage(abo))
> + return;
> +
> + guard(mutex)(&client->mm_lock);
> +
> + client->total_bo_usage += abo->mem.size;
> + if (abo->internal)
> + client->total_int_bo_usage += abo->mem.size;
> +}
> +
> +static void
> +amdxdna_gem_del_bo_usage(struct amdxdna_gem_obj *abo)
> +{
> + struct amdxdna_client *client = abo->client;
> +
> + if (amdxdna_gem_skip_bo_usage(abo))
> + return;
> +
> + guard(mutex)(&client->mm_lock);
> +
> + client->total_bo_usage -= abo->mem.size;
> + if (abo->internal)
> + client->total_int_bo_usage -= abo->mem.size;
> +}
> +
> static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
> {
> struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
> struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
>
> - XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, amdxdna_gem_dev_addr(abo));
> -
> amdxdna_hmm_unregister(abo, NULL);
> flush_workqueue(xdna->notifier_wq);
>
> @@ -543,9 +590,13 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
> int ret;
>
> guard(mutex)(&abo->lock);
> + abo->open_ref++;
>
> - if (!abo->client)
> + if (abo->open_ref == 1) {
> + /* Attached to the client when first opened by it. */
> abo->client = filp->driver_priv;
> + amdxdna_gem_add_bo_usage(abo);
> + }
> if (amdxdna_iova_on(xdna)) {
> ret = amdxdna_iommu_map_bo(xdna, abo);
> if (ret)
> @@ -555,6 +606,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi
> return 0;
> }
>
> +static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *filp)
> +{
> + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
> +
> + guard(mutex)(&abo->lock);
> + abo->open_ref--;
> +
> + if (abo->open_ref == 0) {
> + amdxdna_gem_del_bo_usage(abo);
> + /* Detach from the client when last closed by it. */
> + abo->client = NULL;
> + }
> +}
> +
> static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
> {
> struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
> @@ -575,6 +640,7 @@ static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
> static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
> .free = amdxdna_gem_obj_free,
> .open = amdxdna_gem_obj_open,
> + .close = amdxdna_gem_obj_close,
> .print_info = drm_gem_shmem_object_print_info,
> .pin = drm_gem_shmem_object_pin,
> .unpin = drm_gem_shmem_object_unpin,
> @@ -708,10 +774,13 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
> if (IS_ERR(abo))
> return ERR_CAST(abo);
>
> - if (args->type == AMDXDNA_BO_DEV_HEAP)
> + if (args->type == AMDXDNA_BO_DEV_HEAP) {
> abo->type = AMDXDNA_BO_DEV_HEAP;
> - else
> + abo->internal = true;
> + } else {
> abo->type = AMDXDNA_BO_SHARE;
> + abo->internal = args->type == AMDXDNA_BO_CMD;
> + }
>
> return abo;
> }
> @@ -783,6 +852,11 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
> gobj = to_gobj(abo);
> gobj->funcs = &amdxdna_gem_dev_obj_funcs;
> abo->type = AMDXDNA_BO_DEV;
> + abo->internal = true;
> + /*
> + * DEV BOs cannot be alive when client is gone, it's OK to
> + * always establish the connection.
> + */
> abo->client = client;
>
> ret = amdxdna_gem_heap_alloc(abo);
> @@ -826,7 +900,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
> if (IS_ERR(abo))
> return PTR_ERR(abo);
>
> - /* ready to publish object to userspace */
> + /* Ready to publish object to userspace and count for BO usage. */
> ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
> if (ret) {
> XDNA_ERR(xdna, "Create handle failed");
> @@ -986,3 +1060,43 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
> drm_gem_object_put(gobj);
> return ret;
> }
> +
> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args)
> +{
> + size_t min_sz = min(args->element_size, sizeof(struct amdxdna_drm_bo_usage));
> + char __user *buf = u64_to_user_ptr(args->buffer);
> + struct amdxdna_dev *xdna = to_xdna_dev(dev);
> + struct amdxdna_client *tmp_client;
> + struct amdxdna_drm_bo_usage tmp;
> +
> + drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock));
> +
> + if (args->num_element != 1)
> + return -EINVAL;
> +
> + if (copy_from_user(&tmp, buf, min_sz))
> + return -EFAULT;
> +
> + if (!tmp.pid)
> + return -EINVAL;
> +
> + tmp.total_usage = 0;
> + tmp.internal_usage = 0;
> + tmp.heap_usage = 0;
> +
> + list_for_each_entry(tmp_client, &xdna->client_list, node) {
> + if (tmp.pid != tmp_client->pid)
> + continue;
> +
> + mutex_lock(&tmp_client->mm_lock);
> + tmp.total_usage += tmp_client->total_bo_usage;
> + tmp.internal_usage += tmp_client->total_int_bo_usage;
> + tmp.heap_usage += tmp_client->heap_usage;
> + mutex_unlock(&tmp_client->mm_lock);
> + }
> +
> + if (copy_to_user(buf, &tmp, min_sz))
> + return -EFAULT;
> +
> + return 0;
> +}
> diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
> index a77d9344f8a4..4fc48a1189d2 100644
> --- a/drivers/accel/amdxdna/amdxdna_gem.h
> +++ b/drivers/accel/amdxdna/amdxdna_gem.h
> @@ -41,8 +41,9 @@ struct amdxdna_gem_obj {
> struct amdxdna_client *client;
> u8 type;
> bool pinned;
> - struct mutex lock; /* Protects: pinned, mem.kva */
> + struct mutex lock; /* Protects: pinned, mem.kva, open_ref */
> struct amdxdna_mem mem;
> + int open_ref;
>
> /* Below members are initialized when needed */
> struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */
> @@ -50,6 +51,9 @@ struct amdxdna_gem_obj {
> u32 assigned_hwctx;
> struct dma_buf *dma_buf;
> struct dma_buf_attachment *attach;
> +
> + /* True, if BO is managed by XRT, not application */
> + bool internal;
> };
>
> #define to_gobj(obj) (&(obj)->base.base)
> @@ -98,5 +102,6 @@ void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
> int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args);
>
> #endif /* _AMDXDNA_GEM_H_ */
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index d83be00daf2b..b50a7d1f8a11 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -36,9 +36,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
> * 0.5: Support getting telemetry data
> * 0.6: Support preemption
> * 0.7: Support getting power and utilization data
> + * 0.8: Support BO usage query
> */
> #define AMDXDNA_DRIVER_MAJOR 0
> -#define AMDXDNA_DRIVER_MINOR 7
> +#define AMDXDNA_DRIVER_MINOR 8
>
> /*
> * Bind the driver base on (vendor_id, device_id) pair and later use the
> @@ -120,11 +121,12 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
> amdxdna_hwctx_remove_all(client);
> xa_destroy(&client->hwctx_xa);
> cleanup_srcu_struct(&client->hwctx_srcu);
> - mutex_destroy(&client->mm_lock);
>
> if (client->dev_heap)
> drm_gem_object_put(to_gobj(client->dev_heap));
>
> + mutex_destroy(&client->mm_lock);
> +
> if (!IS_ERR_OR_NULL(client->sva))
> iommu_sva_unbind_device(client->sva);
> mmdrop(client->mm);
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> index e91d14ae5190..0661749917d6 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> @@ -138,6 +138,10 @@ struct amdxdna_client {
> struct iommu_sva *sva;
> int pasid;
> struct mm_struct *mm;
> +
> + size_t heap_usage;
> + size_t total_bo_usage;
> + size_t total_int_bo_usage;
> };
>
> #define amdxdna_for_each_hwctx(client, hwctx_id, entry) \
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index bddaaaf945cf..61d3686fa3b1 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -591,8 +591,37 @@ struct amdxdna_async_error {
> __u64 ex_err_code;
> };
>
> +/**
> + * struct amdxdna_drm_bo_usage - all types of BO usage
> + * BOs managed by XRT/SHIM/driver is counted as internal.
> + * Others are counted as external which are managed by applications.
> + *
> + * Among all types of BOs:
> + * AMDXDNA_BO_DEV_HEAP - is counted for internal.
> + * AMDXDNA_BO_SHARE - is counted for external.
> + * AMDXDNA_BO_CMD - is counted for internal.
> + * AMDXDNA_BO_DEV - is counted by heap_usage only, not internal
> + * or external. It does not add to the total memory
> + * footprint since its mem comes from heap which is
> + * already counted as internal.
> + */
> +struct amdxdna_drm_bo_usage {
> + /** @pid: The ID of the process to query from. */
> + __s64 pid;
> + /** @total_usage: Total BO size used by process. */
> + __u64 total_usage;
> + /** @internal_usage: Total internal BO size used by process. */
> + __u64 internal_usage;
> + /** @heap_usage: Total device BO size used by process. */
> + __u64 heap_usage;
> +};
> +
> +/*
> + * Supported params in struct amdxdna_drm_get_array
> + */
> #define DRM_AMDXDNA_HW_CONTEXT_ALL 0
> #define DRM_AMDXDNA_HW_LAST_ASYNC_ERR 2
> +#define DRM_AMDXDNA_BO_USAGE 6
>
> /**
> * struct amdxdna_drm_get_array - Get information array.
> @@ -605,6 +634,12 @@ struct amdxdna_drm_get_array {
> *
> * %DRM_AMDXDNA_HW_CONTEXT_ALL:
> * Returns all created hardware contexts.
> + *
> + * %DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
> + * Returns last async error.
> + *
> + * %DRM_AMDXDNA_BO_USAGE:
> + * Returns usage of heap/internal/external BOs.
> */
> __u32 param;
> /**
Applied to drm-misc-next.
On 3/24/26 10:01, Mario Limonciello wrote:
>
>
> On 3/24/26 11:31, Lizhi Hou wrote:
>> From: Max Zhen <max.zhen@amd.com>
>>
>> Add support for querying per-process buffer object (BO) memory
>> usage through the amdxdna GET_ARRAY UAPI.
>>
>> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
>> struct amdxdna_drm_bo_usage to report BO memory usage statistics,
>> including heap, total, and internal usage.
>>
>> Track BO memory usage on a per-client basis by maintaining counters
>> in GEM open/close and heap allocation/free paths. This ensures the
>> reported statistics reflect the current memory footprint of each
>> process.
>>
>> Wire the new query into the GET_ARRAY implementation to expose
>> the usage information to userspace.
>>
>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> I'm assuming you also have userspace side ready for this too right?
> If you have a link handy can you please include it when committing.
>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>
>> ---
>> drivers/accel/amdxdna/aie2_pci.c | 4 +
>> drivers/accel/amdxdna/amdxdna_gem.c | 134 ++++++++++++++++++++++--
>> drivers/accel/amdxdna/amdxdna_gem.h | 7 +-
>> drivers/accel/amdxdna/amdxdna_pci_drv.c | 6 +-
>> drivers/accel/amdxdna/amdxdna_pci_drv.h | 4 +
>> include/uapi/drm/amdxdna_accel.h | 35 +++++++
>> 6 files changed, 177 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index 9e39bfe75971..f1ac4e00bd9f 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -865,6 +865,7 @@ static int aie2_hwctx_status_cb(struct
>> amdxdna_hwctx *hwctx, void *arg)
>> tmp->command_submissions = hwctx->priv->seq;
>> tmp->command_completions = hwctx->priv->completed;
>> tmp->pasid = hwctx->client->pasid;
>> + tmp->heap_usage = hwctx->client->heap_usage;
>> tmp->priority = hwctx->qos.priority;
>> tmp->gops = hwctx->qos.gops;
>> tmp->fps = hwctx->qos.fps;
>> @@ -1148,6 +1149,9 @@ static int aie2_get_array(struct amdxdna_client
>> *client,
>> case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
>> ret = aie2_get_array_async_error(xdna->dev_handle, args);
>> break;
>> + case DRM_AMDXDNA_BO_USAGE:
>> + ret = amdxdna_drm_get_bo_usage(&xdna->ddev, args);
>> + break;
>> default:
>> XDNA_ERR(xdna, "Not supported request parameter %u",
>> args->param);
>> ret = -EOPNOTSUPP;
>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c
>> b/drivers/accel/amdxdna/amdxdna_gem.c
>> index 27712704e42d..238ee244d4a6 100644
>> --- a/drivers/accel/amdxdna/amdxdna_gem.c
>> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
>> @@ -63,6 +63,8 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>> goto unlock_out;
>> }
>> + client->heap_usage += mem->size;
>> +
>> drm_gem_object_get(to_gobj(heap));
>> unlock_out:
>> @@ -74,16 +76,17 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>> static void
>> amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
>> {
>> + struct amdxdna_client *client = abo->client;
>> struct amdxdna_gem_obj *heap;
>> - mutex_lock(&abo->client->mm_lock);
>> + mutex_lock(&client->mm_lock);
>> drm_mm_remove_node(&abo->mm_node);
>> -
>> - heap = abo->client->dev_heap;
>> + client->heap_usage -= abo->mem.size;
>> + heap = client->dev_heap;
>> drm_gem_object_put(to_gobj(heap));
>> - mutex_unlock(&abo->client->mm_lock);
>> + mutex_unlock(&client->mm_lock);
>> }
>> static struct amdxdna_gem_obj *
>> @@ -102,6 +105,8 @@ amdxdna_gem_create_obj(struct drm_device *dev,
>> size_t size)
>> abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
>> abo->mem.uva = AMDXDNA_INVALID_ADDR;
>> abo->mem.size = size;
>> + abo->open_ref = 0;
>> + abo->internal = false;
>> INIT_LIST_HEAD(&abo->mem.umap_list);
>> return abo;
>> @@ -508,13 +513,55 @@ static void amdxdna_imported_obj_free(struct
>> amdxdna_gem_obj *abo)
>> kfree(abo);
>> }
>> +static inline bool
>> +amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> + /* Do not count imported BOs since the buffer is not allocated
>> by us. */
>> + if (is_import_bo(abo))
>> + return true;
>> +
>> + /* Already counted as part of HEAP BO */
>> + if (abo->type == AMDXDNA_BO_DEV)
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> +static void
>> +amdxdna_gem_add_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> + struct amdxdna_client *client = abo->client;
>> +
>> + if (amdxdna_gem_skip_bo_usage(abo))
>> + return;
>> +
>> + guard(mutex)(&client->mm_lock);
>> +
>> + client->total_bo_usage += abo->mem.size;
>> + if (abo->internal)
>> + client->total_int_bo_usage += abo->mem.size;
>> +}
>> +
>> +static void
>> +amdxdna_gem_del_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> + struct amdxdna_client *client = abo->client;
>> +
>> + if (amdxdna_gem_skip_bo_usage(abo))
>> + return;
>> +
>> + guard(mutex)(&client->mm_lock);
>> +
>> + client->total_bo_usage -= abo->mem.size;
>> + if (abo->internal)
>> + client->total_int_bo_usage -= abo->mem.size;
>> +}
>> +
>> static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
>> {
>> struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
>> struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
>> - XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type,
>> amdxdna_gem_dev_addr(abo));
>> -
>> amdxdna_hmm_unregister(abo, NULL);
>> flush_workqueue(xdna->notifier_wq);
>> @@ -543,9 +590,13 @@ static int amdxdna_gem_obj_open(struct
>> drm_gem_object *gobj, struct drm_file *fi
>> int ret;
>> guard(mutex)(&abo->lock);
>> + abo->open_ref++;
>> - if (!abo->client)
>> + if (abo->open_ref == 1) {
>> + /* Attached to the client when first opened by it. */
>> abo->client = filp->driver_priv;
>> + amdxdna_gem_add_bo_usage(abo);
>> + }
>> if (amdxdna_iova_on(xdna)) {
>> ret = amdxdna_iommu_map_bo(xdna, abo);
>> if (ret)
>> @@ -555,6 +606,20 @@ static int amdxdna_gem_obj_open(struct
>> drm_gem_object *gobj, struct drm_file *fi
>> return 0;
>> }
>> +static void amdxdna_gem_obj_close(struct drm_gem_object *gobj,
>> struct drm_file *filp)
>> +{
>> + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
>> +
>> + guard(mutex)(&abo->lock);
>> + abo->open_ref--;
>> +
>> + if (abo->open_ref == 0) {
>> + amdxdna_gem_del_bo_usage(abo);
>> + /* Detach from the client when last closed by it. */
>> + abo->client = NULL;
>> + }
>> +}
>> +
>> static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj,
>> struct iosys_map *map)
>> {
>> struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
>> @@ -575,6 +640,7 @@ static const struct drm_gem_object_funcs
>> amdxdna_gem_dev_obj_funcs = {
>> static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
>> .free = amdxdna_gem_obj_free,
>> .open = amdxdna_gem_obj_open,
>> + .close = amdxdna_gem_obj_close,
>> .print_info = drm_gem_shmem_object_print_info,
>> .pin = drm_gem_shmem_object_pin,
>> .unpin = drm_gem_shmem_object_unpin,
>> @@ -708,10 +774,13 @@ amdxdna_drm_create_share_bo(struct drm_device
>> *dev,
>> if (IS_ERR(abo))
>> return ERR_CAST(abo);
>> - if (args->type == AMDXDNA_BO_DEV_HEAP)
>> + if (args->type == AMDXDNA_BO_DEV_HEAP) {
>> abo->type = AMDXDNA_BO_DEV_HEAP;
>> - else
>> + abo->internal = true;
>> + } else {
>> abo->type = AMDXDNA_BO_SHARE;
>> + abo->internal = args->type == AMDXDNA_BO_CMD;
>> + }
>> return abo;
>> }
>> @@ -783,6 +852,11 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
>> gobj = to_gobj(abo);
>> gobj->funcs = &amdxdna_gem_dev_obj_funcs;
>> abo->type = AMDXDNA_BO_DEV;
>> + abo->internal = true;
>> + /*
>> + * DEV BOs cannot be alive when client is gone, it's OK to
>> + * always establish the connection.
>> + */
>> abo->client = client;
>> ret = amdxdna_gem_heap_alloc(abo);
>> @@ -826,7 +900,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device
>> *dev, void *data, struct drm_f
>> if (IS_ERR(abo))
>> return PTR_ERR(abo);
>> - /* ready to publish object to userspace */
>> + /* Ready to publish object to userspace and count for BO usage. */
>> ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
>> if (ret) {
>> XDNA_ERR(xdna, "Create handle failed");
>> @@ -986,3 +1060,43 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device
>> *dev,
>> drm_gem_object_put(gobj);
>> return ret;
>> }
>> +
>> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct
>> amdxdna_drm_get_array *args)
>> +{
>> + size_t min_sz = min(args->element_size, sizeof(struct
>> amdxdna_drm_bo_usage));
>> + char __user *buf = u64_to_user_ptr(args->buffer);
>> + struct amdxdna_dev *xdna = to_xdna_dev(dev);
>> + struct amdxdna_client *tmp_client;
>> + struct amdxdna_drm_bo_usage tmp;
>> +
>> + drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock));
>> +
>> + if (args->num_element != 1)
>> + return -EINVAL;
>> +
>> + if (copy_from_user(&tmp, buf, min_sz))
>> + return -EFAULT;
>> +
>> + if (!tmp.pid)
>> + return -EINVAL;
>> +
>> + tmp.total_usage = 0;
>> + tmp.internal_usage = 0;
>> + tmp.heap_usage = 0;
>> +
>> + list_for_each_entry(tmp_client, &xdna->client_list, node) {
>> + if (tmp.pid != tmp_client->pid)
>> + continue;
>> +
>> + mutex_lock(&tmp_client->mm_lock);
>> + tmp.total_usage += tmp_client->total_bo_usage;
>> + tmp.internal_usage += tmp_client->total_int_bo_usage;
>> + tmp.heap_usage += tmp_client->heap_usage;
>> + mutex_unlock(&tmp_client->mm_lock);
>> + }
>> +
>> + if (copy_to_user(buf, &tmp, min_sz))
>> + return -EFAULT;
>> +
>> + return 0;
>> +}
>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.h
>> b/drivers/accel/amdxdna/amdxdna_gem.h
>> index a77d9344f8a4..4fc48a1189d2 100644
>> --- a/drivers/accel/amdxdna/amdxdna_gem.h
>> +++ b/drivers/accel/amdxdna/amdxdna_gem.h
>> @@ -41,8 +41,9 @@ struct amdxdna_gem_obj {
>> struct amdxdna_client *client;
>> u8 type;
>> bool pinned;
>> - struct mutex lock; /* Protects: pinned, mem.kva */
>> + struct mutex lock; /* Protects: pinned, mem.kva,
>> open_ref */
>> struct amdxdna_mem mem;
>> + int open_ref;
>> /* Below members are initialized when needed */
>> struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */
>> @@ -50,6 +51,9 @@ struct amdxdna_gem_obj {
>> u32 assigned_hwctx;
>> struct dma_buf *dma_buf;
>> struct dma_buf_attachment *attach;
>> +
>> + /* True, if BO is managed by XRT, not application */
>> + bool internal;
>> };
>> #define to_gobj(obj) (&(obj)->base.base)
>> @@ -98,5 +102,6 @@ void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
>> int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *filp);
>> int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void
>> *data, struct drm_file *filp);
>> int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *filp);
>> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct
>> amdxdna_drm_get_array *args);
>> #endif /* _AMDXDNA_GEM_H_ */
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> index d83be00daf2b..b50a7d1f8a11 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -36,9 +36,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
>> * 0.5: Support getting telemetry data
>> * 0.6: Support preemption
>> * 0.7: Support getting power and utilization data
>> + * 0.8: Support BO usage query
>> */
>> #define AMDXDNA_DRIVER_MAJOR 0
>> -#define AMDXDNA_DRIVER_MINOR 7
>> +#define AMDXDNA_DRIVER_MINOR 8
>> /*
>> * Bind the driver base on (vendor_id, device_id) pair and later
>> use the
>> @@ -120,11 +121,12 @@ static void amdxdna_client_cleanup(struct
>> amdxdna_client *client)
>> amdxdna_hwctx_remove_all(client);
>> xa_destroy(&client->hwctx_xa);
>> cleanup_srcu_struct(&client->hwctx_srcu);
>> - mutex_destroy(&client->mm_lock);
>> if (client->dev_heap)
>> drm_gem_object_put(to_gobj(client->dev_heap));
>> + mutex_destroy(&client->mm_lock);
>> +
>> if (!IS_ERR_OR_NULL(client->sva))
>> iommu_sva_unbind_device(client->sva);
>> mmdrop(client->mm);
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> index e91d14ae5190..0661749917d6 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> @@ -138,6 +138,10 @@ struct amdxdna_client {
>> struct iommu_sva *sva;
>> int pasid;
>> struct mm_struct *mm;
>> +
>> + size_t heap_usage;
>> + size_t total_bo_usage;
>> + size_t total_int_bo_usage;
>> };
>> #define amdxdna_for_each_hwctx(client, hwctx_id, entry) \
>> diff --git a/include/uapi/drm/amdxdna_accel.h
>> b/include/uapi/drm/amdxdna_accel.h
>> index bddaaaf945cf..61d3686fa3b1 100644
>> --- a/include/uapi/drm/amdxdna_accel.h
>> +++ b/include/uapi/drm/amdxdna_accel.h
>> @@ -591,8 +591,37 @@ struct amdxdna_async_error {
>> __u64 ex_err_code;
>> };
>> +/**
>> + * struct amdxdna_drm_bo_usage - all types of BO usage
>> + * BOs managed by XRT/SHIM/driver is counted as internal.
>> + * Others are counted as external which are managed by applications.
>> + *
>> + * Among all types of BOs:
>> + * AMDXDNA_BO_DEV_HEAP - is counted for internal.
>> + * AMDXDNA_BO_SHARE - is counted for external.
>> + * AMDXDNA_BO_CMD - is counted for internal.
>> + * AMDXDNA_BO_DEV - is counted by heap_usage only, not internal
>> + * or external. It does not add to the total
>> memory
>> + * footprint since its mem comes from heap
>> which is
>> + * already counted as internal.
>> + */
>> +struct amdxdna_drm_bo_usage {
>> + /** @pid: The ID of the process to query from. */
>> + __s64 pid;
>> + /** @total_usage: Total BO size used by process. */
>> + __u64 total_usage;
>> + /** @internal_usage: Total internal BO size used by process. */
>> + __u64 internal_usage;
>> + /** @heap_usage: Total device BO size used by process. */
>> + __u64 heap_usage;
>> +};
>> +
>> +/*
>> + * Supported params in struct amdxdna_drm_get_array
>> + */
>> #define DRM_AMDXDNA_HW_CONTEXT_ALL 0
>> #define DRM_AMDXDNA_HW_LAST_ASYNC_ERR 2
>> +#define DRM_AMDXDNA_BO_USAGE 6
>> /**
>> * struct amdxdna_drm_get_array - Get information array.
>> @@ -605,6 +634,12 @@ struct amdxdna_drm_get_array {
>> *
>> * %DRM_AMDXDNA_HW_CONTEXT_ALL:
>> * Returns all created hardware contexts.
>> + *
>> + * %DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
>> + * Returns last async error.
>> + *
>> + * %DRM_AMDXDNA_BO_USAGE:
>> + * Returns usage of heap/internal/external BOs.
>> */
>> __u32 param;
>> /**
>
On Wed, Mar 25, 2026 at 11:47 AM Lizhi Hou <lizhi.hou@amd.com> wrote:
>
> Applied to drm-misc-next.
>
> On 3/24/26 10:01, Mario Limonciello wrote:
> >
> >
> > On 3/24/26 11:31, Lizhi Hou wrote:
> >> From: Max Zhen <max.zhen@amd.com>
> >>
> >> Add support for querying per-process buffer object (BO) memory
> >> usage through the amdxdna GET_ARRAY UAPI.
> >>
> >> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
> >> struct amdxdna_drm_bo_usage to report BO memory usage statistics,
> >> including heap, total, and internal usage.
> >>
> >> Track BO memory usage on a per-client basis by maintaining counters
> >> in GEM open/close and heap allocation/free paths. This ensures the
> >> reported statistics reflect the current memory footprint of each
> >> process.
> >>
> >> Wire the new query into the GET_ARRAY implementation to expose
> >> the usage information to userspace.
> >>
> >> Signed-off-by: Max Zhen <max.zhen@amd.com>
> >> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
> >> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> > I'm assuming you also have userspace side ready for this too right?
> > If you have a link handy can you please include it when committing.
Sorry for a naive question, would support in procfs be possible?
There's support in perf for displaying DRM usage stat data from there
[1], for example:
```
$ perf list drm-
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory buffers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory buffers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory buffers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory buffers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory buffers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory buffers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
$ perf stat -e drm-engine-render -a sleep 1
Performance counter stats for 'system wide':
557,542,732,344 ns drm-engine-render
1.001575975 seconds time elapsed
```
Thanks,
Ian
[1] https://lore.kernel.org/lkml/20250403202439.57791-1-irogers@google.com/
On 3/30/26 19:30, Ian Rogers wrote: > On Wed, Mar 25, 2026 at 11:47 AM Lizhi Hou <lizhi.hou@amd.com> wrote: >> Applied to drm-misc-next. >> >> On 3/24/26 10:01, Mario Limonciello wrote: >>> >>> On 3/24/26 11:31, Lizhi Hou wrote: >>>> From: Max Zhen <max.zhen@amd.com> >>>> >>>> Add support for querying per-process buffer object (BO) memory >>>> usage through the amdxdna GET_ARRAY UAPI. >>>> >>>> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with >>>> struct amdxdna_drm_bo_usage to report BO memory usage statistics, >>>> including heap, total, and internal usage. >>>> >>>> Track BO memory usage on a per-client basis by maintaining counters >>>> in GEM open/close and heap allocation/free paths. This ensures the >>>> reported statistics reflect the current memory footprint of each >>>> process. >>>> >>>> Wire the new query into the GET_ARRAY implementation to expose >>>> the usage information to userspace. >>>> >>>> Signed-off-by: Max Zhen <max.zhen@amd.com> >>>> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com> >>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com> >>> I'm assuming you also have userspace side ready for this too right? >>> If you have a link handy can you please include it when committing. > Sorry for a naive question, would support in procfs be possible? Do you mean fdinfo? And yes, the fdinfo is in our upstream stack. I will post the patch later. Lizhi > There's support in perf for displaying DRM usage stat data from there > [1], for example: > ``` > $ perf list drm- > > List of pre-defined events (to be used in -e or -M): > > > drm: > drm-active-stolen-system0 > [Total memory active in one or more engines. Unit: drm_i915] > drm-active-system0 > [Total memory active in one or more engines. Unit: drm_i915] > drm-engine-capacity-video > [Engine capacity. Unit: drm_i915] > drm-engine-copy > [Utilization in ns. Unit: drm_i915] > drm-engine-render > [Utilization in ns. Unit: drm_i915] > drm-engine-video > [Utilization in ns. 
Unit: drm_i915] > drm-engine-video-enhance > [Utilization in ns. Unit: drm_i915] > drm-purgeable-stolen-system0 > [Size of resident and purgeable memory buffers. Unit: drm_i915] > drm-purgeable-system0 > [Size of resident and purgeable memory buffers. Unit: drm_i915] > drm-resident-stolen-system0 > [Size of resident memory buffers. Unit: drm_i915] > drm-resident-system0 > [Size of resident memory buffers. Unit: drm_i915] > drm-shared-stolen-system0 > [Size of shared memory buffers. Unit: drm_i915] > drm-shared-system0 > [Size of shared memory buffers. Unit: drm_i915] > drm-total-stolen-system0 > [Size of shared and private memory. Unit: drm_i915] > drm-total-system0 > [Size of shared and private memory. Unit: drm_i915] > > > $ perf stat -e drm-engine-render -a sleep 1 > > Performance counter stats for 'system wide': > > 557,542,732,344 ns drm-engine-render > > 1.001575975 seconds time elapsed > ``` > > Thanks, > Ian > > [1] https://lore.kernel.org/lkml/20250403202439.57791-1-irogers@google.com/
On Tue, Mar 31, 2026 at 8:53 AM Lizhi Hou <lizhi.hou@amd.com> wrote: > > > On 3/30/26 19:30, Ian Rogers wrote: > > On Wed, Mar 25, 2026 at 11:47 AM Lizhi Hou <lizhi.hou@amd.com> wrote: > >> Applied to drm-misc-next. > >> > >> On 3/24/26 10:01, Mario Limonciello wrote: > >>> > >>> On 3/24/26 11:31, Lizhi Hou wrote: > >>>> From: Max Zhen <max.zhen@amd.com> > >>>> > >>>> Add support for querying per-process buffer object (BO) memory > >>>> usage through the amdxdna GET_ARRAY UAPI. > >>>> > >>>> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with > >>>> struct amdxdna_drm_bo_usage to report BO memory usage statistics, > >>>> including heap, total, and internal usage. > >>>> > >>>> Track BO memory usage on a per-client basis by maintaining counters > >>>> in GEM open/close and heap allocation/free paths. This ensures the > >>>> reported statistics reflect the current memory footprint of each > >>>> process. > >>>> > >>>> Wire the new query into the GET_ARRAY implementation to expose > >>>> the usage information to userspace. > >>>> > >>>> Signed-off-by: Max Zhen <max.zhen@amd.com> > >>>> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com> > >>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com> > >>> I'm assuming you also have userspace side ready for this too right? > >>> If you have a link handy can you please include it when committing. > > Sorry for a naive question, would support in procfs be possible? > > Do you mean fdinfo? And yes, the fdinfo is in our upstream stack. I will > post the patch later. That's great! I did mean fdinfo. 
I'd be interested to see the patch, and possibly the strings matched in perf need updating: https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/drm_pmu.c?h=perf-tools-next#n183 Thanks, Ian > Lizhi > > > There's support in perf for displaying DRM usage stat data from there > > [1], for example: > > ``` > > $ perf list drm- > > > > List of pre-defined events (to be used in -e or -M): > > > > > > drm: > > drm-active-stolen-system0 > > [Total memory active in one or more engines. Unit: drm_i915] > > drm-active-system0 > > [Total memory active in one or more engines. Unit: drm_i915] > > drm-engine-capacity-video > > [Engine capacity. Unit: drm_i915] > > drm-engine-copy > > [Utilization in ns. Unit: drm_i915] > > drm-engine-render > > [Utilization in ns. Unit: drm_i915] > > drm-engine-video > > [Utilization in ns. Unit: drm_i915] > > drm-engine-video-enhance > > [Utilization in ns. Unit: drm_i915] > > drm-purgeable-stolen-system0 > > [Size of resident and purgeable memory buffers. Unit: drm_i915] > > drm-purgeable-system0 > > [Size of resident and purgeable memory buffers. Unit: drm_i915] > > drm-resident-stolen-system0 > > [Size of resident memory buffers. Unit: drm_i915] > > drm-resident-system0 > > [Size of resident memory buffers. Unit: drm_i915] > > drm-shared-stolen-system0 > > [Size of shared memory buffers. Unit: drm_i915] > > drm-shared-system0 > > [Size of shared memory buffers. Unit: drm_i915] > > drm-total-stolen-system0 > > [Size of shared and private memory. Unit: drm_i915] > > drm-total-system0 > > [Size of shared and private memory. Unit: drm_i915] > > > > > > $ perf stat -e drm-engine-render -a sleep 1 > > > > Performance counter stats for 'system wide': > > > > 557,542,732,344 ns drm-engine-render > > > > 1.001575975 seconds time elapsed > > ``` > > > > Thanks, > > Ian > > > > [1] https://lore.kernel.org/lkml/20250403202439.57791-1-irogers@google.com/
On 3/24/26 10:01, Mario Limonciello wrote:
>
>
> On 3/24/26 11:31, Lizhi Hou wrote:
>> From: Max Zhen <max.zhen@amd.com>
>>
>> Add support for querying per-process buffer object (BO) memory
>> usage through the amdxdna GET_ARRAY UAPI.
>>
>> Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with
>> struct amdxdna_drm_bo_usage to report BO memory usage statistics,
>> including heap, total, and internal usage.
>>
>> Track BO memory usage on a per-client basis by maintaining counters
>> in GEM open/close and heap allocation/free paths. This ensures the
>> reported statistics reflect the current memory footprint of each
>> process.
>>
>> Wire the new query into the GET_ARRAY implementation to expose
>> the usage information to userspace.
>>
>> Signed-off-by: Max Zhen <max.zhen@amd.com>
>> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> I'm assuming you also have userspace side ready for this too right?
Our test case is ready at:
https://github.com/amd/xdna-driver/blob/main/test/shim_test/shim_test.cpp#L312
And the tool usage is ongoing.
I will put the test code link in commit.
Lizhi
> If you have a link handy can you please include it when committing.
>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>
>> ---
>> drivers/accel/amdxdna/aie2_pci.c | 4 +
>> drivers/accel/amdxdna/amdxdna_gem.c | 134 ++++++++++++++++++++++--
>> drivers/accel/amdxdna/amdxdna_gem.h | 7 +-
>> drivers/accel/amdxdna/amdxdna_pci_drv.c | 6 +-
>> drivers/accel/amdxdna/amdxdna_pci_drv.h | 4 +
>> include/uapi/drm/amdxdna_accel.h | 35 +++++++
>> 6 files changed, 177 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index 9e39bfe75971..f1ac4e00bd9f 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -865,6 +865,7 @@ static int aie2_hwctx_status_cb(struct
>> amdxdna_hwctx *hwctx, void *arg)
>> tmp->command_submissions = hwctx->priv->seq;
>> tmp->command_completions = hwctx->priv->completed;
>> tmp->pasid = hwctx->client->pasid;
>> + tmp->heap_usage = hwctx->client->heap_usage;
>> tmp->priority = hwctx->qos.priority;
>> tmp->gops = hwctx->qos.gops;
>> tmp->fps = hwctx->qos.fps;
>> @@ -1148,6 +1149,9 @@ static int aie2_get_array(struct amdxdna_client
>> *client,
>> case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
>> ret = aie2_get_array_async_error(xdna->dev_handle, args);
>> break;
>> + case DRM_AMDXDNA_BO_USAGE:
>> + ret = amdxdna_drm_get_bo_usage(&xdna->ddev, args);
>> + break;
>> default:
>> XDNA_ERR(xdna, "Not supported request parameter %u",
>> args->param);
>> ret = -EOPNOTSUPP;
>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c
>> b/drivers/accel/amdxdna/amdxdna_gem.c
>> index 27712704e42d..238ee244d4a6 100644
>> --- a/drivers/accel/amdxdna/amdxdna_gem.c
>> +++ b/drivers/accel/amdxdna/amdxdna_gem.c
>> @@ -63,6 +63,8 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>> goto unlock_out;
>> }
>> + client->heap_usage += mem->size;
>> +
>> drm_gem_object_get(to_gobj(heap));
>> unlock_out:
>> @@ -74,16 +76,17 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
>> static void
>> amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo)
>> {
>> + struct amdxdna_client *client = abo->client;
>> struct amdxdna_gem_obj *heap;
>> - mutex_lock(&abo->client->mm_lock);
>> + mutex_lock(&client->mm_lock);
>> drm_mm_remove_node(&abo->mm_node);
>> -
>> - heap = abo->client->dev_heap;
>> + client->heap_usage -= abo->mem.size;
>> + heap = client->dev_heap;
>> drm_gem_object_put(to_gobj(heap));
>> - mutex_unlock(&abo->client->mm_lock);
>> + mutex_unlock(&client->mm_lock);
>> }
>> static struct amdxdna_gem_obj *
>> @@ -102,6 +105,8 @@ amdxdna_gem_create_obj(struct drm_device *dev,
>> size_t size)
>> abo->mem.dma_addr = AMDXDNA_INVALID_ADDR;
>> abo->mem.uva = AMDXDNA_INVALID_ADDR;
>> abo->mem.size = size;
>> + abo->open_ref = 0;
>> + abo->internal = false;
>> INIT_LIST_HEAD(&abo->mem.umap_list);
>> return abo;
>> @@ -508,13 +513,55 @@ static void amdxdna_imported_obj_free(struct
>> amdxdna_gem_obj *abo)
>> kfree(abo);
>> }
>> +static inline bool
>> +amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> + /* Do not count imported BOs since the buffer is not allocated
>> by us. */
>> + if (is_import_bo(abo))
>> + return true;
>> +
>> + /* Already counted as part of HEAP BO */
>> + if (abo->type == AMDXDNA_BO_DEV)
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> +static void
>> +amdxdna_gem_add_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> + struct amdxdna_client *client = abo->client;
>> +
>> + if (amdxdna_gem_skip_bo_usage(abo))
>> + return;
>> +
>> + guard(mutex)(&client->mm_lock);
>> +
>> + client->total_bo_usage += abo->mem.size;
>> + if (abo->internal)
>> + client->total_int_bo_usage += abo->mem.size;
>> +}
>> +
>> +static void
>> +amdxdna_gem_del_bo_usage(struct amdxdna_gem_obj *abo)
>> +{
>> + struct amdxdna_client *client = abo->client;
>> +
>> + if (amdxdna_gem_skip_bo_usage(abo))
>> + return;
>> +
>> + guard(mutex)(&client->mm_lock);
>> +
>> + client->total_bo_usage -= abo->mem.size;
>> + if (abo->internal)
>> + client->total_int_bo_usage -= abo->mem.size;
>> +}
>> +
>> static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
>> {
>> struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev);
>> struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
>> - XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type,
>> amdxdna_gem_dev_addr(abo));
>> -
>> amdxdna_hmm_unregister(abo, NULL);
>> flush_workqueue(xdna->notifier_wq);
>> @@ -543,9 +590,13 @@ static int amdxdna_gem_obj_open(struct
>> drm_gem_object *gobj, struct drm_file *fi
>> int ret;
>> guard(mutex)(&abo->lock);
>> + abo->open_ref++;
>> - if (!abo->client)
>> + if (abo->open_ref == 1) {
>> + /* Attached to the client when first opened by it. */
>> abo->client = filp->driver_priv;
>> + amdxdna_gem_add_bo_usage(abo);
>> + }
>> if (amdxdna_iova_on(xdna)) {
>> ret = amdxdna_iommu_map_bo(xdna, abo);
>> if (ret)
>> @@ -555,6 +606,20 @@ static int amdxdna_gem_obj_open(struct
>> drm_gem_object *gobj, struct drm_file *fi
>> return 0;
>> }
>> +static void amdxdna_gem_obj_close(struct drm_gem_object *gobj,
>> struct drm_file *filp)
>> +{
>> + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
>> +
>> + guard(mutex)(&abo->lock);
>> + abo->open_ref--;
>> +
>> + if (abo->open_ref == 0) {
>> + amdxdna_gem_del_bo_usage(abo);
>> + /* Detach from the client when last closed by it. */
>> + abo->client = NULL;
>> + }
>> +}
>> +
>> static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj,
>> struct iosys_map *map)
>> {
>> struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
>> @@ -575,6 +640,7 @@ static const struct drm_gem_object_funcs
>> amdxdna_gem_dev_obj_funcs = {
>> static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
>> .free = amdxdna_gem_obj_free,
>> .open = amdxdna_gem_obj_open,
>> + .close = amdxdna_gem_obj_close,
>> .print_info = drm_gem_shmem_object_print_info,
>> .pin = drm_gem_shmem_object_pin,
>> .unpin = drm_gem_shmem_object_unpin,
>> @@ -708,10 +774,13 @@ amdxdna_drm_create_share_bo(struct drm_device
>> *dev,
>> if (IS_ERR(abo))
>> return ERR_CAST(abo);
>> - if (args->type == AMDXDNA_BO_DEV_HEAP)
>> + if (args->type == AMDXDNA_BO_DEV_HEAP) {
>> abo->type = AMDXDNA_BO_DEV_HEAP;
>> - else
>> + abo->internal = true;
>> + } else {
>> abo->type = AMDXDNA_BO_SHARE;
>> + abo->internal = args->type == AMDXDNA_BO_CMD;
>> + }
>> return abo;
>> }
>> @@ -783,6 +852,11 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
>> gobj = to_gobj(abo);
>> gobj->funcs = &amdxdna_gem_dev_obj_funcs;
>> abo->type = AMDXDNA_BO_DEV;
>> + abo->internal = true;
>> + /*
>> + * DEV BOs cannot be alive when client is gone, it's OK to
>> + * always establish the connection.
>> + */
>> abo->client = client;
>> ret = amdxdna_gem_heap_alloc(abo);
>> @@ -826,7 +900,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device
>> *dev, void *data, struct drm_f
>> if (IS_ERR(abo))
>> return PTR_ERR(abo);
>> - /* ready to publish object to userspace */
>> + /* Ready to publish object to userspace and count for BO usage. */
>> ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle);
>> if (ret) {
>> XDNA_ERR(xdna, "Create handle failed");
>> @@ -986,3 +1060,43 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device
>> *dev,
>> drm_gem_object_put(gobj);
>> return ret;
>> }
>> +
>> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct
>> amdxdna_drm_get_array *args)
>> +{
>> + size_t min_sz = min(args->element_size, sizeof(struct
>> amdxdna_drm_bo_usage));
>> + char __user *buf = u64_to_user_ptr(args->buffer);
>> + struct amdxdna_dev *xdna = to_xdna_dev(dev);
>> + struct amdxdna_client *tmp_client;
>> + struct amdxdna_drm_bo_usage tmp;
>> +
>> + drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock));
>> +
>> + if (args->num_element != 1)
>> + return -EINVAL;
>> +
>> + if (copy_from_user(&tmp, buf, min_sz))
>> + return -EFAULT;
>> +
>> + if (!tmp.pid)
>> + return -EINVAL;
>> +
>> + tmp.total_usage = 0;
>> + tmp.internal_usage = 0;
>> + tmp.heap_usage = 0;
>> +
>> + list_for_each_entry(tmp_client, &xdna->client_list, node) {
>> + if (tmp.pid != tmp_client->pid)
>> + continue;
>> +
>> + mutex_lock(&tmp_client->mm_lock);
>> + tmp.total_usage += tmp_client->total_bo_usage;
>> + tmp.internal_usage += tmp_client->total_int_bo_usage;
>> + tmp.heap_usage += tmp_client->heap_usage;
>> + mutex_unlock(&tmp_client->mm_lock);
>> + }
>> +
>> + if (copy_to_user(buf, &tmp, min_sz))
>> + return -EFAULT;
>> +
>> + return 0;
>> +}
>> diff --git a/drivers/accel/amdxdna/amdxdna_gem.h
>> b/drivers/accel/amdxdna/amdxdna_gem.h
>> index a77d9344f8a4..4fc48a1189d2 100644
>> --- a/drivers/accel/amdxdna/amdxdna_gem.h
>> +++ b/drivers/accel/amdxdna/amdxdna_gem.h
>> @@ -41,8 +41,9 @@ struct amdxdna_gem_obj {
>> struct amdxdna_client *client;
>> u8 type;
>> bool pinned;
>> - struct mutex lock; /* Protects: pinned, mem.kva */
>> + struct mutex lock; /* Protects: pinned, mem.kva,
>> open_ref */
>> struct amdxdna_mem mem;
>> + int open_ref;
>> /* Below members are initialized when needed */
>> struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */
>> @@ -50,6 +51,9 @@ struct amdxdna_gem_obj {
>> u32 assigned_hwctx;
>> struct dma_buf *dma_buf;
>> struct dma_buf_attachment *attach;
>> +
>> + /* True, if BO is managed by XRT, not application */
>> + bool internal;
>> };
>> #define to_gobj(obj) (&(obj)->base.base)
>> @@ -98,5 +102,6 @@ void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo);
>> int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *filp);
>> int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void
>> *data, struct drm_file *filp);
>> int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *filp);
>> +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct
>> amdxdna_drm_get_array *args);
>> #endif /* _AMDXDNA_GEM_H_ */
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> index d83be00daf2b..b50a7d1f8a11 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -36,9 +36,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin");
>> * 0.5: Support getting telemetry data
>> * 0.6: Support preemption
>> * 0.7: Support getting power and utilization data
>> + * 0.8: Support BO usage query
>> */
>> #define AMDXDNA_DRIVER_MAJOR 0
>> -#define AMDXDNA_DRIVER_MINOR 7
>> +#define AMDXDNA_DRIVER_MINOR 8
>> /*
>> * Bind the driver base on (vendor_id, device_id) pair and later
>> use the
>> @@ -120,11 +121,12 @@ static void amdxdna_client_cleanup(struct
>> amdxdna_client *client)
>> amdxdna_hwctx_remove_all(client);
>> xa_destroy(&client->hwctx_xa);
>> cleanup_srcu_struct(&client->hwctx_srcu);
>> - mutex_destroy(&client->mm_lock);
>> if (client->dev_heap)
>> drm_gem_object_put(to_gobj(client->dev_heap));
>> + mutex_destroy(&client->mm_lock);
>> +
>> if (!IS_ERR_OR_NULL(client->sva))
>> iommu_sva_unbind_device(client->sva);
>> mmdrop(client->mm);
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> index e91d14ae5190..0661749917d6 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> @@ -138,6 +138,10 @@ struct amdxdna_client {
>> struct iommu_sva *sva;
>> int pasid;
>> struct mm_struct *mm;
>> +
>> + size_t heap_usage;
>> + size_t total_bo_usage;
>> + size_t total_int_bo_usage;
>> };
>> #define amdxdna_for_each_hwctx(client, hwctx_id, entry) \
>> diff --git a/include/uapi/drm/amdxdna_accel.h
>> b/include/uapi/drm/amdxdna_accel.h
>> index bddaaaf945cf..61d3686fa3b1 100644
>> --- a/include/uapi/drm/amdxdna_accel.h
>> +++ b/include/uapi/drm/amdxdna_accel.h
>> @@ -591,8 +591,37 @@ struct amdxdna_async_error {
>> __u64 ex_err_code;
>> };
>> +/**
>> + * struct amdxdna_drm_bo_usage - all types of BO usage
>> + * BOs managed by XRT/SHIM/driver is counted as internal.
>> + * Others are counted as external which are managed by applications.
>> + *
>> + * Among all types of BOs:
>> + * AMDXDNA_BO_DEV_HEAP - is counted for internal.
>> + * AMDXDNA_BO_SHARE - is counted for external.
>> + * AMDXDNA_BO_CMD - is counted for internal.
>> + * AMDXDNA_BO_DEV - is counted by heap_usage only, not internal
>> + * or external. It does not add to the total
>> memory
>> + * footprint since its mem comes from heap
>> which is
>> + * already counted as internal.
>> + */
>> +struct amdxdna_drm_bo_usage {
>> + /** @pid: The ID of the process to query from. */
>> + __s64 pid;
>> + /** @total_usage: Total BO size used by process. */
>> + __u64 total_usage;
>> + /** @internal_usage: Total internal BO size used by process. */
>> + __u64 internal_usage;
>> + /** @heap_usage: Total device BO size used by process. */
>> + __u64 heap_usage;
>> +};
>> +
>> +/*
>> + * Supported params in struct amdxdna_drm_get_array
>> + */
>> #define DRM_AMDXDNA_HW_CONTEXT_ALL 0
>> #define DRM_AMDXDNA_HW_LAST_ASYNC_ERR 2
>> +#define DRM_AMDXDNA_BO_USAGE 6
>> /**
>> * struct amdxdna_drm_get_array - Get information array.
>> @@ -605,6 +634,12 @@ struct amdxdna_drm_get_array {
>> *
>> * %DRM_AMDXDNA_HW_CONTEXT_ALL:
>> * Returns all created hardware contexts.
>> + *
>> + * %DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
>> + * Returns last async error.
>> + *
>> + * %DRM_AMDXDNA_BO_USAGE:
>> + * Returns usage of heap/internal/external BOs.
>> */
>> __u32 param;
>> /**
>
© 2016 - 2026 Red Hat, Inc.