drivers/accel/amdxdna/Makefile | 1 + drivers/accel/amdxdna/aie2_ctx.c | 42 ++++++++--- drivers/accel/amdxdna/aie2_message.c | 28 ++++---- drivers/accel/amdxdna/aie2_pci.c | 79 ++++++++++---------- drivers/accel/amdxdna/aie2_pci.h | 3 +- drivers/accel/amdxdna/aie2_smu.c | 28 ++++++-- drivers/accel/amdxdna/amdxdna_ctx.c | 54 +++++++------- drivers/accel/amdxdna/amdxdna_mailbox.c | 13 ++-- drivers/accel/amdxdna/amdxdna_pci_drv.c | 56 ++------------- drivers/accel/amdxdna/amdxdna_pci_drv.h | 2 + drivers/accel/amdxdna/amdxdna_pm.c | 96 +++++++++++++++++++++++++ drivers/accel/amdxdna/amdxdna_pm.h | 18 +++++ 12 files changed, 262 insertions(+), 158 deletions(-) create mode 100644 drivers/accel/amdxdna/amdxdna_pm.c create mode 100644 drivers/accel/amdxdna/amdxdna_pm.h
Currently, pm_runtime_resume_and_get() is invoked in the driver's open
callback, and pm_runtime_put_autosuspend() is called in the close
callback. As a result, the device remains active whenever an application
opens it, even if no I/O is performed, leading to unnecessary power
consumption.
Move the runtime PM calls to the AIE2 callbacks that actually interact
with the hardware. The device will automatically suspend after 5 seconds
of inactivity (no hardware accesses and no pending commands), and it will
be resumed on the next hardware access.
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/Makefile | 1 +
drivers/accel/amdxdna/aie2_ctx.c | 42 ++++++++---
drivers/accel/amdxdna/aie2_message.c | 28 ++++----
drivers/accel/amdxdna/aie2_pci.c | 79 ++++++++++----------
drivers/accel/amdxdna/aie2_pci.h | 3 +-
drivers/accel/amdxdna/aie2_smu.c | 28 ++++++--
drivers/accel/amdxdna/amdxdna_ctx.c | 54 +++++++-------
drivers/accel/amdxdna/amdxdna_mailbox.c | 13 ++--
drivers/accel/amdxdna/amdxdna_pci_drv.c | 56 ++-------------
drivers/accel/amdxdna/amdxdna_pci_drv.h | 2 +
drivers/accel/amdxdna/amdxdna_pm.c | 96 +++++++++++++++++++++++++
drivers/accel/amdxdna/amdxdna_pm.h | 18 +++++
12 files changed, 262 insertions(+), 158 deletions(-)
create mode 100644 drivers/accel/amdxdna/amdxdna_pm.c
create mode 100644 drivers/accel/amdxdna/amdxdna_pm.h
diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
index 6797dac65efa..6344aaf523fa 100644
--- a/drivers/accel/amdxdna/Makefile
+++ b/drivers/accel/amdxdna/Makefile
@@ -14,6 +14,7 @@ amdxdna-y := \
amdxdna_mailbox.o \
amdxdna_mailbox_helper.o \
amdxdna_pci_drv.o \
+ amdxdna_pm.o \
amdxdna_sysfs.o \
amdxdna_ubuf.o \
npu1_regs.o \
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index e9f9b1fa5dc1..691fdb3b008f 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -21,6 +21,7 @@
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
static bool force_cmdlist;
module_param(force_cmdlist, bool, 0600);
@@ -88,7 +89,7 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
goto out;
}
- ret = aie2_config_cu(hwctx);
+ ret = aie2_config_cu(hwctx, NULL);
if (ret) {
XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
goto out;
@@ -167,14 +168,11 @@ static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
int aie2_hwctx_resume(struct amdxdna_client *client)
{
- struct amdxdna_dev *xdna = client->xdna;
-
/*
* The resume path cannot guarantee that mailbox channel can be
* regenerated. If this happen, when submit message to this
* mailbox channel, error will return.
*/
- drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
}
@@ -184,6 +182,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
struct dma_fence *fence = job->fence;
trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+
+ amdxdna_pm_suspend_put(job->hwctx->client->xdna);
job->hwctx->priv->completed++;
dma_fence_signal(fence);
@@ -531,7 +531,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
.num_rqs = DRM_SCHED_PRIORITY_COUNT,
.credit_limit = HWCTX_MAX_CMDS,
.timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
- .name = hwctx->name,
+ .name = "amdxdna_js",
.dev = xdna->ddev.dev,
};
struct drm_gpu_scheduler *sched;
@@ -697,6 +697,14 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
kfree(hwctx->cus);
}
+static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size)
+{
+ struct amdxdna_hwctx *hwctx = handle;
+
+ amdxdna_pm_suspend_put(hwctx->client->xdna);
+ return 0;
+}
+
static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
{
struct amdxdna_hwctx_param_config_cu *config = buf;
@@ -728,10 +736,14 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
if (!hwctx->cus)
return -ENOMEM;
- ret = aie2_config_cu(hwctx);
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto free_cus;
+
+ ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
if (ret) {
XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
- goto free_cus;
+ goto pm_suspend_put;
}
wmb(); /* To avoid locking in command submit when check status */
@@ -739,6 +751,8 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
return 0;
+pm_suspend_put:
+ amdxdna_pm_suspend_put(xdna);
free_cus:
kfree(hwctx->cus);
hwctx->cus = NULL;
@@ -862,11 +876,15 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
goto free_chain;
}
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto cleanup_job;
+
retry:
ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
if (ret) {
XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
- goto cleanup_job;
+ goto suspend_put;
}
for (i = 0; i < job->bo_cnt; i++) {
@@ -874,7 +892,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
if (ret) {
XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
- goto cleanup_job;
+ goto suspend_put;
}
}
@@ -889,12 +907,12 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
} else if (time_after(jiffies, timeout)) {
ret = -ETIME;
- goto cleanup_job;
+ goto suspend_put;
}
ret = aie2_populate_range(abo);
if (ret)
- goto cleanup_job;
+ goto suspend_put;
goto retry;
}
}
@@ -920,6 +938,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
return 0;
+suspend_put:
+ amdxdna_pm_suspend_put(xdna);
cleanup_job:
drm_sched_job_cleanup(&job->base);
free_chain:
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 9caad083543d..4660e8297ed8 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -37,7 +37,7 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
if (!ndev->mgmt_chann)
return -ENODEV;
- drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock));
ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
if (ret == -ETIME) {
xdna_mailbox_stop_channel(ndev->mgmt_chann);
@@ -377,15 +377,17 @@ int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr,
return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
}
-int aie2_config_cu(struct amdxdna_hwctx *hwctx)
+int aie2_config_cu(struct amdxdna_hwctx *hwctx,
+ int (*notify_cb)(void *, void __iomem *, size_t))
{
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
struct amdxdna_dev *xdna = hwctx->client->xdna;
u32 shift = xdna->dev_info->dev_mem_buf_shift;
- DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
+ struct config_cu_req req = { 0 };
+ struct xdna_mailbox_msg msg;
struct drm_gem_object *gobj;
struct amdxdna_gem_obj *abo;
- int ret, i;
+ int i;
if (!chann)
return -ENODEV;
@@ -423,18 +425,12 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
}
req.num_cus = hwctx->cus->num_cus;
- ret = xdna_send_msg_wait(xdna, chann, &msg);
- if (ret == -ETIME)
- aie2_destroy_context(xdna->dev_handle, hwctx);
-
- if (resp.status == AIE2_STATUS_SUCCESS) {
- XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
- return 0;
- }
-
- XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
- msg.opcode, resp.status, ret);
- return ret;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.handle = hwctx;
+ msg.opcode = MSG_OP_CONFIG_CU;
+ msg.notify_cb = notify_cb;
+ return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
}
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 6e39c769bb6d..f46a3dcd0580 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -25,6 +25,7 @@
#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
static int aie2_max_col = XRS_MAX_COL;
module_param(aie2_max_col, uint, 0600);
@@ -223,15 +224,6 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
return ret;
}
- if (!ndev->async_events)
- return 0;
-
- ret = aie2_error_async_events_send(ndev);
- if (ret) {
- XDNA_ERR(ndev->xdna, "Send async events failed");
- return ret;
- }
-
return 0;
}
@@ -257,6 +249,8 @@ static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
return ret;
}
+ ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
+
return 0;
}
@@ -338,6 +332,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
ndev->mbox = NULL;
aie2_psp_stop(ndev->psp_hdl);
aie2_smu_fini(ndev);
+ aie2_error_async_events_free(ndev);
pci_disable_device(pdev);
ndev->dev_status = AIE2_DEV_INIT;
@@ -424,6 +419,18 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
goto destroy_mgmt_chann;
}
+ ret = aie2_mgmt_fw_query(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
+ ret = aie2_error_async_events_alloc(ndev);
+ if (ret) {
+ XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
+ goto destroy_mgmt_chann;
+ }
+
ndev->dev_status = AIE2_DEV_START;
return 0;
@@ -459,7 +466,6 @@ static int aie2_hw_resume(struct amdxdna_dev *xdna)
struct amdxdna_client *client;
int ret;
- guard(mutex)(&xdna->dev_lock);
ret = aie2_hw_start(xdna);
if (ret) {
XDNA_ERR(xdna, "Start hardware failed, %d", ret);
@@ -565,13 +571,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
goto release_fw;
}
- ret = aie2_mgmt_fw_query(ndev);
- if (ret) {
- XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
- goto stop_hw;
- }
- ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
-
xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
@@ -587,30 +586,10 @@ static int aie2_init(struct amdxdna_dev *xdna)
goto stop_hw;
}
- ret = aie2_error_async_events_alloc(ndev);
- if (ret) {
- XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
- goto stop_hw;
- }
-
- ret = aie2_error_async_events_send(ndev);
- if (ret) {
- XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
- goto async_event_free;
- }
-
- /* Issue a command to make sure firmware handled async events */
- ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
- if (ret) {
- XDNA_ERR(xdna, "Re-query firmware version failed");
- goto async_event_free;
- }
-
release_firmware(fw);
+ amdxdna_rpm_init(xdna);
return 0;
-async_event_free:
- aie2_error_async_events_free(ndev);
stop_hw:
aie2_hw_stop(xdna);
release_fw:
@@ -621,10 +600,8 @@ static int aie2_init(struct amdxdna_dev *xdna)
static void aie2_fini(struct amdxdna_dev *xdna)
{
- struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
-
+ amdxdna_rpm_fini(xdna);
aie2_hw_stop(xdna);
- aie2_error_async_events_free(ndev);
}
static int aie2_get_aie_status(struct amdxdna_client *client,
@@ -856,6 +833,10 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
switch (args->param) {
case DRM_AMDXDNA_QUERY_AIE_STATUS:
ret = aie2_get_aie_status(client, args);
@@ -882,8 +863,11 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
}
+
+ amdxdna_pm_suspend_put(xdna);
XDNA_DBG(xdna, "Got param %d", args->param);
+dev_exit:
drm_dev_exit(idx);
return ret;
}
@@ -932,6 +916,10 @@ static int aie2_get_array(struct amdxdna_client *client,
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
switch (args->param) {
case DRM_AMDXDNA_HW_CONTEXT_ALL:
ret = aie2_query_ctx_status_array(client, args);
@@ -940,8 +928,11 @@ static int aie2_get_array(struct amdxdna_client *client,
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
ret = -EOPNOTSUPP;
}
+
+ amdxdna_pm_suspend_put(xdna);
XDNA_DBG(xdna, "Got param %d", args->param);
+dev_exit:
drm_dev_exit(idx);
return ret;
}
@@ -980,6 +971,10 @@ static int aie2_set_state(struct amdxdna_client *client,
if (!drm_dev_enter(&xdna->ddev, &idx))
return -ENODEV;
+ ret = amdxdna_pm_resume_get(xdna);
+ if (ret)
+ goto dev_exit;
+
switch (args->param) {
case DRM_AMDXDNA_SET_POWER_MODE:
ret = aie2_set_power_mode(client, args);
@@ -990,6 +985,8 @@ static int aie2_set_state(struct amdxdna_client *client,
break;
}
+ amdxdna_pm_suspend_put(xdna);
+dev_exit:
drm_dev_exit(idx);
return ret;
}
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 91a8e948f82a..289a23ecd5f1 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -272,7 +272,8 @@ int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u6
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
void *handle, int (*cb)(void*, void __iomem *, size_t));
-int aie2_config_cu(struct amdxdna_hwctx *hwctx);
+int aie2_config_cu(struct amdxdna_hwctx *hwctx,
+ int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int (*notify_cb)(void *, void __iomem *, size_t));
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
index d303701b0ded..7f292a615ed8 100644
--- a/drivers/accel/amdxdna/aie2_smu.c
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -11,6 +11,7 @@
#include "aie2_pci.h"
#include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
#define SMU_RESULT_OK 1
@@ -59,12 +60,16 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
u32 freq;
int ret;
+ ret = amdxdna_pm_resume_get(ndev->xdna);
+ if (ret)
+ return ret;
+
ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
if (ret) {
XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
- return ret;
+ goto suspend_put;
}
ndev->npuclk_freq = freq;
@@ -73,8 +78,10 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
if (ret) {
XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
- return ret;
+ goto suspend_put;
}
+
+ amdxdna_pm_suspend_put(ndev->xdna);
ndev->hclk_freq = freq;
ndev->dpm_level = dpm_level;
@@ -82,26 +89,35 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
ndev->npuclk_freq, ndev->hclk_freq);
return 0;
+
+suspend_put:
+ amdxdna_pm_suspend_put(ndev->xdna);
+ return ret;
}
int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
{
int ret;
+ ret = amdxdna_pm_resume_get(ndev->xdna);
+ if (ret)
+ return ret;
+
ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
if (ret) {
XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
dpm_level, ret);
- return ret;
+ goto suspend_put;
}
ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
if (ret) {
XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
dpm_level, ret);
- return ret;
+ goto suspend_put;
}
+ amdxdna_pm_suspend_put(ndev->xdna);
ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
ndev->dpm_level = dpm_level;
@@ -110,6 +126,10 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
ndev->npuclk_freq, ndev->hclk_freq);
return 0;
+
+suspend_put:
+ amdxdna_pm_suspend_put(ndev->xdna);
+ return ret;
}
int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 4bfe4ef20550..c3300eff7460 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -161,14 +161,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
if (args->ext || args->ext_flags)
return -EINVAL;
- if (!drm_dev_enter(dev, &idx))
- return -ENODEV;
-
hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
- if (!hwctx) {
- ret = -ENOMEM;
- goto exit;
- }
+ if (!hwctx)
+ return -ENOMEM;
if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
XDNA_ERR(xdna, "Access QoS info failed");
@@ -181,30 +176,36 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
hwctx->num_tiles = args->num_tiles;
hwctx->mem_size = args->mem_size;
hwctx->max_opc = args->max_opc;
- ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
- XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
- &client->next_hwctxid, GFP_KERNEL);
- if (ret < 0) {
- XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
+
+ guard(mutex)(&xdna->dev_lock);
+
+ if (!drm_dev_enter(dev, &idx)) {
+ ret = -ENODEV;
goto free_hwctx;
}
- hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
+ ret = xdna->dev_info->ops->hwctx_init(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
+ goto dev_exit;
+ }
+
+ hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->fw_ctx_id);
if (!hwctx->name) {
ret = -ENOMEM;
- goto rm_id;
+ goto fini_hwctx;
}
- mutex_lock(&xdna->dev_lock);
- ret = xdna->dev_info->ops->hwctx_init(hwctx);
- if (ret) {
- mutex_unlock(&xdna->dev_lock);
- XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
+ ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
+ XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
+ &client->next_hwctxid, GFP_KERNEL);
+ if (ret < 0) {
+ XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
goto free_name;
}
+
args->handle = hwctx->id;
args->syncobj_handle = hwctx->syncobj_hdl;
- mutex_unlock(&xdna->dev_lock);
atomic64_set(&hwctx->job_submit_cnt, 0);
atomic64_set(&hwctx->job_free_cnt, 0);
@@ -214,12 +215,12 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
free_name:
kfree(hwctx->name);
-rm_id:
- xa_erase(&client->hwctx_xa, hwctx->id);
+fini_hwctx:
+ xdna->dev_info->ops->hwctx_fini(hwctx);
+dev_exit:
+ drm_dev_exit(idx);
free_hwctx:
kfree(hwctx);
-exit:
- drm_dev_exit(idx);
return ret;
}
@@ -431,11 +432,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
goto unlock_srcu;
}
- if (hwctx->status != HWCTX_STAT_READY) {
- XDNA_ERR(xdna, "HW Context is not ready");
- ret = -EINVAL;
- goto unlock_srcu;
- }
job->hwctx = hwctx;
job->mm = current->mm;
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
index da1ac89bb78f..24258dcc18eb 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
@@ -194,7 +194,8 @@ static void mailbox_release_msg(struct mailbox_channel *mb_chann,
{
MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
- mb_msg->notify_cb(mb_msg->handle, NULL, 0);
+ if (mb_msg->notify_cb)
+ mb_msg->notify_cb(mb_msg->handle, NULL, 0);
kfree(mb_msg);
}
@@ -248,7 +249,7 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
{
struct mailbox_msg *mb_msg;
int msg_id;
- int ret;
+ int ret = 0;
msg_id = header->id;
if (!mailbox_validate_msgid(msg_id)) {
@@ -265,9 +266,11 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
header->opcode, header->total_size, header->id);
- ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
- if (unlikely(ret))
- MB_ERR(mb_chann, "Message callback ret %d", ret);
+ if (mb_msg->notify_cb) {
+ ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
+ if (unlikely(ret))
+ MB_ERR(mb_chann, "Message callback ret %d", ret);
+ }
kfree(mb_msg);
return ret;
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 569cd703729d..aa04452310e5 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -13,13 +13,11 @@
#include <drm/gpu_scheduler.h>
#include <linux/iommu.h>
#include <linux/pci.h>
-#include <linux/pm_runtime.h>
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_pci_drv.h"
-
-#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
+#include "amdxdna_pm.h"
MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
@@ -61,17 +59,9 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
struct amdxdna_client *client;
int ret;
- ret = pm_runtime_resume_and_get(ddev->dev);
- if (ret) {
- XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
- return ret;
- }
-
client = kzalloc(sizeof(*client), GFP_KERNEL);
- if (!client) {
- ret = -ENOMEM;
- goto put_rpm;
- }
+ if (!client)
+ return -ENOMEM;
client->pid = pid_nr(rcu_access_pointer(filp->pid));
client->xdna = xdna;
@@ -106,9 +96,6 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
iommu_sva_unbind_device(client->sva);
failed:
kfree(client);
-put_rpm:
- pm_runtime_mark_last_busy(ddev->dev);
- pm_runtime_put_autosuspend(ddev->dev);
return ret;
}
@@ -130,8 +117,6 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
XDNA_DBG(xdna, "pid %d closed", client->pid);
kfree(client);
- pm_runtime_mark_last_busy(ddev->dev);
- pm_runtime_put_autosuspend(ddev->dev);
}
static int amdxdna_flush(struct file *f, fl_owner_t id)
@@ -310,19 +295,12 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto failed_dev_fini;
}
- pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
- pm_runtime_use_autosuspend(dev);
- pm_runtime_allow(dev);
-
ret = drm_dev_register(&xdna->ddev, 0);
if (ret) {
XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
- pm_runtime_forbid(dev);
goto failed_sysfs_fini;
}
- pm_runtime_mark_last_busy(dev);
- pm_runtime_put_autosuspend(dev);
return 0;
failed_sysfs_fini:
@@ -339,14 +317,10 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
static void amdxdna_remove(struct pci_dev *pdev)
{
struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
- struct device *dev = &pdev->dev;
struct amdxdna_client *client;
destroy_workqueue(xdna->notifier_wq);
- pm_runtime_get_noresume(dev);
- pm_runtime_forbid(dev);
-
drm_dev_unplug(&xdna->ddev);
amdxdna_sysfs_fini(xdna);
@@ -365,29 +339,9 @@ static void amdxdna_remove(struct pci_dev *pdev)
mutex_unlock(&xdna->dev_lock);
}
-static int amdxdna_pmops_suspend(struct device *dev)
-{
- struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
-
- if (!xdna->dev_info->ops->suspend)
- return -EOPNOTSUPP;
-
- return xdna->dev_info->ops->suspend(xdna);
-}
-
-static int amdxdna_pmops_resume(struct device *dev)
-{
- struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
-
- if (!xdna->dev_info->ops->resume)
- return -EOPNOTSUPP;
-
- return xdna->dev_info->ops->resume(xdna);
-}
-
static const struct dev_pm_ops amdxdna_pm_ops = {
- SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
- RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume, NULL)
+ SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume)
+ RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL)
};
static struct pci_driver amdxdna_pci_driver = {
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index 72d6696d49da..626beebf730e 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -6,6 +6,7 @@
#ifndef _AMDXDNA_PCI_DRV_H_
#define _AMDXDNA_PCI_DRV_H_
+#include <drm/drm_print.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>
@@ -99,6 +100,7 @@ struct amdxdna_dev {
struct amdxdna_fw_ver fw_ver;
struct rw_semaphore notifier_lock; /* for mmu notifier*/
struct workqueue_struct *notifier_wq;
+ bool rpm_on;
};
/*
diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c
new file mode 100644
index 000000000000..fad14f60b99b
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pm.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_drv.h>
+#include <linux/pm_runtime.h>
+
+#include "amdxdna_pm.h"
+
+#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
+
+int amdxdna_pm_suspend(struct device *dev)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
+ int ret = -EOPNOTSUPP;
+ bool rpm;
+
+ if (xdna->dev_info->ops->suspend) {
+ rpm = xdna->rpm_on;
+ xdna->rpm_on = false;
+ ret = xdna->dev_info->ops->suspend(xdna);
+ xdna->rpm_on = rpm;
+ }
+
+ XDNA_DBG(xdna, "Suspend done ret %d", ret);
+ return ret;
+}
+
+int amdxdna_pm_resume(struct device *dev)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
+ int ret = -EOPNOTSUPP;
+ bool rpm;
+
+ if (xdna->dev_info->ops->resume) {
+ rpm = xdna->rpm_on;
+ xdna->rpm_on = false;
+ ret = xdna->dev_info->ops->resume(xdna);
+ xdna->rpm_on = rpm;
+ }
+
+ XDNA_DBG(xdna, "Resume done ret %d", ret);
+ return ret;
+}
+
+int amdxdna_pm_resume_get(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+ int ret;
+
+ if (!xdna->rpm_on)
+ return 0;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret) {
+ XDNA_ERR(xdna, "Resume failed: %d", ret);
+ pm_runtime_set_suspended(dev);
+ }
+
+ return ret;
+}
+
+void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ if (!xdna->rpm_on)
+ return;
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+}
+
+void amdxdna_rpm_init(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ pm_runtime_set_active(dev);
+ pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_allow(dev);
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+ xdna->rpm_on = true;
+}
+
+void amdxdna_rpm_fini(struct amdxdna_dev *xdna)
+{
+ struct device *dev = xdna->ddev.dev;
+
+ xdna->rpm_on = false;
+ pm_runtime_get_noresume(dev);
+ pm_runtime_forbid(dev);
+}
diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/amdxdna/amdxdna_pm.h
new file mode 100644
index 000000000000..11c295b6d64a
--- /dev/null
+++ b/drivers/accel/amdxdna/amdxdna_pm.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#ifndef _AMDXDNA_PM_H_
+#define _AMDXDNA_PM_H_
+
+#include "amdxdna_pci_drv.h"
+
+int amdxdna_pm_suspend(struct device *dev);
+int amdxdna_pm_resume(struct device *dev);
+int amdxdna_pm_resume_get(struct amdxdna_dev *xdna);
+void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna);
+void amdxdna_rpm_init(struct amdxdna_dev *xdna);
+void amdxdna_rpm_fini(struct amdxdna_dev *xdna);
+
+#endif /* _AMDXDNA_PM_H_ */
--
2.34.1
On 9/18/2025 12:24 PM, Lizhi Hou wrote:
> Currently, pm_runtime_resume_and_get() is invoked in the driver's open
> callback, and pm_runtime_put_autosuspend() is called in the close
> callback. As a result, the device remains active whenever an application
> opens it, even if no I/O is performed, leading to unnecessary power
> consumption.
>
> Move the runtime PM calls to the AIE2 callbacks that actually interact
> with the hardware. The device will automatically suspend after 5 seconds
> of inactivity (no hardware accesses and no pending commands), and it will
> be resumed on the next hardware access.
>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
> drivers/accel/amdxdna/Makefile | 1 +
> drivers/accel/amdxdna/aie2_ctx.c | 42 ++++++++---
> drivers/accel/amdxdna/aie2_message.c | 28 ++++----
> drivers/accel/amdxdna/aie2_pci.c | 79 ++++++++++----------
> drivers/accel/amdxdna/aie2_pci.h | 3 +-
> drivers/accel/amdxdna/aie2_smu.c | 28 ++++++--
> drivers/accel/amdxdna/amdxdna_ctx.c | 54 +++++++-------
> drivers/accel/amdxdna/amdxdna_mailbox.c | 13 ++--
> drivers/accel/amdxdna/amdxdna_pci_drv.c | 56 ++-------------
> drivers/accel/amdxdna/amdxdna_pci_drv.h | 2 +
> drivers/accel/amdxdna/amdxdna_pm.c | 96 +++++++++++++++++++++++++
> drivers/accel/amdxdna/amdxdna_pm.h | 18 +++++
> 12 files changed, 262 insertions(+), 158 deletions(-)
> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.c
> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.h
>
> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
> index 6797dac65efa..6344aaf523fa 100644
> --- a/drivers/accel/amdxdna/Makefile
> +++ b/drivers/accel/amdxdna/Makefile
> @@ -14,6 +14,7 @@ amdxdna-y := \
> amdxdna_mailbox.o \
> amdxdna_mailbox_helper.o \
> amdxdna_pci_drv.o \
> + amdxdna_pm.o \
> amdxdna_sysfs.o \
> amdxdna_ubuf.o \
> npu1_regs.o \
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> index e9f9b1fa5dc1..691fdb3b008f 100644
> --- a/drivers/accel/amdxdna/aie2_ctx.c
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -21,6 +21,7 @@
> #include "amdxdna_gem.h"
> #include "amdxdna_mailbox.h"
> #include "amdxdna_pci_drv.h"
> +#include "amdxdna_pm.h"
>
> static bool force_cmdlist;
> module_param(force_cmdlist, bool, 0600);
> @@ -88,7 +89,7 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
> goto out;
> }
>
> - ret = aie2_config_cu(hwctx);
> + ret = aie2_config_cu(hwctx, NULL);
> if (ret) {
> XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
> goto out;
> @@ -167,14 +168,11 @@ static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
>
> int aie2_hwctx_resume(struct amdxdna_client *client)
> {
> - struct amdxdna_dev *xdna = client->xdna;
> -
> /*
> * The resume path cannot guarantee that mailbox channel can be
> * regenerated. If this happen, when submit message to this
> * mailbox channel, error will return.
> */
> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
> return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
> }
>
> @@ -184,6 +182,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
> struct dma_fence *fence = job->fence;
>
> trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
> +
> + amdxdna_pm_suspend_put(job->hwctx->client->xdna);
> job->hwctx->priv->completed++;
> dma_fence_signal(fence);
>
> @@ -531,7 +531,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
> .num_rqs = DRM_SCHED_PRIORITY_COUNT,
> .credit_limit = HWCTX_MAX_CMDS,
> .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
> - .name = hwctx->name,
> + .name = "amdxdna_js",
> .dev = xdna->ddev.dev,
> };
> struct drm_gpu_scheduler *sched;
> @@ -697,6 +697,14 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
> kfree(hwctx->cus);
> }
>
> +static int aie2_config_cu_resp_handler(void *handle, void __iomem *data, size_t size)
> +{
> + struct amdxdna_hwctx *hwctx = handle;
> +
> + amdxdna_pm_suspend_put(hwctx->client->xdna);
> + return 0;
> +}
> +
> static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
> {
> struct amdxdna_hwctx_param_config_cu *config = buf;
> @@ -728,10 +736,14 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
> if (!hwctx->cus)
> return -ENOMEM;
>
> - ret = aie2_config_cu(hwctx);
> + ret = amdxdna_pm_resume_get(xdna);
> + if (ret)
> + goto free_cus;
> +
> + ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
> if (ret) {
> XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
> - goto free_cus;
> + goto pm_suspend_put;
> }
>
> wmb(); /* To avoid locking in command submit when check status */
> @@ -739,6 +751,8 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
>
> return 0;
>
> +pm_suspend_put:
> + amdxdna_pm_suspend_put(xdna);
> free_cus:
> kfree(hwctx->cus);
> hwctx->cus = NULL;
> @@ -862,11 +876,15 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
> goto free_chain;
> }
>
> + ret = amdxdna_pm_resume_get(xdna);
> + if (ret)
> + goto cleanup_job;
> +
> retry:
> ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
> if (ret) {
> XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
> - goto cleanup_job;
> + goto suspend_put;
> }
>
> for (i = 0; i < job->bo_cnt; i++) {
> @@ -874,7 +892,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
> if (ret) {
> XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
> drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
> - goto cleanup_job;
> + goto suspend_put;
> }
> }
>
> @@ -889,12 +907,12 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
> msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
> } else if (time_after(jiffies, timeout)) {
> ret = -ETIME;
> - goto cleanup_job;
> + goto suspend_put;
> }
>
> ret = aie2_populate_range(abo);
> if (ret)
> - goto cleanup_job;
> + goto suspend_put;
> goto retry;
> }
> }
> @@ -920,6 +938,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
>
> return 0;
>
> +suspend_put:
> + amdxdna_pm_suspend_put(xdna);
> cleanup_job:
> drm_sched_job_cleanup(&job->base);
> free_chain:
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 9caad083543d..4660e8297ed8 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -37,7 +37,7 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
> if (!ndev->mgmt_chann)
> return -ENODEV;
>
> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
> + drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock));
> ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
> if (ret == -ETIME) {
> xdna_mailbox_stop_channel(ndev->mgmt_chann);
> @@ -377,15 +377,17 @@ int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr,
> return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
> }
>
> -int aie2_config_cu(struct amdxdna_hwctx *hwctx)
> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
> + int (*notify_cb)(void *, void __iomem *, size_t))
> {
> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
> struct amdxdna_dev *xdna = hwctx->client->xdna;
> u32 shift = xdna->dev_info->dev_mem_buf_shift;
> - DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
> + struct config_cu_req req = { 0 };
> + struct xdna_mailbox_msg msg;
> struct drm_gem_object *gobj;
> struct amdxdna_gem_obj *abo;
> - int ret, i;
> + int i;
>
> if (!chann)
> return -ENODEV;
> @@ -423,18 +425,12 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
> }
> req.num_cus = hwctx->cus->num_cus;
>
> - ret = xdna_send_msg_wait(xdna, chann, &msg);
> - if (ret == -ETIME)
> - aie2_destroy_context(xdna->dev_handle, hwctx);
> -
> - if (resp.status == AIE2_STATUS_SUCCESS) {
> - XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
> - return 0;
> - }
> -
> - XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
> - msg.opcode, resp.status, ret);
> - return ret;
> + msg.send_data = (u8 *)&req;
> + msg.send_size = sizeof(req);
> + msg.handle = hwctx;
> + msg.opcode = MSG_OP_CONFIG_CU;
> + msg.notify_cb = notify_cb;
> + return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
> }
>
> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 6e39c769bb6d..f46a3dcd0580 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -25,6 +25,7 @@
> #include "amdxdna_gem.h"
> #include "amdxdna_mailbox.h"
> #include "amdxdna_pci_drv.h"
> +#include "amdxdna_pm.h"
>
> static int aie2_max_col = XRS_MAX_COL;
> module_param(aie2_max_col, uint, 0600);
> @@ -223,15 +224,6 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
> return ret;
> }
>
> - if (!ndev->async_events)
> - return 0;
> -
> - ret = aie2_error_async_events_send(ndev);
> - if (ret) {
> - XDNA_ERR(ndev->xdna, "Send async events failed");
> - return ret;
> - }
> -
> return 0;
> }
>
> @@ -257,6 +249,8 @@ static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
> return ret;
> }
>
> + ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
> +
> return 0;
> }
>
> @@ -338,6 +332,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
> ndev->mbox = NULL;
> aie2_psp_stop(ndev->psp_hdl);
> aie2_smu_fini(ndev);
> + aie2_error_async_events_free(ndev);
> pci_disable_device(pdev);
>
> ndev->dev_status = AIE2_DEV_INIT;
> @@ -424,6 +419,18 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
> goto destroy_mgmt_chann;
> }
>
> + ret = aie2_mgmt_fw_query(ndev);
> + if (ret) {
> + XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
> + goto destroy_mgmt_chann;
> + }
> +
> + ret = aie2_error_async_events_alloc(ndev);
> + if (ret) {
> + XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
> + goto destroy_mgmt_chann;
> + }
> +
> ndev->dev_status = AIE2_DEV_START;
>
> return 0;
> @@ -459,7 +466,6 @@ static int aie2_hw_resume(struct amdxdna_dev *xdna)
> struct amdxdna_client *client;
> int ret;
>
> - guard(mutex)(&xdna->dev_lock);
> ret = aie2_hw_start(xdna);
> if (ret) {
> XDNA_ERR(xdna, "Start hardware failed, %d", ret);
> @@ -565,13 +571,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
> goto release_fw;
> }
>
> - ret = aie2_mgmt_fw_query(ndev);
> - if (ret) {
> - XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
> - goto stop_hw;
> - }
> - ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
> -
> xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
> for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
> xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
> @@ -587,30 +586,10 @@ static int aie2_init(struct amdxdna_dev *xdna)
> goto stop_hw;
> }
>
> - ret = aie2_error_async_events_alloc(ndev);
> - if (ret) {
> - XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
> - goto stop_hw;
> - }
> -
> - ret = aie2_error_async_events_send(ndev);
> - if (ret) {
> - XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
> - goto async_event_free;
> - }
> -
> - /* Issue a command to make sure firmware handled async events */
> - ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
> - if (ret) {
> - XDNA_ERR(xdna, "Re-query firmware version failed");
> - goto async_event_free;
> - }
> -
> release_firmware(fw);
> + amdxdna_rpm_init(xdna);
> return 0;
>
> -async_event_free:
> - aie2_error_async_events_free(ndev);
> stop_hw:
> aie2_hw_stop(xdna);
> release_fw:
> @@ -621,10 +600,8 @@ static int aie2_init(struct amdxdna_dev *xdna)
>
> static void aie2_fini(struct amdxdna_dev *xdna)
> {
> - struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
> -
> + amdxdna_rpm_fini(xdna);
> aie2_hw_stop(xdna);
> - aie2_error_async_events_free(ndev);
> }
>
> static int aie2_get_aie_status(struct amdxdna_client *client,
> @@ -856,6 +833,10 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
> if (!drm_dev_enter(&xdna->ddev, &idx))
> return -ENODEV;
>
> + ret = amdxdna_pm_resume_get(xdna);
> + if (ret)
> + goto dev_exit;
> +
> switch (args->param) {
> case DRM_AMDXDNA_QUERY_AIE_STATUS:
> ret = aie2_get_aie_status(client, args);
> @@ -882,8 +863,11 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
> XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
> ret = -EOPNOTSUPP;
> }
> +
> + amdxdna_pm_suspend_put(xdna);
> XDNA_DBG(xdna, "Got param %d", args->param);
>
> +dev_exit:
> drm_dev_exit(idx);
> return ret;
> }
> @@ -932,6 +916,10 @@ static int aie2_get_array(struct amdxdna_client *client,
> if (!drm_dev_enter(&xdna->ddev, &idx))
> return -ENODEV;
>
> + ret = amdxdna_pm_resume_get(xdna);
> + if (ret)
> + goto dev_exit;
> +
> switch (args->param) {
> case DRM_AMDXDNA_HW_CONTEXT_ALL:
> ret = aie2_query_ctx_status_array(client, args);
> @@ -940,8 +928,11 @@ static int aie2_get_array(struct amdxdna_client *client,
> XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
> ret = -EOPNOTSUPP;
> }
> +
> + amdxdna_pm_suspend_put(xdna);
> XDNA_DBG(xdna, "Got param %d", args->param);
>
> +dev_exit:
> drm_dev_exit(idx);
> return ret;
> }
> @@ -980,6 +971,10 @@ static int aie2_set_state(struct amdxdna_client *client,
> if (!drm_dev_enter(&xdna->ddev, &idx))
> return -ENODEV;
>
> + ret = amdxdna_pm_resume_get(xdna);
> + if (ret)
> + goto dev_exit;
> +
> switch (args->param) {
> case DRM_AMDXDNA_SET_POWER_MODE:
> ret = aie2_set_power_mode(client, args);
> @@ -990,6 +985,8 @@ static int aie2_set_state(struct amdxdna_client *client,
> break;
> }
>
> + amdxdna_pm_suspend_put(xdna);
> +dev_exit:
> drm_dev_exit(idx);
> return ret;
> }
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index 91a8e948f82a..289a23ecd5f1 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -272,7 +272,8 @@ int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u6
> int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
> int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
> void *handle, int (*cb)(void*, void __iomem *, size_t));
> -int aie2_config_cu(struct amdxdna_hwctx *hwctx);
> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
> + int (*notify_cb)(void *, void __iomem *, size_t));
> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
> int (*notify_cb)(void *, void __iomem *, size_t));
> int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
> diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
> index d303701b0ded..7f292a615ed8 100644
> --- a/drivers/accel/amdxdna/aie2_smu.c
> +++ b/drivers/accel/amdxdna/aie2_smu.c
> @@ -11,6 +11,7 @@
>
> #include "aie2_pci.h"
> #include "amdxdna_pci_drv.h"
> +#include "amdxdna_pm.h"
>
> #define SMU_RESULT_OK 1
>
> @@ -59,12 +60,16 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
> u32 freq;
> int ret;
>
> + ret = amdxdna_pm_resume_get(ndev->xdna);
> + if (ret)
> + return ret;
> +
> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
> if (ret) {
> XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
> - return ret;
> + goto suspend_put;
> }
> ndev->npuclk_freq = freq;
>
> @@ -73,8 +78,10 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
> if (ret) {
> XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
> ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
> - return ret;
> + goto suspend_put;
> }
> +
> + amdxdna_pm_suspend_put(ndev->xdna);
> ndev->hclk_freq = freq;
> ndev->dpm_level = dpm_level;
>
> @@ -82,26 +89,35 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
> ndev->npuclk_freq, ndev->hclk_freq);
>
> return 0;
> +
> +suspend_put:
> + amdxdna_pm_suspend_put(ndev->xdna);
> + return ret;
> }
>
> int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
> {
> int ret;
>
> + ret = amdxdna_pm_resume_get(ndev->xdna);
> + if (ret)
> + return ret;
> +
> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
> if (ret) {
> XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
> dpm_level, ret);
> - return ret;
> + goto suspend_put;
> }
>
> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
> if (ret) {
> XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
> dpm_level, ret);
> - return ret;
> + goto suspend_put;
> }
>
> + amdxdna_pm_suspend_put(ndev->xdna);
> ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
> ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
> ndev->dpm_level = dpm_level;
> @@ -110,6 +126,10 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
> ndev->npuclk_freq, ndev->hclk_freq);
>
> return 0;
> +
> +suspend_put:
> + amdxdna_pm_suspend_put(ndev->xdna);
> + return ret;
> }
>
> int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
> index 4bfe4ef20550..c3300eff7460 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
> @@ -161,14 +161,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
> if (args->ext || args->ext_flags)
> return -EINVAL;
>
> - if (!drm_dev_enter(dev, &idx))
> - return -ENODEV;
> -
> hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
> - if (!hwctx) {
> - ret = -ENOMEM;
> - goto exit;
> - }
> + if (!hwctx)
> + return -ENOMEM;
>
> if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
> XDNA_ERR(xdna, "Access QoS info failed");
> @@ -181,30 +176,36 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
> hwctx->num_tiles = args->num_tiles;
> hwctx->mem_size = args->mem_size;
> hwctx->max_opc = args->max_opc;
> - ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
> - XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
> - &client->next_hwctxid, GFP_KERNEL);
> - if (ret < 0) {
> - XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
> +
> + guard(mutex)(&xdna->dev_lock);
> +
> + if (!drm_dev_enter(dev, &idx)) {
> + ret = -ENODEV;
> goto free_hwctx;
> }
>
> - hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
> + ret = xdna->dev_info->ops->hwctx_init(hwctx);
> + if (ret) {
> + XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
> + goto dev_exit;
> + }
> +
> + hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->fw_ctx_id);
> if (!hwctx->name) {
> ret = -ENOMEM;
> - goto rm_id;
> + goto fini_hwctx;
> }
>
> - mutex_lock(&xdna->dev_lock);
> - ret = xdna->dev_info->ops->hwctx_init(hwctx);
> - if (ret) {
> - mutex_unlock(&xdna->dev_lock);
> - XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
> + ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
> + XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
> + &client->next_hwctxid, GFP_KERNEL);
> + if (ret < 0) {
> + XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
> goto free_name;
> }
> +
> args->handle = hwctx->id;
> args->syncobj_handle = hwctx->syncobj_hdl;
> - mutex_unlock(&xdna->dev_lock);
>
> atomic64_set(&hwctx->job_submit_cnt, 0);
> atomic64_set(&hwctx->job_free_cnt, 0);
> @@ -214,12 +215,12 @@ int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
>
> free_name:
> kfree(hwctx->name);
> -rm_id:
> - xa_erase(&client->hwctx_xa, hwctx->id);
> +fini_hwctx:
> + xdna->dev_info->ops->hwctx_fini(hwctx);
> +dev_exit:
> + drm_dev_exit(idx);
> free_hwctx:
> kfree(hwctx);
> -exit:
> - drm_dev_exit(idx);
> return ret;
> }
>
> @@ -431,11 +432,6 @@ int amdxdna_cmd_submit(struct amdxdna_client *client,
> goto unlock_srcu;
> }
>
> - if (hwctx->status != HWCTX_STAT_READY) {
> - XDNA_ERR(xdna, "HW Context is not ready");
> - ret = -EINVAL;
> - goto unlock_srcu;
> - }
>
> job->hwctx = hwctx;
> job->mm = current->mm;
> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c
> index da1ac89bb78f..24258dcc18eb 100644
> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
> @@ -194,7 +194,8 @@ static void mailbox_release_msg(struct mailbox_channel *mb_chann,
> {
> MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
> mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
> - mb_msg->notify_cb(mb_msg->handle, NULL, 0);
> + if (mb_msg->notify_cb)
> + mb_msg->notify_cb(mb_msg->handle, NULL, 0);
> kfree(mb_msg);
> }
>
> @@ -248,7 +249,7 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
> {
> struct mailbox_msg *mb_msg;
> int msg_id;
> - int ret;
> + int ret = 0;
>
> msg_id = header->id;
> if (!mailbox_validate_msgid(msg_id)) {
> @@ -265,9 +266,11 @@ mailbox_get_resp(struct mailbox_channel *mb_chann, struct xdna_msg_header *heade
>
> MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
> header->opcode, header->total_size, header->id);
> - ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
> - if (unlikely(ret))
> - MB_ERR(mb_chann, "Message callback ret %d", ret);
> + if (mb_msg->notify_cb) {
> + ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
> + if (unlikely(ret))
> + MB_ERR(mb_chann, "Message callback ret %d", ret);
> + }
>
> kfree(mb_msg);
> return ret;
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index 569cd703729d..aa04452310e5 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -13,13 +13,11 @@
> #include <drm/gpu_scheduler.h>
> #include <linux/iommu.h>
> #include <linux/pci.h>
> -#include <linux/pm_runtime.h>
>
> #include "amdxdna_ctx.h"
> #include "amdxdna_gem.h"
> #include "amdxdna_pci_drv.h"
> -
> -#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
> +#include "amdxdna_pm.h"
>
> MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
> MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
> @@ -61,17 +59,9 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
> struct amdxdna_client *client;
> int ret;
>
> - ret = pm_runtime_resume_and_get(ddev->dev);
> - if (ret) {
> - XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
> - return ret;
> - }
> -
> client = kzalloc(sizeof(*client), GFP_KERNEL);
> - if (!client) {
> - ret = -ENOMEM;
> - goto put_rpm;
> - }
> + if (!client)
> + return -ENOMEM;
>
> client->pid = pid_nr(rcu_access_pointer(filp->pid));
> client->xdna = xdna;
> @@ -106,9 +96,6 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
> iommu_sva_unbind_device(client->sva);
> failed:
> kfree(client);
> -put_rpm:
> - pm_runtime_mark_last_busy(ddev->dev);
> - pm_runtime_put_autosuspend(ddev->dev);
>
> return ret;
> }
> @@ -130,8 +117,6 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
>
> XDNA_DBG(xdna, "pid %d closed", client->pid);
> kfree(client);
> - pm_runtime_mark_last_busy(ddev->dev);
> - pm_runtime_put_autosuspend(ddev->dev);
> }
>
> static int amdxdna_flush(struct file *f, fl_owner_t id)
> @@ -310,19 +295,12 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> goto failed_dev_fini;
> }
>
> - pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
> - pm_runtime_use_autosuspend(dev);
> - pm_runtime_allow(dev);
> -
> ret = drm_dev_register(&xdna->ddev, 0);
> if (ret) {
> XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
> - pm_runtime_forbid(dev);
> goto failed_sysfs_fini;
> }
>
> - pm_runtime_mark_last_busy(dev);
> - pm_runtime_put_autosuspend(dev);
> return 0;
>
> failed_sysfs_fini:
> @@ -339,14 +317,10 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> static void amdxdna_remove(struct pci_dev *pdev)
> {
> struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
> - struct device *dev = &pdev->dev;
> struct amdxdna_client *client;
>
> destroy_workqueue(xdna->notifier_wq);
>
> - pm_runtime_get_noresume(dev);
> - pm_runtime_forbid(dev);
> -
> drm_dev_unplug(&xdna->ddev);
> amdxdna_sysfs_fini(xdna);
>
> @@ -365,29 +339,9 @@ static void amdxdna_remove(struct pci_dev *pdev)
> mutex_unlock(&xdna->dev_lock);
> }
>
> -static int amdxdna_pmops_suspend(struct device *dev)
> -{
> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
> -
> - if (!xdna->dev_info->ops->suspend)
> - return -EOPNOTSUPP;
> -
> - return xdna->dev_info->ops->suspend(xdna);
> -}
> -
> -static int amdxdna_pmops_resume(struct device *dev)
> -{
> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
> -
> - if (!xdna->dev_info->ops->resume)
> - return -EOPNOTSUPP;
> -
> - return xdna->dev_info->ops->resume(xdna);
> -}
> -
> static const struct dev_pm_ops amdxdna_pm_ops = {
> - SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
> - RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume, NULL)
> + SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume)
> + RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL)
> };
>
> static struct pci_driver amdxdna_pci_driver = {
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> index 72d6696d49da..626beebf730e 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
> @@ -6,6 +6,7 @@
> #ifndef _AMDXDNA_PCI_DRV_H_
> #define _AMDXDNA_PCI_DRV_H_
>
> +#include <drm/drm_print.h>
This seems like a spurious header inclusion. It shouldn't be needed for
adding a bool to the struct.
> #include <linux/workqueue.h>
> #include <linux/xarray.h>
>
> @@ -99,6 +100,7 @@ struct amdxdna_dev {
> struct amdxdna_fw_ver fw_ver;
> struct rw_semaphore notifier_lock; /* for mmu notifier*/
> struct workqueue_struct *notifier_wq;
> + bool rpm_on;
I'm wondering whether you really need a new variable to track the runtime PM
status. Can't you just use pm_runtime_active() with the appropriate
locking when checking it?
> };
>
> /*
> diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/amdxdna/amdxdna_pm.c
> new file mode 100644
> index 000000000000..fad14f60b99b
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_pm.c
> @@ -0,0 +1,96 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
> + */
> +
> +#include <drm/amdxdna_accel.h>
> +#include <drm/drm_drv.h>
> +#include <linux/pm_runtime.h>
> +
> +#include "amdxdna_pm.h"
> +
> +#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
> +
> +int amdxdna_pm_suspend(struct device *dev)
> +{
> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
> + int ret = -EOPNOTSUPP;
> + bool rpm;
> +
> + if (xdna->dev_info->ops->suspend) {
> + rpm = xdna->rpm_on;
> + xdna->rpm_on = false;
> + ret = xdna->dev_info->ops->suspend(xdna);
> + xdna->rpm_on = rpm;
> + }
> +
> + XDNA_DBG(xdna, "Suspend done ret %d", ret);
> + return ret;
> +}
> +
> +int amdxdna_pm_resume(struct device *dev)
> +{
> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
> + int ret = -EOPNOTSUPP;
> + bool rpm;
> +
> + if (xdna->dev_info->ops->resume) {
> + rpm = xdna->rpm_on;
> + xdna->rpm_on = false;
> + ret = xdna->dev_info->ops->resume(xdna);
> + xdna->rpm_on = rpm;
> + }
> +
> + XDNA_DBG(xdna, "Resume done ret %d", ret);
> + return ret;
> +}
> +
> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna)
> +{
> + struct device *dev = xdna->ddev.dev;
> + int ret;
> +
> + if (!xdna->rpm_on)
> + return 0;
> +
> + ret = pm_runtime_resume_and_get(dev);
> + if (ret) {
> + XDNA_ERR(xdna, "Resume failed: %d", ret);
> + pm_runtime_set_suspended(dev);
> + }
> +
> + return ret;
> +}
> +
> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna)
> +{
> + struct device *dev = xdna->ddev.dev;
> +
> + if (!xdna->rpm_on)
> + return;
> +
> + pm_runtime_mark_last_busy(dev);
> + pm_runtime_put_autosuspend(dev);
> +}
> +
> +void amdxdna_rpm_init(struct amdxdna_dev *xdna)
> +{
> + struct device *dev = xdna->ddev.dev;
> +
> + pm_runtime_set_active(dev);
> + pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
> + pm_runtime_use_autosuspend(dev);
> + pm_runtime_allow(dev);
> + pm_runtime_mark_last_busy(dev);
> + pm_runtime_put_autosuspend(dev);
> + xdna->rpm_on = true;
> +}
> +
> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna)
> +{
> + struct device *dev = xdna->ddev.dev;
> +
> + xdna->rpm_on = false;
> + pm_runtime_get_noresume(dev);
> + pm_runtime_forbid(dev);
> +}
> diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/amdxdna/amdxdna_pm.h
> new file mode 100644
> index 000000000000..11c295b6d64a
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_pm.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef _AMDXDNA_PM_H_
> +#define _AMDXDNA_PM_H_
> +
> +#include "amdxdna_pci_drv.h"
> +
> +int amdxdna_pm_suspend(struct device *dev);
> +int amdxdna_pm_resume(struct device *dev);
> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna);
> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna);
> +void amdxdna_rpm_init(struct amdxdna_dev *xdna);
> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna);
Nit: Why rpm for init/fini and pm for all the others? Wouldn't it make
sense to pick one or the other?
> +
> +#endif /* _AMDXDNA_PM_H_ */
On 9/18/25 10:31, Mario Limonciello wrote:
>
>
> On 9/18/2025 12:24 PM, Lizhi Hou wrote:
>> Currently, pm_runtime_resume_and_get() is invoked in the driver's open
>> callback, and pm_runtime_put_autosuspend() is called in the close
>> callback. As a result, the device remains active whenever an application
>> opens it, even if no I/O is performed, leading to unnecessary power
>> consumption.
>>
>> Move the runtime PM calls to the AIE2 callbacks that actually interact
>> with the hardware. The device will automatically suspend after 5 seconds
>> of inactivity (no hardware accesses and no pending commands), and it
>> will
>> be resumed on the next hardware access.
>>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>> drivers/accel/amdxdna/Makefile | 1 +
>> drivers/accel/amdxdna/aie2_ctx.c | 42 ++++++++---
>> drivers/accel/amdxdna/aie2_message.c | 28 ++++----
>> drivers/accel/amdxdna/aie2_pci.c | 79 ++++++++++----------
>> drivers/accel/amdxdna/aie2_pci.h | 3 +-
>> drivers/accel/amdxdna/aie2_smu.c | 28 ++++++--
>> drivers/accel/amdxdna/amdxdna_ctx.c | 54 +++++++-------
>> drivers/accel/amdxdna/amdxdna_mailbox.c | 13 ++--
>> drivers/accel/amdxdna/amdxdna_pci_drv.c | 56 ++-------------
>> drivers/accel/amdxdna/amdxdna_pci_drv.h | 2 +
>> drivers/accel/amdxdna/amdxdna_pm.c | 96 +++++++++++++++++++++++++
>> drivers/accel/amdxdna/amdxdna_pm.h | 18 +++++
>> 12 files changed, 262 insertions(+), 158 deletions(-)
>> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.c
>> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.h
>>
>> diff --git a/drivers/accel/amdxdna/Makefile
>> b/drivers/accel/amdxdna/Makefile
>> index 6797dac65efa..6344aaf523fa 100644
>> --- a/drivers/accel/amdxdna/Makefile
>> +++ b/drivers/accel/amdxdna/Makefile
>> @@ -14,6 +14,7 @@ amdxdna-y := \
>> amdxdna_mailbox.o \
>> amdxdna_mailbox_helper.o \
>> amdxdna_pci_drv.o \
>> + amdxdna_pm.o \
>> amdxdna_sysfs.o \
>> amdxdna_ubuf.o \
>> npu1_regs.o \
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c
>> b/drivers/accel/amdxdna/aie2_ctx.c
>> index e9f9b1fa5dc1..691fdb3b008f 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -21,6 +21,7 @@
>> #include "amdxdna_gem.h"
>> #include "amdxdna_mailbox.h"
>> #include "amdxdna_pci_drv.h"
>> +#include "amdxdna_pm.h"
>> static bool force_cmdlist;
>> module_param(force_cmdlist, bool, 0600);
>> @@ -88,7 +89,7 @@ static int aie2_hwctx_restart(struct amdxdna_dev
>> *xdna, struct amdxdna_hwctx *hw
>> goto out;
>> }
>> - ret = aie2_config_cu(hwctx);
>> + ret = aie2_config_cu(hwctx, NULL);
>> if (ret) {
>> XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
>> goto out;
>> @@ -167,14 +168,11 @@ static int aie2_hwctx_resume_cb(struct
>> amdxdna_hwctx *hwctx, void *arg)
>> int aie2_hwctx_resume(struct amdxdna_client *client)
>> {
>> - struct amdxdna_dev *xdna = client->xdna;
>> -
>> /*
>> * The resume path cannot guarantee that mailbox channel can be
>> * regenerated. If this happen, when submit message to this
>> * mailbox channel, error will return.
>> */
>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>> return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
>> }
>> @@ -184,6 +182,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>> struct dma_fence *fence = job->fence;
>> trace_xdna_job(&job->base, job->hwctx->name, "signaled
>> fence", job->seq);
>> +
>> + amdxdna_pm_suspend_put(job->hwctx->client->xdna);
>> job->hwctx->priv->completed++;
>> dma_fence_signal(fence);
>> @@ -531,7 +531,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>> .num_rqs = DRM_SCHED_PRIORITY_COUNT,
>> .credit_limit = HWCTX_MAX_CMDS,
>> .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
>> - .name = hwctx->name,
>> + .name = "amdxdna_js",
>> .dev = xdna->ddev.dev,
>> };
>> struct drm_gpu_scheduler *sched;
>> @@ -697,6 +697,14 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
>> kfree(hwctx->cus);
>> }
>> +static int aie2_config_cu_resp_handler(void *handle, void __iomem
>> *data, size_t size)
>> +{
>> + struct amdxdna_hwctx *hwctx = handle;
>> +
>> + amdxdna_pm_suspend_put(hwctx->client->xdna);
>> + return 0;
>> +}
>> +
>> static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void
>> *buf, u32 size)
>> {
>> struct amdxdna_hwctx_param_config_cu *config = buf;
>> @@ -728,10 +736,14 @@ static int aie2_hwctx_cu_config(struct
>> amdxdna_hwctx *hwctx, void *buf, u32 size
>> if (!hwctx->cus)
>> return -ENOMEM;
>> - ret = aie2_config_cu(hwctx);
>> + ret = amdxdna_pm_resume_get(xdna);
>> + if (ret)
>> + goto free_cus;
>> +
>> + ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
>> if (ret) {
>> XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
>> - goto free_cus;
>> + goto pm_suspend_put;
>> }
>> wmb(); /* To avoid locking in command submit when check
>> status */
>> @@ -739,6 +751,8 @@ static int aie2_hwctx_cu_config(struct
>> amdxdna_hwctx *hwctx, void *buf, u32 size
>> return 0;
>> +pm_suspend_put:
>> + amdxdna_pm_suspend_put(xdna);
>> free_cus:
>> kfree(hwctx->cus);
>> hwctx->cus = NULL;
>> @@ -862,11 +876,15 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>> *hwctx, struct amdxdna_sched_job *job,
>> goto free_chain;
>> }
>> + ret = amdxdna_pm_resume_get(xdna);
>> + if (ret)
>> + goto cleanup_job;
>> +
>> retry:
>> ret = drm_gem_lock_reservations(job->bos, job->bo_cnt,
>> &acquire_ctx);
>> if (ret) {
>> XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
>> - goto cleanup_job;
>> + goto suspend_put;
>> }
>> for (i = 0; i < job->bo_cnt; i++) {
>> @@ -874,7 +892,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx,
>> struct amdxdna_sched_job *job,
>> if (ret) {
>> XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
>> drm_gem_unlock_reservations(job->bos, job->bo_cnt,
>> &acquire_ctx);
>> - goto cleanup_job;
>> + goto suspend_put;
>> }
>> }
>> @@ -889,12 +907,12 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>> *hwctx, struct amdxdna_sched_job *job,
>> msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
>> } else if (time_after(jiffies, timeout)) {
>> ret = -ETIME;
>> - goto cleanup_job;
>> + goto suspend_put;
>> }
>> ret = aie2_populate_range(abo);
>> if (ret)
>> - goto cleanup_job;
>> + goto suspend_put;
>> goto retry;
>> }
>> }
>> @@ -920,6 +938,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx,
>> struct amdxdna_sched_job *job,
>> return 0;
>> +suspend_put:
>> + amdxdna_pm_suspend_put(xdna);
>> cleanup_job:
>> drm_sched_job_cleanup(&job->base);
>> free_chain:
>> diff --git a/drivers/accel/amdxdna/aie2_message.c
>> b/drivers/accel/amdxdna/aie2_message.c
>> index 9caad083543d..4660e8297ed8 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -37,7 +37,7 @@ static int aie2_send_mgmt_msg_wait(struct
>> amdxdna_dev_hdl *ndev,
>> if (!ndev->mgmt_chann)
>> return -ENODEV;
>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>> + drm_WARN_ON(&xdna->ddev, xdna->rpm_on &&
>> !mutex_is_locked(&xdna->dev_lock));
>> ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
>> if (ret == -ETIME) {
>> xdna_mailbox_stop_channel(ndev->mgmt_chann);
>> @@ -377,15 +377,17 @@ int aie2_register_asyn_event_msg(struct
>> amdxdna_dev_hdl *ndev, dma_addr_t addr,
>> return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
>> }
>> -int aie2_config_cu(struct amdxdna_hwctx *hwctx)
>> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>> + int (*notify_cb)(void *, void __iomem *, size_t))
>> {
>> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>> struct amdxdna_dev *xdna = hwctx->client->xdna;
>> u32 shift = xdna->dev_info->dev_mem_buf_shift;
>> - DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
>> + struct config_cu_req req = { 0 };
>> + struct xdna_mailbox_msg msg;
>> struct drm_gem_object *gobj;
>> struct amdxdna_gem_obj *abo;
>> - int ret, i;
>> + int i;
>> if (!chann)
>> return -ENODEV;
>> @@ -423,18 +425,12 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
>> }
>> req.num_cus = hwctx->cus->num_cus;
>> - ret = xdna_send_msg_wait(xdna, chann, &msg);
>> - if (ret == -ETIME)
>> - aie2_destroy_context(xdna->dev_handle, hwctx);
>> -
>> - if (resp.status == AIE2_STATUS_SUCCESS) {
>> - XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
>> - return 0;
>> - }
>> -
>> - XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
>> - msg.opcode, resp.status, ret);
>> - return ret;
>> + msg.send_data = (u8 *)&req;
>> + msg.send_size = sizeof(req);
>> + msg.handle = hwctx;
>> + msg.opcode = MSG_OP_CONFIG_CU;
>> + msg.notify_cb = notify_cb;
>> + return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>> }
>> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>> amdxdna_sched_job *job,
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index 6e39c769bb6d..f46a3dcd0580 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -25,6 +25,7 @@
>> #include "amdxdna_gem.h"
>> #include "amdxdna_mailbox.h"
>> #include "amdxdna_pci_drv.h"
>> +#include "amdxdna_pm.h"
>> static int aie2_max_col = XRS_MAX_COL;
>> module_param(aie2_max_col, uint, 0600);
>> @@ -223,15 +224,6 @@ static int aie2_mgmt_fw_init(struct
>> amdxdna_dev_hdl *ndev)
>> return ret;
>> }
>> - if (!ndev->async_events)
>> - return 0;
>> -
>> - ret = aie2_error_async_events_send(ndev);
>> - if (ret) {
>> - XDNA_ERR(ndev->xdna, "Send async events failed");
>> - return ret;
>> - }
>> -
>> return 0;
>> }
>> @@ -257,6 +249,8 @@ static int aie2_mgmt_fw_query(struct
>> amdxdna_dev_hdl *ndev)
>> return ret;
>> }
>> + ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>> +
>> return 0;
>> }
>> @@ -338,6 +332,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
>> ndev->mbox = NULL;
>> aie2_psp_stop(ndev->psp_hdl);
>> aie2_smu_fini(ndev);
>> + aie2_error_async_events_free(ndev);
>> pci_disable_device(pdev);
>> ndev->dev_status = AIE2_DEV_INIT;
>> @@ -424,6 +419,18 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>> goto destroy_mgmt_chann;
>> }
>> + ret = aie2_mgmt_fw_query(ndev);
>> + if (ret) {
>> + XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
>> + goto destroy_mgmt_chann;
>> + }
>> +
>> + ret = aie2_error_async_events_alloc(ndev);
>> + if (ret) {
>> + XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
>> + goto destroy_mgmt_chann;
>> + }
>> +
>> ndev->dev_status = AIE2_DEV_START;
>> return 0;
>> @@ -459,7 +466,6 @@ static int aie2_hw_resume(struct amdxdna_dev *xdna)
>> struct amdxdna_client *client;
>> int ret;
>> - guard(mutex)(&xdna->dev_lock);
>> ret = aie2_hw_start(xdna);
>> if (ret) {
>> XDNA_ERR(xdna, "Start hardware failed, %d", ret);
>> @@ -565,13 +571,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
>> goto release_fw;
>> }
>> - ret = aie2_mgmt_fw_query(ndev);
>> - if (ret) {
>> - XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
>> - goto stop_hw;
>> - }
>> - ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>> -
>> xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
>> for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
>> xrs_cfg.clk_list.cu_clk_list[i] =
>> ndev->priv->dpm_clk_tbl[i].hclk;
>> @@ -587,30 +586,10 @@ static int aie2_init(struct amdxdna_dev *xdna)
>> goto stop_hw;
>> }
>> - ret = aie2_error_async_events_alloc(ndev);
>> - if (ret) {
>> - XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
>> - goto stop_hw;
>> - }
>> -
>> - ret = aie2_error_async_events_send(ndev);
>> - if (ret) {
>> - XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
>> - goto async_event_free;
>> - }
>> -
>> - /* Issue a command to make sure firmware handled async events */
>> - ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
>> - if (ret) {
>> - XDNA_ERR(xdna, "Re-query firmware version failed");
>> - goto async_event_free;
>> - }
>> -
>> release_firmware(fw);
>> + amdxdna_rpm_init(xdna);
>> return 0;
>> -async_event_free:
>> - aie2_error_async_events_free(ndev);
>> stop_hw:
>> aie2_hw_stop(xdna);
>> release_fw:
>> @@ -621,10 +600,8 @@ static int aie2_init(struct amdxdna_dev *xdna)
>> static void aie2_fini(struct amdxdna_dev *xdna)
>> {
>> - struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>> -
>> + amdxdna_rpm_fini(xdna);
>> aie2_hw_stop(xdna);
>> - aie2_error_async_events_free(ndev);
>> }
>> static int aie2_get_aie_status(struct amdxdna_client *client,
>> @@ -856,6 +833,10 @@ static int aie2_get_info(struct amdxdna_client
>> *client, struct amdxdna_drm_get_i
>> if (!drm_dev_enter(&xdna->ddev, &idx))
>> return -ENODEV;
>> + ret = amdxdna_pm_resume_get(xdna);
>> + if (ret)
>> + goto dev_exit;
>> +
>> switch (args->param) {
>> case DRM_AMDXDNA_QUERY_AIE_STATUS:
>> ret = aie2_get_aie_status(client, args);
>> @@ -882,8 +863,11 @@ static int aie2_get_info(struct amdxdna_client
>> *client, struct amdxdna_drm_get_i
>> XDNA_ERR(xdna, "Not supported request parameter %u",
>> args->param);
>> ret = -EOPNOTSUPP;
>> }
>> +
>> + amdxdna_pm_suspend_put(xdna);
>> XDNA_DBG(xdna, "Got param %d", args->param);
>> +dev_exit:
>> drm_dev_exit(idx);
>> return ret;
>> }
>> @@ -932,6 +916,10 @@ static int aie2_get_array(struct amdxdna_client
>> *client,
>> if (!drm_dev_enter(&xdna->ddev, &idx))
>> return -ENODEV;
>> + ret = amdxdna_pm_resume_get(xdna);
>> + if (ret)
>> + goto dev_exit;
>> +
>> switch (args->param) {
>> case DRM_AMDXDNA_HW_CONTEXT_ALL:
>> ret = aie2_query_ctx_status_array(client, args);
>> @@ -940,8 +928,11 @@ static int aie2_get_array(struct amdxdna_client
>> *client,
>> XDNA_ERR(xdna, "Not supported request parameter %u",
>> args->param);
>> ret = -EOPNOTSUPP;
>> }
>> +
>> + amdxdna_pm_suspend_put(xdna);
>> XDNA_DBG(xdna, "Got param %d", args->param);
>> +dev_exit:
>> drm_dev_exit(idx);
>> return ret;
>> }
>> @@ -980,6 +971,10 @@ static int aie2_set_state(struct amdxdna_client
>> *client,
>> if (!drm_dev_enter(&xdna->ddev, &idx))
>> return -ENODEV;
>> + ret = amdxdna_pm_resume_get(xdna);
>> + if (ret)
>> + goto dev_exit;
>> +
>> switch (args->param) {
>> case DRM_AMDXDNA_SET_POWER_MODE:
>> ret = aie2_set_power_mode(client, args);
>> @@ -990,6 +985,8 @@ static int aie2_set_state(struct amdxdna_client
>> *client,
>> break;
>> }
>> + amdxdna_pm_suspend_put(xdna);
>> +dev_exit:
>> drm_dev_exit(idx);
>> return ret;
>> }
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index 91a8e948f82a..289a23ecd5f1 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -272,7 +272,8 @@ int aie2_map_host_buf(struct amdxdna_dev_hdl
>> *ndev, u32 context_id, u64 addr, u6
>> int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user
>> *buf, u32 size, u32 *cols_filled);
>> int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev,
>> dma_addr_t addr, u32 size,
>> void *handle, int (*cb)(void*, void __iomem *,
>> size_t));
>> -int aie2_config_cu(struct amdxdna_hwctx *hwctx);
>> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>> + int (*notify_cb)(void *, void __iomem *, size_t));
>> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>> amdxdna_sched_job *job,
>> int (*notify_cb)(void *, void __iomem *, size_t));
>> int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
>> diff --git a/drivers/accel/amdxdna/aie2_smu.c
>> b/drivers/accel/amdxdna/aie2_smu.c
>> index d303701b0ded..7f292a615ed8 100644
>> --- a/drivers/accel/amdxdna/aie2_smu.c
>> +++ b/drivers/accel/amdxdna/aie2_smu.c
>> @@ -11,6 +11,7 @@
>> #include "aie2_pci.h"
>> #include "amdxdna_pci_drv.h"
>> +#include "amdxdna_pm.h"
>> #define SMU_RESULT_OK 1
>> @@ -59,12 +60,16 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev,
>> u32 dpm_level)
>> u32 freq;
>> int ret;
>> + ret = amdxdna_pm_resume_get(ndev->xdna);
>> + if (ret)
>> + return ret;
>> +
>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
>> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
>> if (ret) {
>> XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
>> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
>> - return ret;
>> + goto suspend_put;
>> }
>> ndev->npuclk_freq = freq;
>> @@ -73,8 +78,10 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev,
>> u32 dpm_level)
>> if (ret) {
>> XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
>> ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
>> - return ret;
>> + goto suspend_put;
>> }
>> +
>> + amdxdna_pm_suspend_put(ndev->xdna);
>> ndev->hclk_freq = freq;
>> ndev->dpm_level = dpm_level;
>> @@ -82,26 +89,35 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev,
>> u32 dpm_level)
>> ndev->npuclk_freq, ndev->hclk_freq);
>> return 0;
>> +
>> +suspend_put:
>> + amdxdna_pm_suspend_put(ndev->xdna);
>> + return ret;
>> }
>> int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>> {
>> int ret;
>> + ret = amdxdna_pm_resume_get(ndev->xdna);
>> + if (ret)
>> + return ret;
>> +
>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL,
>> dpm_level, NULL);
>> if (ret) {
>> XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
>> dpm_level, ret);
>> - return ret;
>> + goto suspend_put;
>> }
>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL,
>> dpm_level, NULL);
>> if (ret) {
>> XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
>> dpm_level, ret);
>> - return ret;
>> + goto suspend_put;
>> }
>> + amdxdna_pm_suspend_put(ndev->xdna);
>> ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>> ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
>> ndev->dpm_level = dpm_level;
>> @@ -110,6 +126,10 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev,
>> u32 dpm_level)
>> ndev->npuclk_freq, ndev->hclk_freq);
>> return 0;
>> +
>> +suspend_put:
>> + amdxdna_pm_suspend_put(ndev->xdna);
>> + return ret;
>> }
>> int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c
>> b/drivers/accel/amdxdna/amdxdna_ctx.c
>> index 4bfe4ef20550..c3300eff7460 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>> @@ -161,14 +161,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>> drm_device *dev, void *data, struct dr
>> if (args->ext || args->ext_flags)
>> return -EINVAL;
>> - if (!drm_dev_enter(dev, &idx))
>> - return -ENODEV;
>> -
>> hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
>> - if (!hwctx) {
>> - ret = -ENOMEM;
>> - goto exit;
>> - }
>> + if (!hwctx)
>> + return -ENOMEM;
>> if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p),
>> sizeof(hwctx->qos))) {
>> XDNA_ERR(xdna, "Access QoS info failed");
>> @@ -181,30 +176,36 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>> drm_device *dev, void *data, struct dr
>> hwctx->num_tiles = args->num_tiles;
>> hwctx->mem_size = args->mem_size;
>> hwctx->max_opc = args->max_opc;
>> - ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
>> - XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1,
>> MAX_HWCTX_ID),
>> - &client->next_hwctxid, GFP_KERNEL);
>> - if (ret < 0) {
>> - XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
>> +
>> + guard(mutex)(&xdna->dev_lock);
>> +
>> + if (!drm_dev_enter(dev, &idx)) {
>> + ret = -ENODEV;
>> goto free_hwctx;
>> }
>> - hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d",
>> client->pid, hwctx->id);
>> + ret = xdna->dev_info->ops->hwctx_init(hwctx);
>> + if (ret) {
>> + XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
>> + goto dev_exit;
>> + }
>> +
>> + hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid,
>> hwctx->fw_ctx_id);
>> if (!hwctx->name) {
>> ret = -ENOMEM;
>> - goto rm_id;
>> + goto fini_hwctx;
>> }
>> - mutex_lock(&xdna->dev_lock);
>> - ret = xdna->dev_info->ops->hwctx_init(hwctx);
>> - if (ret) {
>> - mutex_unlock(&xdna->dev_lock);
>> - XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
>> + ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
>> + XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1,
>> MAX_HWCTX_ID),
>> + &client->next_hwctxid, GFP_KERNEL);
>> + if (ret < 0) {
>> + XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
>> goto free_name;
>> }
>> +
>> args->handle = hwctx->id;
>> args->syncobj_handle = hwctx->syncobj_hdl;
>> - mutex_unlock(&xdna->dev_lock);
>> atomic64_set(&hwctx->job_submit_cnt, 0);
>> atomic64_set(&hwctx->job_free_cnt, 0);
>> @@ -214,12 +215,12 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>> drm_device *dev, void *data, struct dr
>> free_name:
>> kfree(hwctx->name);
>> -rm_id:
>> - xa_erase(&client->hwctx_xa, hwctx->id);
>> +fini_hwctx:
>> + xdna->dev_info->ops->hwctx_fini(hwctx);
>> +dev_exit:
>> + drm_dev_exit(idx);
>> free_hwctx:
>> kfree(hwctx);
>> -exit:
>> - drm_dev_exit(idx);
>> return ret;
>> }
>> @@ -431,11 +432,6 @@ int amdxdna_cmd_submit(struct amdxdna_client
>> *client,
>> goto unlock_srcu;
>> }
>> - if (hwctx->status != HWCTX_STAT_READY) {
>> - XDNA_ERR(xdna, "HW Context is not ready");
>> - ret = -EINVAL;
>> - goto unlock_srcu;
>> - }
>> job->hwctx = hwctx;
>> job->mm = current->mm;
>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c
>> b/drivers/accel/amdxdna/amdxdna_mailbox.c
>> index da1ac89bb78f..24258dcc18eb 100644
>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>> @@ -194,7 +194,8 @@ static void mailbox_release_msg(struct
>> mailbox_channel *mb_chann,
>> {
>> MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
>> mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
>> - mb_msg->notify_cb(mb_msg->handle, NULL, 0);
>> + if (mb_msg->notify_cb)
>> + mb_msg->notify_cb(mb_msg->handle, NULL, 0);
>> kfree(mb_msg);
>> }
>> @@ -248,7 +249,7 @@ mailbox_get_resp(struct mailbox_channel
>> *mb_chann, struct xdna_msg_header *heade
>> {
>> struct mailbox_msg *mb_msg;
>> int msg_id;
>> - int ret;
>> + int ret = 0;
>> msg_id = header->id;
>> if (!mailbox_validate_msgid(msg_id)) {
>> @@ -265,9 +266,11 @@ mailbox_get_resp(struct mailbox_channel
>> *mb_chann, struct xdna_msg_header *heade
>> MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
>> header->opcode, header->total_size, header->id);
>> - ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
>> - if (unlikely(ret))
>> - MB_ERR(mb_chann, "Message callback ret %d", ret);
>> + if (mb_msg->notify_cb) {
>> + ret = mb_msg->notify_cb(mb_msg->handle, data,
>> header->total_size);
>> + if (unlikely(ret))
>> + MB_ERR(mb_chann, "Message callback ret %d", ret);
>> + }
>> kfree(mb_msg);
>> return ret;
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> index 569cd703729d..aa04452310e5 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -13,13 +13,11 @@
>> #include <drm/gpu_scheduler.h>
>> #include <linux/iommu.h>
>> #include <linux/pci.h>
>> -#include <linux/pm_runtime.h>
>> #include "amdxdna_ctx.h"
>> #include "amdxdna_gem.h"
>> #include "amdxdna_pci_drv.h"
>> -
>> -#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
>> +#include "amdxdna_pm.h"
>> MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
>> MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
>> @@ -61,17 +59,9 @@ static int amdxdna_drm_open(struct drm_device
>> *ddev, struct drm_file *filp)
>> struct amdxdna_client *client;
>> int ret;
>> - ret = pm_runtime_resume_and_get(ddev->dev);
>> - if (ret) {
>> - XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
>> - return ret;
>> - }
>> -
>> client = kzalloc(sizeof(*client), GFP_KERNEL);
>> - if (!client) {
>> - ret = -ENOMEM;
>> - goto put_rpm;
>> - }
>> + if (!client)
>> + return -ENOMEM;
>> client->pid = pid_nr(rcu_access_pointer(filp->pid));
>> client->xdna = xdna;
>> @@ -106,9 +96,6 @@ static int amdxdna_drm_open(struct drm_device
>> *ddev, struct drm_file *filp)
>> iommu_sva_unbind_device(client->sva);
>> failed:
>> kfree(client);
>> -put_rpm:
>> - pm_runtime_mark_last_busy(ddev->dev);
>> - pm_runtime_put_autosuspend(ddev->dev);
>> return ret;
>> }
>> @@ -130,8 +117,6 @@ static void amdxdna_drm_close(struct drm_device
>> *ddev, struct drm_file *filp)
>> XDNA_DBG(xdna, "pid %d closed", client->pid);
>> kfree(client);
>> - pm_runtime_mark_last_busy(ddev->dev);
>> - pm_runtime_put_autosuspend(ddev->dev);
>> }
>> static int amdxdna_flush(struct file *f, fl_owner_t id)
>> @@ -310,19 +295,12 @@ static int amdxdna_probe(struct pci_dev *pdev,
>> const struct pci_device_id *id)
>> goto failed_dev_fini;
>> }
>> - pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
>> - pm_runtime_use_autosuspend(dev);
>> - pm_runtime_allow(dev);
>> -
>> ret = drm_dev_register(&xdna->ddev, 0);
>> if (ret) {
>> XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
>> - pm_runtime_forbid(dev);
>> goto failed_sysfs_fini;
>> }
>> - pm_runtime_mark_last_busy(dev);
>> - pm_runtime_put_autosuspend(dev);
>> return 0;
>> failed_sysfs_fini:
>> @@ -339,14 +317,10 @@ static int amdxdna_probe(struct pci_dev *pdev,
>> const struct pci_device_id *id)
>> static void amdxdna_remove(struct pci_dev *pdev)
>> {
>> struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
>> - struct device *dev = &pdev->dev;
>> struct amdxdna_client *client;
>> destroy_workqueue(xdna->notifier_wq);
>> - pm_runtime_get_noresume(dev);
>> - pm_runtime_forbid(dev);
>> -
>> drm_dev_unplug(&xdna->ddev);
>> amdxdna_sysfs_fini(xdna);
>> @@ -365,29 +339,9 @@ static void amdxdna_remove(struct pci_dev *pdev)
>> mutex_unlock(&xdna->dev_lock);
>> }
>> -static int amdxdna_pmops_suspend(struct device *dev)
>> -{
>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>> -
>> - if (!xdna->dev_info->ops->suspend)
>> - return -EOPNOTSUPP;
>> -
>> - return xdna->dev_info->ops->suspend(xdna);
>> -}
>> -
>> -static int amdxdna_pmops_resume(struct device *dev)
>> -{
>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>> -
>> - if (!xdna->dev_info->ops->resume)
>> - return -EOPNOTSUPP;
>> -
>> - return xdna->dev_info->ops->resume(xdna);
>> -}
>> -
>> static const struct dev_pm_ops amdxdna_pm_ops = {
>> - SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
>> - RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume, NULL)
>> + SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume)
>> + RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL)
>> };
>> static struct pci_driver amdxdna_pci_driver = {
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> index 72d6696d49da..626beebf730e 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> @@ -6,6 +6,7 @@
>> #ifndef _AMDXDNA_PCI_DRV_H_
>> #define _AMDXDNA_PCI_DRV_H_
>> +#include <drm/drm_print.h>
>
> This seems like a spurious header inclusion. It shouldn't be needed
> for adding a bool to the struct.
>
>
>> #include <linux/workqueue.h>
>> #include <linux/xarray.h>
>> @@ -99,6 +100,7 @@ struct amdxdna_dev {
>> struct amdxdna_fw_ver fw_ver;
>> struct rw_semaphore notifier_lock; /* for mmu notifier*/
>> struct workqueue_struct *notifier_wq;
>> + bool rpm_on;
>
> I'm wondering do you really need a new variable to track the runtime
> PM status? Can't you just use pm_runtime_active() and the appropriate
> locking when checking it?
>
>> };
>> /*
>> diff --git a/drivers/accel/amdxdna/amdxdna_pm.c
>> b/drivers/accel/amdxdna/amdxdna_pm.c
>> new file mode 100644
>> index 000000000000..fad14f60b99b
>> --- /dev/null
>> +++ b/drivers/accel/amdxdna/amdxdna_pm.c
>> @@ -0,0 +1,96 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#include <drm/amdxdna_accel.h>
>> +#include <drm/drm_drv.h>
>> +#include <linux/pm_runtime.h>
>> +
>> +#include "amdxdna_pm.h"
>> +
>> +#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
>> +
>> +int amdxdna_pm_suspend(struct device *dev)
>> +{
>> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>> + int ret = -EOPNOTSUPP;
>> + bool rpm;
>> +
>> + if (xdna->dev_info->ops->suspend) {
>> + rpm = xdna->rpm_on;
>> + xdna->rpm_on = false;
>> + ret = xdna->dev_info->ops->suspend(xdna);
>> + xdna->rpm_on = rpm;
>> + }
>> +
>> + XDNA_DBG(xdna, "Suspend done ret %d", ret);
>> + return ret;
>> +}
>> +
>> +int amdxdna_pm_resume(struct device *dev)
>> +{
>> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>> + int ret = -EOPNOTSUPP;
>> + bool rpm;
>> +
>> + if (xdna->dev_info->ops->resume) {
>> + rpm = xdna->rpm_on;
>> + xdna->rpm_on = false;
>> + ret = xdna->dev_info->ops->resume(xdna);
>> + xdna->rpm_on = rpm;
>> + }
>> +
>> + XDNA_DBG(xdna, "Resume done ret %d", ret);
>> + return ret;
>> +}
>> +
>> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna)
>> +{
>> + struct device *dev = xdna->ddev.dev;
>> + int ret;
>> +
>> + if (!xdna->rpm_on)
>> + return 0;
>> +
>> + ret = pm_runtime_resume_and_get(dev);
>> + if (ret) {
>> + XDNA_ERR(xdna, "Resume failed: %d", ret);
>> + pm_runtime_set_suspended(dev);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna)
>> +{
>> + struct device *dev = xdna->ddev.dev;
>> +
>> + if (!xdna->rpm_on)
>> + return;
>> +
>> + pm_runtime_mark_last_busy(dev);
>> + pm_runtime_put_autosuspend(dev);
>> +}
>> +
>> +void amdxdna_rpm_init(struct amdxdna_dev *xdna)
>> +{
>> + struct device *dev = xdna->ddev.dev;
>> +
>> + pm_runtime_set_active(dev);
>> + pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
>> + pm_runtime_use_autosuspend(dev);
>> + pm_runtime_allow(dev);
>> + pm_runtime_mark_last_busy(dev);
>> + pm_runtime_put_autosuspend(dev);
>> + xdna->rpm_on = true;
>> +}
>> +
>> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna)
>> +{
>> + struct device *dev = xdna->ddev.dev;
>> +
>> + xdna->rpm_on = false;
>> + pm_runtime_get_noresume(dev);
>> + pm_runtime_forbid(dev);
>> +}
>> diff --git a/drivers/accel/amdxdna/amdxdna_pm.h
>> b/drivers/accel/amdxdna/amdxdna_pm.h
>> new file mode 100644
>> index 000000000000..11c295b6d64a
>> --- /dev/null
>> +++ b/drivers/accel/amdxdna/amdxdna_pm.h
>> @@ -0,0 +1,18 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
>> + */
>> +
>> +#ifndef _AMDXDNA_PM_H_
>> +#define _AMDXDNA_PM_H_
>> +
>> +#include "amdxdna_pci_drv.h"
>> +
>> +int amdxdna_pm_suspend(struct device *dev);
>> +int amdxdna_pm_resume(struct device *dev);
>> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna);
>> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna);
>> +void amdxdna_rpm_init(struct amdxdna_dev *xdna);
>> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna);
>
> Nit: Why rpm for init/fini and pm for all the others? Wouldn't it
> make sense to pick one or the other?
Sure. I will change to _pm_init and _pm_fini.
Thanks,
Lizhi
>
>> +
>> +#endif /* _AMDXDNA_PM_H_ */
>
On 9/18/2025 12:41 PM, Lizhi Hou wrote:
>
> On 9/18/25 10:31, Mario Limonciello wrote:
>>
>>
>> On 9/18/2025 12:24 PM, Lizhi Hou wrote:
>>> Currently, pm_runtime_resume_and_get() is invoked in the driver's open
>>> callback, and pm_runtime_put_autosuspend() is called in the close
>>> callback. As a result, the device remains active whenever an application
>>> opens it, even if no I/O is performed, leading to unnecessary power
>>> consumption.
>>>
>>> Move the runtime PM calls to the AIE2 callbacks that actually interact
>>> with the hardware. The device will automatically suspend after 5 seconds
>>> of inactivity (no hardware accesses and no pending commands), and it
>>> will
>>> be resumed on the next hardware access.
>>>
>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>> ---
>>> drivers/accel/amdxdna/Makefile | 1 +
>>> drivers/accel/amdxdna/aie2_ctx.c | 42 ++++++++---
>>> drivers/accel/amdxdna/aie2_message.c | 28 ++++----
>>> drivers/accel/amdxdna/aie2_pci.c | 79 ++++++++++----------
>>> drivers/accel/amdxdna/aie2_pci.h | 3 +-
>>> drivers/accel/amdxdna/aie2_smu.c | 28 ++++++--
>>> drivers/accel/amdxdna/amdxdna_ctx.c | 54 +++++++-------
>>> drivers/accel/amdxdna/amdxdna_mailbox.c | 13 ++--
>>> drivers/accel/amdxdna/amdxdna_pci_drv.c | 56 ++-------------
>>> drivers/accel/amdxdna/amdxdna_pci_drv.h | 2 +
>>> drivers/accel/amdxdna/amdxdna_pm.c | 96 +++++++++++++++++++++++++
>>> drivers/accel/amdxdna/amdxdna_pm.h | 18 +++++
>>> 12 files changed, 262 insertions(+), 158 deletions(-)
>>> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.c
>>> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.h
>>>
>>> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/
>>> Makefile
>>> index 6797dac65efa..6344aaf523fa 100644
>>> --- a/drivers/accel/amdxdna/Makefile
>>> +++ b/drivers/accel/amdxdna/Makefile
>>> @@ -14,6 +14,7 @@ amdxdna-y := \
>>> amdxdna_mailbox.o \
>>> amdxdna_mailbox_helper.o \
>>> amdxdna_pci_drv.o \
>>> + amdxdna_pm.o \
>>> amdxdna_sysfs.o \
>>> amdxdna_ubuf.o \
>>> npu1_regs.o \
>>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/
>>> amdxdna/aie2_ctx.c
>>> index e9f9b1fa5dc1..691fdb3b008f 100644
>>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>>> @@ -21,6 +21,7 @@
>>> #include "amdxdna_gem.h"
>>> #include "amdxdna_mailbox.h"
>>> #include "amdxdna_pci_drv.h"
>>> +#include "amdxdna_pm.h"
>>> static bool force_cmdlist;
>>> module_param(force_cmdlist, bool, 0600);
>>> @@ -88,7 +89,7 @@ static int aie2_hwctx_restart(struct amdxdna_dev
>>> *xdna, struct amdxdna_hwctx *hw
>>> goto out;
>>> }
>>> - ret = aie2_config_cu(hwctx);
>>> + ret = aie2_config_cu(hwctx, NULL);
>>> if (ret) {
>>> XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
>>> goto out;
>>> @@ -167,14 +168,11 @@ static int aie2_hwctx_resume_cb(struct
>>> amdxdna_hwctx *hwctx, void *arg)
>>> int aie2_hwctx_resume(struct amdxdna_client *client)
>>> {
>>> - struct amdxdna_dev *xdna = client->xdna;
>>> -
>>> /*
>>> * The resume path cannot guarantee that mailbox channel can be
>>> * regenerated. If this happen, when submit message to this
>>> * mailbox channel, error will return.
>>> */
>>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>> return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
>>> }
>>> @@ -184,6 +182,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>>> struct dma_fence *fence = job->fence;
>>> trace_xdna_job(&job->base, job->hwctx->name, "signaled
>>> fence", job->seq);
>>> +
>>> + amdxdna_pm_suspend_put(job->hwctx->client->xdna);
>>> job->hwctx->priv->completed++;
>>> dma_fence_signal(fence);
>>> @@ -531,7 +531,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>> .num_rqs = DRM_SCHED_PRIORITY_COUNT,
>>> .credit_limit = HWCTX_MAX_CMDS,
>>> .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
>>> - .name = hwctx->name,
>>> + .name = "amdxdna_js",
>>> .dev = xdna->ddev.dev,
>>> };
>>> struct drm_gpu_scheduler *sched;
>>> @@ -697,6 +697,14 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
>>> kfree(hwctx->cus);
>>> }
>>> +static int aie2_config_cu_resp_handler(void *handle, void __iomem
>>> *data, size_t size)
>>> +{
>>> + struct amdxdna_hwctx *hwctx = handle;
>>> +
>>> + amdxdna_pm_suspend_put(hwctx->client->xdna);
>>> + return 0;
>>> +}
>>> +
>>> static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void
>>> *buf, u32 size)
>>> {
>>> struct amdxdna_hwctx_param_config_cu *config = buf;
>>> @@ -728,10 +736,14 @@ static int aie2_hwctx_cu_config(struct
>>> amdxdna_hwctx *hwctx, void *buf, u32 size
>>> if (!hwctx->cus)
>>> return -ENOMEM;
>>> - ret = aie2_config_cu(hwctx);
>>> + ret = amdxdna_pm_resume_get(xdna);
>>> + if (ret)
>>> + goto free_cus;
>>> +
>>> + ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
>>> if (ret) {
>>> XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
>>> - goto free_cus;
>>> + goto pm_suspend_put;
>>> }
>>> wmb(); /* To avoid locking in command submit when check
>>> status */
>>> @@ -739,6 +751,8 @@ static int aie2_hwctx_cu_config(struct
>>> amdxdna_hwctx *hwctx, void *buf, u32 size
>>> return 0;
>>> +pm_suspend_put:
>>> + amdxdna_pm_suspend_put(xdna);
>>> free_cus:
>>> kfree(hwctx->cus);
>>> hwctx->cus = NULL;
>>> @@ -862,11 +876,15 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>> *hwctx, struct amdxdna_sched_job *job,
>>> goto free_chain;
>>> }
>>> + ret = amdxdna_pm_resume_get(xdna);
>>> + if (ret)
>>> + goto cleanup_job;
>>> +
>>> retry:
>>> ret = drm_gem_lock_reservations(job->bos, job->bo_cnt,
>>> &acquire_ctx);
>>> if (ret) {
>>> XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
>>> - goto cleanup_job;
>>> + goto suspend_put;
>>> }
>>> for (i = 0; i < job->bo_cnt; i++) {
>>> @@ -874,7 +892,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx,
>>> struct amdxdna_sched_job *job,
>>> if (ret) {
>>> XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
>>> drm_gem_unlock_reservations(job->bos, job->bo_cnt,
>>> &acquire_ctx);
>>> - goto cleanup_job;
>>> + goto suspend_put;
>>> }
>>> }
>>> @@ -889,12 +907,12 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>> *hwctx, struct amdxdna_sched_job *job,
>>> msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
>>> } else if (time_after(jiffies, timeout)) {
>>> ret = -ETIME;
>>> - goto cleanup_job;
>>> + goto suspend_put;
>>> }
>>> ret = aie2_populate_range(abo);
>>> if (ret)
>>> - goto cleanup_job;
>>> + goto suspend_put;
>>> goto retry;
>>> }
>>> }
>>> @@ -920,6 +938,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx,
>>> struct amdxdna_sched_job *job,
>>> return 0;
>>> +suspend_put:
>>> + amdxdna_pm_suspend_put(xdna);
>>> cleanup_job:
>>> drm_sched_job_cleanup(&job->base);
>>> free_chain:
>>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/
>>> amdxdna/aie2_message.c
>>> index 9caad083543d..4660e8297ed8 100644
>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>> @@ -37,7 +37,7 @@ static int aie2_send_mgmt_msg_wait(struct
>>> amdxdna_dev_hdl *ndev,
>>> if (!ndev->mgmt_chann)
>>> return -ENODEV;
>>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>> + drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna-
>>> >dev_lock));
>>> ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
>>> if (ret == -ETIME) {
>>> xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>> @@ -377,15 +377,17 @@ int aie2_register_asyn_event_msg(struct
>>> amdxdna_dev_hdl *ndev, dma_addr_t addr,
>>> return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
>>> }
>>> -int aie2_config_cu(struct amdxdna_hwctx *hwctx)
>>> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>>> + int (*notify_cb)(void *, void __iomem *, size_t))
>>> {
>>> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>>> struct amdxdna_dev *xdna = hwctx->client->xdna;
>>> u32 shift = xdna->dev_info->dev_mem_buf_shift;
>>> - DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
>>> + struct config_cu_req req = { 0 };
>>> + struct xdna_mailbox_msg msg;
>>> struct drm_gem_object *gobj;
>>> struct amdxdna_gem_obj *abo;
>>> - int ret, i;
>>> + int i;
>>> if (!chann)
>>> return -ENODEV;
>>> @@ -423,18 +425,12 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
>>> }
>>> req.num_cus = hwctx->cus->num_cus;
>>> - ret = xdna_send_msg_wait(xdna, chann, &msg);
>>> - if (ret == -ETIME)
>>> - aie2_destroy_context(xdna->dev_handle, hwctx);
>>> -
>>> - if (resp.status == AIE2_STATUS_SUCCESS) {
>>> - XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
>>> - return 0;
>>> - }
>>> -
>>> - XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
>>> - msg.opcode, resp.status, ret);
>>> - return ret;
>>> + msg.send_data = (u8 *)&req;
>>> + msg.send_size = sizeof(req);
>>> + msg.handle = hwctx;
>>> + msg.opcode = MSG_OP_CONFIG_CU;
>>> + msg.notify_cb = notify_cb;
>>> + return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>>> }
>>> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>>> amdxdna_sched_job *job,
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/
>>> amdxdna/aie2_pci.c
>>> index 6e39c769bb6d..f46a3dcd0580 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>> @@ -25,6 +25,7 @@
>>> #include "amdxdna_gem.h"
>>> #include "amdxdna_mailbox.h"
>>> #include "amdxdna_pci_drv.h"
>>> +#include "amdxdna_pm.h"
>>> static int aie2_max_col = XRS_MAX_COL;
>>> module_param(aie2_max_col, uint, 0600);
>>> @@ -223,15 +224,6 @@ static int aie2_mgmt_fw_init(struct
>>> amdxdna_dev_hdl *ndev)
>>> return ret;
>>> }
>>> - if (!ndev->async_events)
>>> - return 0;
>>> -
>>> - ret = aie2_error_async_events_send(ndev);
>>> - if (ret) {
>>> - XDNA_ERR(ndev->xdna, "Send async events failed");
>>> - return ret;
>>> - }
>>> -
>>> return 0;
>>> }
>>> @@ -257,6 +249,8 @@ static int aie2_mgmt_fw_query(struct
>>> amdxdna_dev_hdl *ndev)
>>> return ret;
>>> }
>>> + ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>>> +
>>> return 0;
>>> }
>>> @@ -338,6 +332,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
>>> ndev->mbox = NULL;
>>> aie2_psp_stop(ndev->psp_hdl);
>>> aie2_smu_fini(ndev);
>>> + aie2_error_async_events_free(ndev);
>>> pci_disable_device(pdev);
>>> ndev->dev_status = AIE2_DEV_INIT;
>>> @@ -424,6 +419,18 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>>> goto destroy_mgmt_chann;
>>> }
>>> + ret = aie2_mgmt_fw_query(ndev);
>>> + if (ret) {
>>> + XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
>>> + goto destroy_mgmt_chann;
>>> + }
>>> +
>>> + ret = aie2_error_async_events_alloc(ndev);
>>> + if (ret) {
>>> + XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
>>> + goto destroy_mgmt_chann;
>>> + }
>>> +
>>> ndev->dev_status = AIE2_DEV_START;
>>> return 0;
>>> @@ -459,7 +466,6 @@ static int aie2_hw_resume(struct amdxdna_dev *xdna)
>>> struct amdxdna_client *client;
>>> int ret;
>>> - guard(mutex)(&xdna->dev_lock);
>>> ret = aie2_hw_start(xdna);
>>> if (ret) {
>>> XDNA_ERR(xdna, "Start hardware failed, %d", ret);
>>> @@ -565,13 +571,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>> goto release_fw;
>>> }
>>> - ret = aie2_mgmt_fw_query(ndev);
>>> - if (ret) {
>>> - XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
>>> - goto stop_hw;
>>> - }
>>> - ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>>> -
>>> xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
>>> for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
>>> xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv-
>>> >dpm_clk_tbl[i].hclk;
>>> @@ -587,30 +586,10 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>> goto stop_hw;
>>> }
>>> - ret = aie2_error_async_events_alloc(ndev);
>>> - if (ret) {
>>> - XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
>>> - goto stop_hw;
>>> - }
>>> -
>>> - ret = aie2_error_async_events_send(ndev);
>>> - if (ret) {
>>> - XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
>>> - goto async_event_free;
>>> - }
>>> -
>>> - /* Issue a command to make sure firmware handled async events */
>>> - ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
>>> - if (ret) {
>>> - XDNA_ERR(xdna, "Re-query firmware version failed");
>>> - goto async_event_free;
>>> - }
>>> -
>>> release_firmware(fw);
>>> + amdxdna_rpm_init(xdna);
>>> return 0;
>>> -async_event_free:
>>> - aie2_error_async_events_free(ndev);
>>> stop_hw:
>>> aie2_hw_stop(xdna);
>>> release_fw:
>>> @@ -621,10 +600,8 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>> static void aie2_fini(struct amdxdna_dev *xdna)
>>> {
>>> - struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>>> -
>>> + amdxdna_rpm_fini(xdna);
>>> aie2_hw_stop(xdna);
>>> - aie2_error_async_events_free(ndev);
>>> }
>>> static int aie2_get_aie_status(struct amdxdna_client *client,
>>> @@ -856,6 +833,10 @@ static int aie2_get_info(struct amdxdna_client
>>> *client, struct amdxdna_drm_get_i
>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>> return -ENODEV;
>>> + ret = amdxdna_pm_resume_get(xdna);
>>> + if (ret)
>>> + goto dev_exit;
>>> +
>>> switch (args->param) {
>>> case DRM_AMDXDNA_QUERY_AIE_STATUS:
>>> ret = aie2_get_aie_status(client, args);
>>> @@ -882,8 +863,11 @@ static int aie2_get_info(struct amdxdna_client
>>> *client, struct amdxdna_drm_get_i
>>> XDNA_ERR(xdna, "Not supported request parameter %u", args-
>>> >param);
>>> ret = -EOPNOTSUPP;
>>> }
>>> +
>>> + amdxdna_pm_suspend_put(xdna);
>>> XDNA_DBG(xdna, "Got param %d", args->param);
>>> +dev_exit:
>>> drm_dev_exit(idx);
>>> return ret;
>>> }
>>> @@ -932,6 +916,10 @@ static int aie2_get_array(struct amdxdna_client
>>> *client,
>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>> return -ENODEV;
>>> + ret = amdxdna_pm_resume_get(xdna);
>>> + if (ret)
>>> + goto dev_exit;
>>> +
>>> switch (args->param) {
>>> case DRM_AMDXDNA_HW_CONTEXT_ALL:
>>> ret = aie2_query_ctx_status_array(client, args);
>>> @@ -940,8 +928,11 @@ static int aie2_get_array(struct amdxdna_client
>>> *client,
>>> XDNA_ERR(xdna, "Not supported request parameter %u", args-
>>> >param);
>>> ret = -EOPNOTSUPP;
>>> }
>>> +
>>> + amdxdna_pm_suspend_put(xdna);
>>> XDNA_DBG(xdna, "Got param %d", args->param);
>>> +dev_exit:
>>> drm_dev_exit(idx);
>>> return ret;
>>> }
>>> @@ -980,6 +971,10 @@ static int aie2_set_state(struct amdxdna_client
>>> *client,
>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>> return -ENODEV;
>>> + ret = amdxdna_pm_resume_get(xdna);
>>> + if (ret)
>>> + goto dev_exit;
>>> +
>>> switch (args->param) {
>>> case DRM_AMDXDNA_SET_POWER_MODE:
>>> ret = aie2_set_power_mode(client, args);
>>> @@ -990,6 +985,8 @@ static int aie2_set_state(struct amdxdna_client
>>> *client,
>>> break;
>>> }
>>> + amdxdna_pm_suspend_put(xdna);
>>> +dev_exit:
>>> drm_dev_exit(idx);
>>> return ret;
>>> }
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/
>>> amdxdna/aie2_pci.h
>>> index 91a8e948f82a..289a23ecd5f1 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>> @@ -272,7 +272,8 @@ int aie2_map_host_buf(struct amdxdna_dev_hdl
>>> *ndev, u32 context_id, u64 addr, u6
>>> int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user
>>> *buf, u32 size, u32 *cols_filled);
>>> int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev,
>>> dma_addr_t addr, u32 size,
>>> void *handle, int (*cb)(void*, void __iomem *,
>>> size_t));
>>> -int aie2_config_cu(struct amdxdna_hwctx *hwctx);
>>> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>>> + int (*notify_cb)(void *, void __iomem *, size_t));
>>> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>>> amdxdna_sched_job *job,
>>> int (*notify_cb)(void *, void __iomem *, size_t));
>>> int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
>>> diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/
>>> amdxdna/aie2_smu.c
>>> index d303701b0ded..7f292a615ed8 100644
>>> --- a/drivers/accel/amdxdna/aie2_smu.c
>>> +++ b/drivers/accel/amdxdna/aie2_smu.c
>>> @@ -11,6 +11,7 @@
>>> #include "aie2_pci.h"
>>> #include "amdxdna_pci_drv.h"
>>> +#include "amdxdna_pm.h"
>>> #define SMU_RESULT_OK 1
>>> @@ -59,12 +60,16 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev,
>>> u32 dpm_level)
>>> u32 freq;
>>> int ret;
>>> + ret = amdxdna_pm_resume_get(ndev->xdna);
>>> + if (ret)
>>> + return ret;
>>> +
>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
>>> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
>>> if (ret) {
>>> XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
>>> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
>>> - return ret;
>>> + goto suspend_put;
>>> }
>>> ndev->npuclk_freq = freq;
>>> @@ -73,8 +78,10 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev,
>>> u32 dpm_level)
>>> if (ret) {
>>> XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
>>> ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
>>> - return ret;
>>> + goto suspend_put;
>>> }
>>> +
>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>> ndev->hclk_freq = freq;
>>> ndev->dpm_level = dpm_level;
>>> @@ -82,26 +89,35 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev,
>>> u32 dpm_level)
>>> ndev->npuclk_freq, ndev->hclk_freq);
>>> return 0;
>>> +
>>> +suspend_put:
>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>> + return ret;
>>> }
>>> int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>> {
>>> int ret;
>>> + ret = amdxdna_pm_resume_get(ndev->xdna);
>>> + if (ret)
>>> + return ret;
>>> +
>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL,
>>> dpm_level, NULL);
>>> if (ret) {
>>> XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
>>> dpm_level, ret);
>>> - return ret;
>>> + goto suspend_put;
>>> }
>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL,
>>> dpm_level, NULL);
>>> if (ret) {
>>> XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
>>> dpm_level, ret);
>>> - return ret;
>>> + goto suspend_put;
>>> }
>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>> ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>>> ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
>>> ndev->dpm_level = dpm_level;
>>> @@ -110,6 +126,10 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev,
>>> u32 dpm_level)
>>> ndev->npuclk_freq, ndev->hclk_freq);
>>> return 0;
>>> +
>>> +suspend_put:
>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>> + return ret;
>>> }
>>> int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/
>>> amdxdna/amdxdna_ctx.c
>>> index 4bfe4ef20550..c3300eff7460 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>> @@ -161,14 +161,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>> drm_device *dev, void *data, struct dr
>>> if (args->ext || args->ext_flags)
>>> return -EINVAL;
>>> - if (!drm_dev_enter(dev, &idx))
>>> - return -ENODEV;
>>> -
>>> hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
>>> - if (!hwctx) {
>>> - ret = -ENOMEM;
>>> - goto exit;
>>> - }
>>> + if (!hwctx)
>>> + return -ENOMEM;
>>> if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p),
>>> sizeof(hwctx->qos))) {
>>> XDNA_ERR(xdna, "Access QoS info failed");
>>> @@ -181,30 +176,36 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>> drm_device *dev, void *data, struct dr
>>> hwctx->num_tiles = args->num_tiles;
>>> hwctx->mem_size = args->mem_size;
>>> hwctx->max_opc = args->max_opc;
>>> - ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
>>> - XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1,
>>> MAX_HWCTX_ID),
>>> - &client->next_hwctxid, GFP_KERNEL);
>>> - if (ret < 0) {
>>> - XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
>>> +
>>> + guard(mutex)(&xdna->dev_lock);
>>> +
>>> + if (!drm_dev_enter(dev, &idx)) {
>>> + ret = -ENODEV;
>>> goto free_hwctx;
>>> }
>>> - hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client-
>>> >pid, hwctx->id);
>>> + ret = xdna->dev_info->ops->hwctx_init(hwctx);
>>> + if (ret) {
>>> + XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
>>> + goto dev_exit;
>>> + }
>>> +
>>> + hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid,
>>> hwctx->fw_ctx_id);
>>> if (!hwctx->name) {
>>> ret = -ENOMEM;
>>> - goto rm_id;
>>> + goto fini_hwctx;
>>> }
>>> - mutex_lock(&xdna->dev_lock);
>>> - ret = xdna->dev_info->ops->hwctx_init(hwctx);
>>> - if (ret) {
>>> - mutex_unlock(&xdna->dev_lock);
>>> - XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
>>> + ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
>>> + XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1,
>>> MAX_HWCTX_ID),
>>> + &client->next_hwctxid, GFP_KERNEL);
>>> + if (ret < 0) {
>>> + XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
>>> goto free_name;
>>> }
>>> +
>>> args->handle = hwctx->id;
>>> args->syncobj_handle = hwctx->syncobj_hdl;
>>> - mutex_unlock(&xdna->dev_lock);
>>> atomic64_set(&hwctx->job_submit_cnt, 0);
>>> atomic64_set(&hwctx->job_free_cnt, 0);
>>> @@ -214,12 +215,12 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>> drm_device *dev, void *data, struct dr
>>> free_name:
>>> kfree(hwctx->name);
>>> -rm_id:
>>> - xa_erase(&client->hwctx_xa, hwctx->id);
>>> +fini_hwctx:
>>> + xdna->dev_info->ops->hwctx_fini(hwctx);
>>> +dev_exit:
>>> + drm_dev_exit(idx);
>>> free_hwctx:
>>> kfree(hwctx);
>>> -exit:
>>> - drm_dev_exit(idx);
>>> return ret;
>>> }
>>> @@ -431,11 +432,6 @@ int amdxdna_cmd_submit(struct amdxdna_client
>>> *client,
>>> goto unlock_srcu;
>>> }
>>> - if (hwctx->status != HWCTX_STAT_READY) {
>>> - XDNA_ERR(xdna, "HW Context is not ready");
>>> - ret = -EINVAL;
>>> - goto unlock_srcu;
>>> - }
>>> job->hwctx = hwctx;
>>> job->mm = current->mm;
>>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/
>>> amdxdna/amdxdna_mailbox.c
>>> index da1ac89bb78f..24258dcc18eb 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>>> @@ -194,7 +194,8 @@ static void mailbox_release_msg(struct
>>> mailbox_channel *mb_chann,
>>> {
>>> MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
>>> mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
>>> - mb_msg->notify_cb(mb_msg->handle, NULL, 0);
>>> + if (mb_msg->notify_cb)
>>> + mb_msg->notify_cb(mb_msg->handle, NULL, 0);
>>> kfree(mb_msg);
>>> }
>>> @@ -248,7 +249,7 @@ mailbox_get_resp(struct mailbox_channel
>>> *mb_chann, struct xdna_msg_header *heade
>>> {
>>> struct mailbox_msg *mb_msg;
>>> int msg_id;
>>> - int ret;
>>> + int ret = 0;
>>> msg_id = header->id;
>>> if (!mailbox_validate_msgid(msg_id)) {
>>> @@ -265,9 +266,11 @@ mailbox_get_resp(struct mailbox_channel
>>> *mb_chann, struct xdna_msg_header *heade
>>> MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
>>> header->opcode, header->total_size, header->id);
>>> - ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
>>> - if (unlikely(ret))
>>> - MB_ERR(mb_chann, "Message callback ret %d", ret);
>>> + if (mb_msg->notify_cb) {
>>> + ret = mb_msg->notify_cb(mb_msg->handle, data, header-
>>> >total_size);
>>> + if (unlikely(ret))
>>> + MB_ERR(mb_chann, "Message callback ret %d", ret);
>>> + }
>>> kfree(mb_msg);
>>> return ret;
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/
>>> amdxdna/amdxdna_pci_drv.c
>>> index 569cd703729d..aa04452310e5 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> @@ -13,13 +13,11 @@
>>> #include <drm/gpu_scheduler.h>
>>> #include <linux/iommu.h>
>>> #include <linux/pci.h>
>>> -#include <linux/pm_runtime.h>
>>> #include "amdxdna_ctx.h"
>>> #include "amdxdna_gem.h"
>>> #include "amdxdna_pci_drv.h"
>>> -
>>> -#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
>>> +#include "amdxdna_pm.h"
>>> MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
>>> MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
>>> @@ -61,17 +59,9 @@ static int amdxdna_drm_open(struct drm_device
>>> *ddev, struct drm_file *filp)
>>> struct amdxdna_client *client;
>>> int ret;
>>> - ret = pm_runtime_resume_and_get(ddev->dev);
>>> - if (ret) {
>>> - XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
>>> - return ret;
>>> - }
>>> -
>>> client = kzalloc(sizeof(*client), GFP_KERNEL);
>>> - if (!client) {
>>> - ret = -ENOMEM;
>>> - goto put_rpm;
>>> - }
>>> + if (!client)
>>> + return -ENOMEM;
>>> client->pid = pid_nr(rcu_access_pointer(filp->pid));
>>> client->xdna = xdna;
>>> @@ -106,9 +96,6 @@ static int amdxdna_drm_open(struct drm_device
>>> *ddev, struct drm_file *filp)
>>> iommu_sva_unbind_device(client->sva);
>>> failed:
>>> kfree(client);
>>> -put_rpm:
>>> - pm_runtime_mark_last_busy(ddev->dev);
>>> - pm_runtime_put_autosuspend(ddev->dev);
>>> return ret;
>>> }
>>> @@ -130,8 +117,6 @@ static void amdxdna_drm_close(struct drm_device
>>> *ddev, struct drm_file *filp)
>>> XDNA_DBG(xdna, "pid %d closed", client->pid);
>>> kfree(client);
>>> - pm_runtime_mark_last_busy(ddev->dev);
>>> - pm_runtime_put_autosuspend(ddev->dev);
>>> }
>>> static int amdxdna_flush(struct file *f, fl_owner_t id)
>>> @@ -310,19 +295,12 @@ static int amdxdna_probe(struct pci_dev *pdev,
>>> const struct pci_device_id *id)
>>> goto failed_dev_fini;
>>> }
>>> - pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
>>> - pm_runtime_use_autosuspend(dev);
>>> - pm_runtime_allow(dev);
>>> -
>>> ret = drm_dev_register(&xdna->ddev, 0);
>>> if (ret) {
>>> XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
>>> - pm_runtime_forbid(dev);
>>> goto failed_sysfs_fini;
>>> }
>>> - pm_runtime_mark_last_busy(dev);
>>> - pm_runtime_put_autosuspend(dev);
>>> return 0;
>>> failed_sysfs_fini:
>>> @@ -339,14 +317,10 @@ static int amdxdna_probe(struct pci_dev *pdev,
>>> const struct pci_device_id *id)
>>> static void amdxdna_remove(struct pci_dev *pdev)
>>> {
>>> struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
>>> - struct device *dev = &pdev->dev;
>>> struct amdxdna_client *client;
>>> destroy_workqueue(xdna->notifier_wq);
>>> - pm_runtime_get_noresume(dev);
>>> - pm_runtime_forbid(dev);
>>> -
>>> drm_dev_unplug(&xdna->ddev);
>>> amdxdna_sysfs_fini(xdna);
>>> @@ -365,29 +339,9 @@ static void amdxdna_remove(struct pci_dev *pdev)
>>> mutex_unlock(&xdna->dev_lock);
>>> }
>>> -static int amdxdna_pmops_suspend(struct device *dev)
>>> -{
>>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>>> -
>>> - if (!xdna->dev_info->ops->suspend)
>>> - return -EOPNOTSUPP;
>>> -
>>> - return xdna->dev_info->ops->suspend(xdna);
>>> -}
>>> -
>>> -static int amdxdna_pmops_resume(struct device *dev)
>>> -{
>>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>>> -
>>> - if (!xdna->dev_info->ops->resume)
>>> - return -EOPNOTSUPP;
>>> -
>>> - return xdna->dev_info->ops->resume(xdna);
>>> -}
>>> -
>>> static const struct dev_pm_ops amdxdna_pm_ops = {
>>> - SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
>>> - RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume, NULL)
>>> + SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume)
>>> + RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL)
>>> };
>>> static struct pci_driver amdxdna_pci_driver = {
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/
>>> amdxdna/amdxdna_pci_drv.h
>>> index 72d6696d49da..626beebf730e 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>> @@ -6,6 +6,7 @@
>>> #ifndef _AMDXDNA_PCI_DRV_H_
>>> #define _AMDXDNA_PCI_DRV_H_
>>> +#include <drm/drm_print.h>
>>
>> This seems like a spurious header inclusion. It shouldn't be needed
>> for adding a bool to the struct.
>>
>>
>>> #include <linux/workqueue.h>
>>> #include <linux/xarray.h>
>>> @@ -99,6 +100,7 @@ struct amdxdna_dev {
>>> struct amdxdna_fw_ver fw_ver;
>>> struct rw_semaphore notifier_lock; /* for mmu notifier*/
>>> struct workqueue_struct *notifier_wq;
>>> + bool rpm_on;
>>
>> I'm wondering do you really need a new variable to track the runtime
>> PM status? Can't you just use pm_runtime_active() and the appropriate
>> locking when checking it?
>>
Just make sure you didn't miss the two comments above when scanning the
email response, since you didn't reply to them.
>>> };
>>> /*
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/
>>> amdxdna/amdxdna_pm.c
>>> new file mode 100644
>>> index 000000000000..fad14f60b99b
>>> --- /dev/null
>>> +++ b/drivers/accel/amdxdna/amdxdna_pm.c
>>> @@ -0,0 +1,96 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
>>> + */
>>> +
>>> +#include <drm/amdxdna_accel.h>
>>> +#include <drm/drm_drv.h>
>>> +#include <linux/pm_runtime.h>
>>> +
>>> +#include "amdxdna_pm.h"
>>> +
>>> +#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
>>> +
>>> +int amdxdna_pm_suspend(struct device *dev)
>>> +{
>>> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>>> + int ret = -EOPNOTSUPP;
>>> + bool rpm;
>>> +
>>> + if (xdna->dev_info->ops->suspend) {
>>> + rpm = xdna->rpm_on;
>>> + xdna->rpm_on = false;
>>> + ret = xdna->dev_info->ops->suspend(xdna);
>>> + xdna->rpm_on = rpm;
>>> + }
>>> +
>>> + XDNA_DBG(xdna, "Suspend done ret %d", ret);
>>> + return ret;
>>> +}
>>> +
>>> +int amdxdna_pm_resume(struct device *dev)
>>> +{
>>> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>>> + int ret = -EOPNOTSUPP;
>>> + bool rpm;
>>> +
>>> + if (xdna->dev_info->ops->resume) {
>>> + rpm = xdna->rpm_on;
>>> + xdna->rpm_on = false;
>>> + ret = xdna->dev_info->ops->resume(xdna);
>>> + xdna->rpm_on = rpm;
>>> + }
>>> +
>>> + XDNA_DBG(xdna, "Resume done ret %d", ret);
>>> + return ret;
>>> +}
>>> +
>>> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna)
>>> +{
>>> + struct device *dev = xdna->ddev.dev;
>>> + int ret;
>>> +
>>> + if (!xdna->rpm_on)
>>> + return 0;
>>> +
>>> + ret = pm_runtime_resume_and_get(dev);
>>> + if (ret) {
>>> + XDNA_ERR(xdna, "Resume failed: %d", ret);
>>> + pm_runtime_set_suspended(dev);
>>> + }
>>> +
>>> + return ret;
>>> +}
>>> +
>>> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna)
>>> +{
>>> + struct device *dev = xdna->ddev.dev;
>>> +
>>> + if (!xdna->rpm_on)
>>> + return;
>>> +
>>> + pm_runtime_mark_last_busy(dev);
>>> + pm_runtime_put_autosuspend(dev);
>>> +}
>>> +
>>> +void amdxdna_rpm_init(struct amdxdna_dev *xdna)
>>> +{
>>> + struct device *dev = xdna->ddev.dev;
>>> +
>>> + pm_runtime_set_active(dev);
>>> + pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
>>> + pm_runtime_use_autosuspend(dev);
>>> + pm_runtime_allow(dev);
>>> + pm_runtime_mark_last_busy(dev);
>>> + pm_runtime_put_autosuspend(dev);
>>> + xdna->rpm_on = true;
>>> +}
>>> +
>>> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna)
>>> +{
>>> + struct device *dev = xdna->ddev.dev;
>>> +
>>> + xdna->rpm_on = false;
>>> + pm_runtime_get_noresume(dev);
>>> + pm_runtime_forbid(dev);
>>> +}
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/
>>> amdxdna/amdxdna_pm.h
>>> new file mode 100644
>>> index 000000000000..11c295b6d64a
>>> --- /dev/null
>>> +++ b/drivers/accel/amdxdna/amdxdna_pm.h
>>> @@ -0,0 +1,18 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +/*
>>> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
>>> + */
>>> +
>>> +#ifndef _AMDXDNA_PM_H_
>>> +#define _AMDXDNA_PM_H_
>>> +
>>> +#include "amdxdna_pci_drv.h"
>>> +
>>> +int amdxdna_pm_suspend(struct device *dev);
>>> +int amdxdna_pm_resume(struct device *dev);
>>> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna);
>>> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna);
>>> +void amdxdna_rpm_init(struct amdxdna_dev *xdna);
>>> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna);
>>
>> Nit: Why rpm for init/fini and pm for all the others? Wouldn't it
>> make sense to pick one or the other?
>
> Sure. I will change to _pm_init and _pm_fini.
>
> Thanks,
>
> Lizhi
>
>>
>>> +
>>> +#endif /* _AMDXDNA_PM_H_ */
>>
On 9/18/25 10:43, Mario Limonciello wrote:
>
>
> On 9/18/2025 12:41 PM, Lizhi Hou wrote:
>>
>> On 9/18/25 10:31, Mario Limonciello wrote:
>>>
>>>
>>> On 9/18/2025 12:24 PM, Lizhi Hou wrote:
>>>> Currently, pm_runtime_resume_and_get() is invoked in the driver's open
>>>> callback, and pm_runtime_put_autosuspend() is called in the close
>>>> callback. As a result, the device remains active whenever an
>>>> application
>>>> opens it, even if no I/O is performed, leading to unnecessary power
>>>> consumption.
>>>>
>>>> Move the runtime PM calls to the AIE2 callbacks that actually interact
>>>> with the hardware. The device will automatically suspend after 5
>>>> seconds
>>>> of inactivity (no hardware accesses and no pending commands), and
>>>> it will
>>>> be resumed on the next hardware access.
>>>>
>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>>> ---
>>>> drivers/accel/amdxdna/Makefile | 1 +
>>>> drivers/accel/amdxdna/aie2_ctx.c | 42 ++++++++---
>>>> drivers/accel/amdxdna/aie2_message.c | 28 ++++----
>>>> drivers/accel/amdxdna/aie2_pci.c | 79 ++++++++++----------
>>>> drivers/accel/amdxdna/aie2_pci.h | 3 +-
>>>> drivers/accel/amdxdna/aie2_smu.c | 28 ++++++--
>>>> drivers/accel/amdxdna/amdxdna_ctx.c | 54 +++++++-------
>>>> drivers/accel/amdxdna/amdxdna_mailbox.c | 13 ++--
>>>> drivers/accel/amdxdna/amdxdna_pci_drv.c | 56 ++-------------
>>>> drivers/accel/amdxdna/amdxdna_pci_drv.h | 2 +
>>>> drivers/accel/amdxdna/amdxdna_pm.c | 96
>>>> +++++++++++++++++++++++++
>>>> drivers/accel/amdxdna/amdxdna_pm.h | 18 +++++
>>>> 12 files changed, 262 insertions(+), 158 deletions(-)
>>>> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.c
>>>> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.h
>>>>
>>>> diff --git a/drivers/accel/amdxdna/Makefile
>>>> b/drivers/accel/amdxdna/ Makefile
>>>> index 6797dac65efa..6344aaf523fa 100644
>>>> --- a/drivers/accel/amdxdna/Makefile
>>>> +++ b/drivers/accel/amdxdna/Makefile
>>>> @@ -14,6 +14,7 @@ amdxdna-y := \
>>>> amdxdna_mailbox.o \
>>>> amdxdna_mailbox_helper.o \
>>>> amdxdna_pci_drv.o \
>>>> + amdxdna_pm.o \
>>>> amdxdna_sysfs.o \
>>>> amdxdna_ubuf.o \
>>>> npu1_regs.o \
>>>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/
>>>> amdxdna/aie2_ctx.c
>>>> index e9f9b1fa5dc1..691fdb3b008f 100644
>>>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>>>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>>>> @@ -21,6 +21,7 @@
>>>> #include "amdxdna_gem.h"
>>>> #include "amdxdna_mailbox.h"
>>>> #include "amdxdna_pci_drv.h"
>>>> +#include "amdxdna_pm.h"
>>>> static bool force_cmdlist;
>>>> module_param(force_cmdlist, bool, 0600);
>>>> @@ -88,7 +89,7 @@ static int aie2_hwctx_restart(struct amdxdna_dev
>>>> *xdna, struct amdxdna_hwctx *hw
>>>> goto out;
>>>> }
>>>> - ret = aie2_config_cu(hwctx);
>>>> + ret = aie2_config_cu(hwctx, NULL);
>>>> if (ret) {
>>>> XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
>>>> goto out;
>>>> @@ -167,14 +168,11 @@ static int aie2_hwctx_resume_cb(struct
>>>> amdxdna_hwctx *hwctx, void *arg)
>>>> int aie2_hwctx_resume(struct amdxdna_client *client)
>>>> {
>>>> - struct amdxdna_dev *xdna = client->xdna;
>>>> -
>>>> /*
>>>> * The resume path cannot guarantee that mailbox channel can be
>>>> * regenerated. If this happen, when submit message to this
>>>> * mailbox channel, error will return.
>>>> */
>>>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>>> return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
>>>> }
>>>> @@ -184,6 +182,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>>>> struct dma_fence *fence = job->fence;
>>>> trace_xdna_job(&job->base, job->hwctx->name, "signaled
>>>> fence", job->seq);
>>>> +
>>>> + amdxdna_pm_suspend_put(job->hwctx->client->xdna);
>>>> job->hwctx->priv->completed++;
>>>> dma_fence_signal(fence);
>>>> @@ -531,7 +531,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>>> .num_rqs = DRM_SCHED_PRIORITY_COUNT,
>>>> .credit_limit = HWCTX_MAX_CMDS,
>>>> .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
>>>> - .name = hwctx->name,
>>>> + .name = "amdxdna_js",
>>>> .dev = xdna->ddev.dev,
>>>> };
>>>> struct drm_gpu_scheduler *sched;
>>>> @@ -697,6 +697,14 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
>>>> kfree(hwctx->cus);
>>>> }
>>>> +static int aie2_config_cu_resp_handler(void *handle, void
>>>> __iomem *data, size_t size)
>>>> +{
>>>> + struct amdxdna_hwctx *hwctx = handle;
>>>> +
>>>> + amdxdna_pm_suspend_put(hwctx->client->xdna);
>>>> + return 0;
>>>> +}
>>>> +
>>>> static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void
>>>> *buf, u32 size)
>>>> {
>>>> struct amdxdna_hwctx_param_config_cu *config = buf;
>>>> @@ -728,10 +736,14 @@ static int aie2_hwctx_cu_config(struct
>>>> amdxdna_hwctx *hwctx, void *buf, u32 size
>>>> if (!hwctx->cus)
>>>> return -ENOMEM;
>>>> - ret = aie2_config_cu(hwctx);
>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>> + if (ret)
>>>> + goto free_cus;
>>>> +
>>>> + ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
>>>> if (ret) {
>>>> XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
>>>> - goto free_cus;
>>>> + goto pm_suspend_put;
>>>> }
>>>> wmb(); /* To avoid locking in command submit when check
>>>> status */
>>>> @@ -739,6 +751,8 @@ static int aie2_hwctx_cu_config(struct
>>>> amdxdna_hwctx *hwctx, void *buf, u32 size
>>>> return 0;
>>>> +pm_suspend_put:
>>>> + amdxdna_pm_suspend_put(xdna);
>>>> free_cus:
>>>> kfree(hwctx->cus);
>>>> hwctx->cus = NULL;
>>>> @@ -862,11 +876,15 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>> *hwctx, struct amdxdna_sched_job *job,
>>>> goto free_chain;
>>>> }
>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>> + if (ret)
>>>> + goto cleanup_job;
>>>> +
>>>> retry:
>>>> ret = drm_gem_lock_reservations(job->bos, job->bo_cnt,
>>>> &acquire_ctx);
>>>> if (ret) {
>>>> XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
>>>> - goto cleanup_job;
>>>> + goto suspend_put;
>>>> }
>>>> for (i = 0; i < job->bo_cnt; i++) {
>>>> @@ -874,7 +892,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>> *hwctx, struct amdxdna_sched_job *job,
>>>> if (ret) {
>>>> XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
>>>> drm_gem_unlock_reservations(job->bos, job->bo_cnt,
>>>> &acquire_ctx);
>>>> - goto cleanup_job;
>>>> + goto suspend_put;
>>>> }
>>>> }
>>>> @@ -889,12 +907,12 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>> *hwctx, struct amdxdna_sched_job *job,
>>>> msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
>>>> } else if (time_after(jiffies, timeout)) {
>>>> ret = -ETIME;
>>>> - goto cleanup_job;
>>>> + goto suspend_put;
>>>> }
>>>> ret = aie2_populate_range(abo);
>>>> if (ret)
>>>> - goto cleanup_job;
>>>> + goto suspend_put;
>>>> goto retry;
>>>> }
>>>> }
>>>> @@ -920,6 +938,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>> *hwctx, struct amdxdna_sched_job *job,
>>>> return 0;
>>>> +suspend_put:
>>>> + amdxdna_pm_suspend_put(xdna);
>>>> cleanup_job:
>>>> drm_sched_job_cleanup(&job->base);
>>>> free_chain:
>>>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/
>>>> amdxdna/aie2_message.c
>>>> index 9caad083543d..4660e8297ed8 100644
>>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>>> @@ -37,7 +37,7 @@ static int aie2_send_mgmt_msg_wait(struct
>>>> amdxdna_dev_hdl *ndev,
>>>> if (!ndev->mgmt_chann)
>>>> return -ENODEV;
>>>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>>> + drm_WARN_ON(&xdna->ddev, xdna->rpm_on &&
>>>> !mutex_is_locked(&xdna- >dev_lock));
>>>> ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
>>>> if (ret == -ETIME) {
>>>> xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>>> @@ -377,15 +377,17 @@ int aie2_register_asyn_event_msg(struct
>>>> amdxdna_dev_hdl *ndev, dma_addr_t addr,
>>>> return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg,
>>>> TX_TIMEOUT);
>>>> }
>>>> -int aie2_config_cu(struct amdxdna_hwctx *hwctx)
>>>> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>>>> + int (*notify_cb)(void *, void __iomem *, size_t))
>>>> {
>>>> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>>>> struct amdxdna_dev *xdna = hwctx->client->xdna;
>>>> u32 shift = xdna->dev_info->dev_mem_buf_shift;
>>>> - DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
>>>> + struct config_cu_req req = { 0 };
>>>> + struct xdna_mailbox_msg msg;
>>>> struct drm_gem_object *gobj;
>>>> struct amdxdna_gem_obj *abo;
>>>> - int ret, i;
>>>> + int i;
>>>> if (!chann)
>>>> return -ENODEV;
>>>> @@ -423,18 +425,12 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
>>>> }
>>>> req.num_cus = hwctx->cus->num_cus;
>>>> - ret = xdna_send_msg_wait(xdna, chann, &msg);
>>>> - if (ret == -ETIME)
>>>> - aie2_destroy_context(xdna->dev_handle, hwctx);
>>>> -
>>>> - if (resp.status == AIE2_STATUS_SUCCESS) {
>>>> - XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
>>>> - return 0;
>>>> - }
>>>> -
>>>> - XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
>>>> - msg.opcode, resp.status, ret);
>>>> - return ret;
>>>> + msg.send_data = (u8 *)&req;
>>>> + msg.send_size = sizeof(req);
>>>> + msg.handle = hwctx;
>>>> + msg.opcode = MSG_OP_CONFIG_CU;
>>>> + msg.notify_cb = notify_cb;
>>>> + return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>>>> }
>>>> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>>>> amdxdna_sched_job *job,
>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/
>>>> amdxdna/aie2_pci.c
>>>> index 6e39c769bb6d..f46a3dcd0580 100644
>>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>>> @@ -25,6 +25,7 @@
>>>> #include "amdxdna_gem.h"
>>>> #include "amdxdna_mailbox.h"
>>>> #include "amdxdna_pci_drv.h"
>>>> +#include "amdxdna_pm.h"
>>>> static int aie2_max_col = XRS_MAX_COL;
>>>> module_param(aie2_max_col, uint, 0600);
>>>> @@ -223,15 +224,6 @@ static int aie2_mgmt_fw_init(struct
>>>> amdxdna_dev_hdl *ndev)
>>>> return ret;
>>>> }
>>>> - if (!ndev->async_events)
>>>> - return 0;
>>>> -
>>>> - ret = aie2_error_async_events_send(ndev);
>>>> - if (ret) {
>>>> - XDNA_ERR(ndev->xdna, "Send async events failed");
>>>> - return ret;
>>>> - }
>>>> -
>>>> return 0;
>>>> }
>>>> @@ -257,6 +249,8 @@ static int aie2_mgmt_fw_query(struct
>>>> amdxdna_dev_hdl *ndev)
>>>> return ret;
>>>> }
>>>> + ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>>>> +
>>>> return 0;
>>>> }
>>>> @@ -338,6 +332,7 @@ static void aie2_hw_stop(struct amdxdna_dev
>>>> *xdna)
>>>> ndev->mbox = NULL;
>>>> aie2_psp_stop(ndev->psp_hdl);
>>>> aie2_smu_fini(ndev);
>>>> + aie2_error_async_events_free(ndev);
>>>> pci_disable_device(pdev);
>>>> ndev->dev_status = AIE2_DEV_INIT;
>>>> @@ -424,6 +419,18 @@ static int aie2_hw_start(struct amdxdna_dev
>>>> *xdna)
>>>> goto destroy_mgmt_chann;
>>>> }
>>>> + ret = aie2_mgmt_fw_query(ndev);
>>>> + if (ret) {
>>>> + XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
>>>> + goto destroy_mgmt_chann;
>>>> + }
>>>> +
>>>> + ret = aie2_error_async_events_alloc(ndev);
>>>> + if (ret) {
>>>> + XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
>>>> + goto destroy_mgmt_chann;
>>>> + }
>>>> +
>>>> ndev->dev_status = AIE2_DEV_START;
>>>> return 0;
>>>> @@ -459,7 +466,6 @@ static int aie2_hw_resume(struct amdxdna_dev
>>>> *xdna)
>>>> struct amdxdna_client *client;
>>>> int ret;
>>>> - guard(mutex)(&xdna->dev_lock);
>>>> ret = aie2_hw_start(xdna);
>>>> if (ret) {
>>>> XDNA_ERR(xdna, "Start hardware failed, %d", ret);
>>>> @@ -565,13 +571,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>>> goto release_fw;
>>>> }
>>>> - ret = aie2_mgmt_fw_query(ndev);
>>>> - if (ret) {
>>>> - XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
>>>> - goto stop_hw;
>>>> - }
>>>> - ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>>>> -
>>>> xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
>>>> for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
>>>> xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
>>>> @@ -587,30 +586,10 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>>> goto stop_hw;
>>>> }
>>>> - ret = aie2_error_async_events_alloc(ndev);
>>>> - if (ret) {
>>>> - XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
>>>> - goto stop_hw;
>>>> - }
>>>> -
>>>> - ret = aie2_error_async_events_send(ndev);
>>>> - if (ret) {
>>>> - XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
>>>> - goto async_event_free;
>>>> - }
>>>> -
>>>> - /* Issue a command to make sure firmware handled async events */
>>>> - ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
>>>> - if (ret) {
>>>> - XDNA_ERR(xdna, "Re-query firmware version failed");
>>>> - goto async_event_free;
>>>> - }
>>>> -
>>>> release_firmware(fw);
>>>> + amdxdna_rpm_init(xdna);
>>>> return 0;
>>>> -async_event_free:
>>>> - aie2_error_async_events_free(ndev);
>>>> stop_hw:
>>>> aie2_hw_stop(xdna);
>>>> release_fw:
>>>> @@ -621,10 +600,8 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>>> static void aie2_fini(struct amdxdna_dev *xdna)
>>>> {
>>>> - struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>>>> -
>>>> + amdxdna_rpm_fini(xdna);
>>>> aie2_hw_stop(xdna);
>>>> - aie2_error_async_events_free(ndev);
>>>> }
>>>> static int aie2_get_aie_status(struct amdxdna_client *client,
>>>> @@ -856,6 +833,10 @@ static int aie2_get_info(struct amdxdna_client
>>>> *client, struct amdxdna_drm_get_i
>>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>>> return -ENODEV;
>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>> + if (ret)
>>>> + goto dev_exit;
>>>> +
>>>> switch (args->param) {
>>>> case DRM_AMDXDNA_QUERY_AIE_STATUS:
>>>> ret = aie2_get_aie_status(client, args);
>>>> @@ -882,8 +863,11 @@ static int aie2_get_info(struct amdxdna_client
>>>> *client, struct amdxdna_drm_get_i
>>>> XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
>>>> ret = -EOPNOTSUPP;
>>>> }
>>>> +
>>>> + amdxdna_pm_suspend_put(xdna);
>>>> XDNA_DBG(xdna, "Got param %d", args->param);
>>>> +dev_exit:
>>>> drm_dev_exit(idx);
>>>> return ret;
>>>> }
>>>> @@ -932,6 +916,10 @@ static int aie2_get_array(struct
>>>> amdxdna_client *client,
>>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>>> return -ENODEV;
>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>> + if (ret)
>>>> + goto dev_exit;
>>>> +
>>>> switch (args->param) {
>>>> case DRM_AMDXDNA_HW_CONTEXT_ALL:
>>>> ret = aie2_query_ctx_status_array(client, args);
>>>> @@ -940,8 +928,11 @@ static int aie2_get_array(struct
>>>> amdxdna_client *client,
>>>> XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
>>>> ret = -EOPNOTSUPP;
>>>> }
>>>> +
>>>> + amdxdna_pm_suspend_put(xdna);
>>>> XDNA_DBG(xdna, "Got param %d", args->param);
>>>> +dev_exit:
>>>> drm_dev_exit(idx);
>>>> return ret;
>>>> }
>>>> @@ -980,6 +971,10 @@ static int aie2_set_state(struct
>>>> amdxdna_client *client,
>>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>>> return -ENODEV;
>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>> + if (ret)
>>>> + goto dev_exit;
>>>> +
>>>> switch (args->param) {
>>>> case DRM_AMDXDNA_SET_POWER_MODE:
>>>> ret = aie2_set_power_mode(client, args);
>>>> @@ -990,6 +985,8 @@ static int aie2_set_state(struct amdxdna_client
>>>> *client,
>>>> break;
>>>> }
>>>> + amdxdna_pm_suspend_put(xdna);
>>>> +dev_exit:
>>>> drm_dev_exit(idx);
>>>> return ret;
>>>> }
>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/
>>>> amdxdna/aie2_pci.h
>>>> index 91a8e948f82a..289a23ecd5f1 100644
>>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>>> @@ -272,7 +272,8 @@ int aie2_map_host_buf(struct amdxdna_dev_hdl
>>>> *ndev, u32 context_id, u64 addr, u6
>>>> int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user
>>>> *buf, u32 size, u32 *cols_filled);
>>>> int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev,
>>>> dma_addr_t addr, u32 size,
>>>> void *handle, int (*cb)(void*, void __iomem *,
>>>> size_t));
>>>> -int aie2_config_cu(struct amdxdna_hwctx *hwctx);
>>>> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>>>> + int (*notify_cb)(void *, void __iomem *, size_t));
>>>> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>>>> amdxdna_sched_job *job,
>>>> int (*notify_cb)(void *, void __iomem *, size_t));
>>>> int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
>>>> diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/
>>>> amdxdna/aie2_smu.c
>>>> index d303701b0ded..7f292a615ed8 100644
>>>> --- a/drivers/accel/amdxdna/aie2_smu.c
>>>> +++ b/drivers/accel/amdxdna/aie2_smu.c
>>>> @@ -11,6 +11,7 @@
>>>> #include "aie2_pci.h"
>>>> #include "amdxdna_pci_drv.h"
>>>> +#include "amdxdna_pm.h"
>>>> #define SMU_RESULT_OK 1
>>>> @@ -59,12 +60,16 @@ int npu1_set_dpm(struct amdxdna_dev_hdl
>>>> *ndev, u32 dpm_level)
>>>> u32 freq;
>>>> int ret;
>>>> + ret = amdxdna_pm_resume_get(ndev->xdna);
>>>> + if (ret)
>>>> + return ret;
>>>> +
>>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
>>>> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
>>>> if (ret) {
>>>> XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
>>>> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
>>>> - return ret;
>>>> + goto suspend_put;
>>>> }
>>>> ndev->npuclk_freq = freq;
>>>> @@ -73,8 +78,10 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev,
>>>> u32 dpm_level)
>>>> if (ret) {
>>>> XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
>>>> ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
>>>> - return ret;
>>>> + goto suspend_put;
>>>> }
>>>> +
>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>> ndev->hclk_freq = freq;
>>>> ndev->dpm_level = dpm_level;
>>>> @@ -82,26 +89,35 @@ int npu1_set_dpm(struct amdxdna_dev_hdl
>>>> *ndev, u32 dpm_level)
>>>> ndev->npuclk_freq, ndev->hclk_freq);
>>>> return 0;
>>>> +
>>>> +suspend_put:
>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>> + return ret;
>>>> }
>>>> int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>>> {
>>>> int ret;
>>>> + ret = amdxdna_pm_resume_get(ndev->xdna);
>>>> + if (ret)
>>>> + return ret;
>>>> +
>>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL,
>>>> dpm_level, NULL);
>>>> if (ret) {
>>>> XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d",
>>>> dpm_level, ret);
>>>> - return ret;
>>>> + goto suspend_put;
>>>> }
>>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL,
>>>> dpm_level, NULL);
>>>> if (ret) {
>>>> XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
>>>> dpm_level, ret);
>>>> - return ret;
>>>> + goto suspend_put;
>>>> }
>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>> ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>>>> ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
>>>> ndev->dpm_level = dpm_level;
>>>> @@ -110,6 +126,10 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev,
>>>> u32 dpm_level)
>>>> ndev->npuclk_freq, ndev->hclk_freq);
>>>> return 0;
>>>> +
>>>> +suspend_put:
>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>> + return ret;
>>>> }
>>>> int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/
>>>> amdxdna/amdxdna_ctx.c
>>>> index 4bfe4ef20550..c3300eff7460 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>>> @@ -161,14 +161,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>>> drm_device *dev, void *data, struct dr
>>>> if (args->ext || args->ext_flags)
>>>> return -EINVAL;
>>>> - if (!drm_dev_enter(dev, &idx))
>>>> - return -ENODEV;
>>>> -
>>>> hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
>>>> - if (!hwctx) {
>>>> - ret = -ENOMEM;
>>>> - goto exit;
>>>> - }
>>>> + if (!hwctx)
>>>> + return -ENOMEM;
>>>> if (copy_from_user(&hwctx->qos,
>>>> u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
>>>> XDNA_ERR(xdna, "Access QoS info failed");
>>>> @@ -181,30 +176,36 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>>> drm_device *dev, void *data, struct dr
>>>> hwctx->num_tiles = args->num_tiles;
>>>> hwctx->mem_size = args->mem_size;
>>>> hwctx->max_opc = args->max_opc;
>>>> - ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
>>>> - XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1,
>>>> MAX_HWCTX_ID),
>>>> - &client->next_hwctxid, GFP_KERNEL);
>>>> - if (ret < 0) {
>>>> - XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
>>>> +
>>>> + guard(mutex)(&xdna->dev_lock);
>>>> +
>>>> + if (!drm_dev_enter(dev, &idx)) {
>>>> + ret = -ENODEV;
>>>> goto free_hwctx;
>>>> }
>>>> - hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
>>>> + ret = xdna->dev_info->ops->hwctx_init(hwctx);
>>>> + if (ret) {
>>>> + XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
>>>> + goto dev_exit;
>>>> + }
>>>> +
>>>> + hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d",
>>>> client->pid, hwctx->fw_ctx_id);
>>>> if (!hwctx->name) {
>>>> ret = -ENOMEM;
>>>> - goto rm_id;
>>>> + goto fini_hwctx;
>>>> }
>>>> - mutex_lock(&xdna->dev_lock);
>>>> - ret = xdna->dev_info->ops->hwctx_init(hwctx);
>>>> - if (ret) {
>>>> - mutex_unlock(&xdna->dev_lock);
>>>> - XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
>>>> + ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
>>>> + XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1,
>>>> MAX_HWCTX_ID),
>>>> + &client->next_hwctxid, GFP_KERNEL);
>>>> + if (ret < 0) {
>>>> + XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
>>>> goto free_name;
>>>> }
>>>> +
>>>> args->handle = hwctx->id;
>>>> args->syncobj_handle = hwctx->syncobj_hdl;
>>>> - mutex_unlock(&xdna->dev_lock);
>>>> atomic64_set(&hwctx->job_submit_cnt, 0);
>>>> atomic64_set(&hwctx->job_free_cnt, 0);
>>>> @@ -214,12 +215,12 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>>> drm_device *dev, void *data, struct dr
>>>> free_name:
>>>> kfree(hwctx->name);
>>>> -rm_id:
>>>> - xa_erase(&client->hwctx_xa, hwctx->id);
>>>> +fini_hwctx:
>>>> + xdna->dev_info->ops->hwctx_fini(hwctx);
>>>> +dev_exit:
>>>> + drm_dev_exit(idx);
>>>> free_hwctx:
>>>> kfree(hwctx);
>>>> -exit:
>>>> - drm_dev_exit(idx);
>>>> return ret;
>>>> }
>>>> @@ -431,11 +432,6 @@ int amdxdna_cmd_submit(struct amdxdna_client
>>>> *client,
>>>> goto unlock_srcu;
>>>> }
>>>> - if (hwctx->status != HWCTX_STAT_READY) {
>>>> - XDNA_ERR(xdna, "HW Context is not ready");
>>>> - ret = -EINVAL;
>>>> - goto unlock_srcu;
>>>> - }
>>>> job->hwctx = hwctx;
>>>> job->mm = current->mm;
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> b/drivers/accel/ amdxdna/amdxdna_mailbox.c
>>>> index da1ac89bb78f..24258dcc18eb 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>> @@ -194,7 +194,8 @@ static void mailbox_release_msg(struct
>>>> mailbox_channel *mb_chann,
>>>> {
>>>> MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
>>>> mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
>>>> - mb_msg->notify_cb(mb_msg->handle, NULL, 0);
>>>> + if (mb_msg->notify_cb)
>>>> + mb_msg->notify_cb(mb_msg->handle, NULL, 0);
>>>> kfree(mb_msg);
>>>> }
>>>> @@ -248,7 +249,7 @@ mailbox_get_resp(struct mailbox_channel
>>>> *mb_chann, struct xdna_msg_header *heade
>>>> {
>>>> struct mailbox_msg *mb_msg;
>>>> int msg_id;
>>>> - int ret;
>>>> + int ret = 0;
>>>> msg_id = header->id;
>>>> if (!mailbox_validate_msgid(msg_id)) {
>>>> @@ -265,9 +266,11 @@ mailbox_get_resp(struct mailbox_channel
>>>> *mb_chann, struct xdna_msg_header *heade
>>>> MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
>>>> header->opcode, header->total_size, header->id);
>>>> - ret = mb_msg->notify_cb(mb_msg->handle, data,
>>>> header->total_size);
>>>> - if (unlikely(ret))
>>>> - MB_ERR(mb_chann, "Message callback ret %d", ret);
>>>> + if (mb_msg->notify_cb) {
>>>> + ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
>>>> + if (unlikely(ret))
>>>> + MB_ERR(mb_chann, "Message callback ret %d", ret);
>>>> + }
>>>> kfree(mb_msg);
>>>> return ret;
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>>> b/drivers/accel/ amdxdna/amdxdna_pci_drv.c
>>>> index 569cd703729d..aa04452310e5 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>>> @@ -13,13 +13,11 @@
>>>> #include <drm/gpu_scheduler.h>
>>>> #include <linux/iommu.h>
>>>> #include <linux/pci.h>
>>>> -#include <linux/pm_runtime.h>
>>>> #include "amdxdna_ctx.h"
>>>> #include "amdxdna_gem.h"
>>>> #include "amdxdna_pci_drv.h"
>>>> -
>>>> -#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
>>>> +#include "amdxdna_pm.h"
>>>> MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
>>>> MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
>>>> @@ -61,17 +59,9 @@ static int amdxdna_drm_open(struct drm_device
>>>> *ddev, struct drm_file *filp)
>>>> struct amdxdna_client *client;
>>>> int ret;
>>>> - ret = pm_runtime_resume_and_get(ddev->dev);
>>>> - if (ret) {
>>>> - XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
>>>> - return ret;
>>>> - }
>>>> -
>>>> client = kzalloc(sizeof(*client), GFP_KERNEL);
>>>> - if (!client) {
>>>> - ret = -ENOMEM;
>>>> - goto put_rpm;
>>>> - }
>>>> + if (!client)
>>>> + return -ENOMEM;
>>>> client->pid = pid_nr(rcu_access_pointer(filp->pid));
>>>> client->xdna = xdna;
>>>> @@ -106,9 +96,6 @@ static int amdxdna_drm_open(struct drm_device
>>>> *ddev, struct drm_file *filp)
>>>> iommu_sva_unbind_device(client->sva);
>>>> failed:
>>>> kfree(client);
>>>> -put_rpm:
>>>> - pm_runtime_mark_last_busy(ddev->dev);
>>>> - pm_runtime_put_autosuspend(ddev->dev);
>>>> return ret;
>>>> }
>>>> @@ -130,8 +117,6 @@ static void amdxdna_drm_close(struct drm_device
>>>> *ddev, struct drm_file *filp)
>>>> XDNA_DBG(xdna, "pid %d closed", client->pid);
>>>> kfree(client);
>>>> - pm_runtime_mark_last_busy(ddev->dev);
>>>> - pm_runtime_put_autosuspend(ddev->dev);
>>>> }
>>>> static int amdxdna_flush(struct file *f, fl_owner_t id)
>>>> @@ -310,19 +295,12 @@ static int amdxdna_probe(struct pci_dev
>>>> *pdev, const struct pci_device_id *id)
>>>> goto failed_dev_fini;
>>>> }
>>>> - pm_runtime_set_autosuspend_delay(dev,
>>>> AMDXDNA_AUTOSUSPEND_DELAY);
>>>> - pm_runtime_use_autosuspend(dev);
>>>> - pm_runtime_allow(dev);
>>>> -
>>>> ret = drm_dev_register(&xdna->ddev, 0);
>>>> if (ret) {
>>>> XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
>>>> - pm_runtime_forbid(dev);
>>>> goto failed_sysfs_fini;
>>>> }
>>>> - pm_runtime_mark_last_busy(dev);
>>>> - pm_runtime_put_autosuspend(dev);
>>>> return 0;
>>>> failed_sysfs_fini:
>>>> @@ -339,14 +317,10 @@ static int amdxdna_probe(struct pci_dev
>>>> *pdev, const struct pci_device_id *id)
>>>> static void amdxdna_remove(struct pci_dev *pdev)
>>>> {
>>>> struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
>>>> - struct device *dev = &pdev->dev;
>>>> struct amdxdna_client *client;
>>>> destroy_workqueue(xdna->notifier_wq);
>>>> - pm_runtime_get_noresume(dev);
>>>> - pm_runtime_forbid(dev);
>>>> -
>>>> drm_dev_unplug(&xdna->ddev);
>>>> amdxdna_sysfs_fini(xdna);
>>>> @@ -365,29 +339,9 @@ static void amdxdna_remove(struct pci_dev
>>>> *pdev)
>>>> mutex_unlock(&xdna->dev_lock);
>>>> }
>>>> -static int amdxdna_pmops_suspend(struct device *dev)
>>>> -{
>>>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>>>> -
>>>> - if (!xdna->dev_info->ops->suspend)
>>>> - return -EOPNOTSUPP;
>>>> -
>>>> - return xdna->dev_info->ops->suspend(xdna);
>>>> -}
>>>> -
>>>> -static int amdxdna_pmops_resume(struct device *dev)
>>>> -{
>>>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>>>> -
>>>> - if (!xdna->dev_info->ops->resume)
>>>> - return -EOPNOTSUPP;
>>>> -
>>>> - return xdna->dev_info->ops->resume(xdna);
>>>> -}
>>>> -
>>>> static const struct dev_pm_ops amdxdna_pm_ops = {
>>>> - SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
>>>> - RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume, NULL)
>>>> + SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume)
>>>> + RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL)
>>>> };
>>>> static struct pci_driver amdxdna_pci_driver = {
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>>> b/drivers/accel/ amdxdna/amdxdna_pci_drv.h
>>>> index 72d6696d49da..626beebf730e 100644
>>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>>> @@ -6,6 +6,7 @@
>>>> #ifndef _AMDXDNA_PCI_DRV_H_
>>>> #define _AMDXDNA_PCI_DRV_H_
>>>> +#include <drm/drm_print.h>
>>>
>>> This seems like a spurious header inclusion. It shouldn't be needed
>>> for adding a bool to the struct.
>>>
>>>
>>>> #include <linux/workqueue.h>
>>>> #include <linux/xarray.h>
>>>> @@ -99,6 +100,7 @@ struct amdxdna_dev {
>>>> struct amdxdna_fw_ver fw_ver;
>>>> struct rw_semaphore notifier_lock; /* for mmu notifier*/
>>>> struct workqueue_struct *notifier_wq;
>>>> + bool rpm_on;
>>>
>>> I'm wondering do you really need a new variable to track the runtime
>>> PM status? Can't you just use pm_runtime_active() and the
>>> appropriate locking when checking it?
>>>
>
> Just make sure you didn't miss the two above comments when scanning
> the email response since you didn't reply them.
Sorry, I indeed missed this question. Thanks for the reminder.
rpm_on is used to check whether suspend/resume is in progress, so the code
will not recursively call into suspend and resume. I did not see any existing
function to check whether suspend/resume is in progress.
Thanks,
Lizhi
>
>>>> };
>>>> /*
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/
>>>> amdxdna/amdxdna_pm.c
>>>> new file mode 100644
>>>> index 000000000000..fad14f60b99b
>>>> --- /dev/null
>>>> +++ b/drivers/accel/amdxdna/amdxdna_pm.c
>>>> @@ -0,0 +1,96 @@
>>>> +// SPDX-License-Identifier: GPL-2.0
>>>> +/*
>>>> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
>>>> + */
>>>> +
>>>> +#include <drm/amdxdna_accel.h>
>>>> +#include <drm/drm_drv.h>
>>>> +#include <linux/pm_runtime.h>
>>>> +
>>>> +#include "amdxdna_pm.h"
>>>> +
>>>> +#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
>>>> +
>>>> +int amdxdna_pm_suspend(struct device *dev)
>>>> +{
>>>> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>>>> + int ret = -EOPNOTSUPP;
>>>> + bool rpm;
>>>> +
>>>> + if (xdna->dev_info->ops->suspend) {
>>>> + rpm = xdna->rpm_on;
>>>> + xdna->rpm_on = false;
>>>> + ret = xdna->dev_info->ops->suspend(xdna);
>>>> + xdna->rpm_on = rpm;
>>>> + }
>>>> +
>>>> + XDNA_DBG(xdna, "Suspend done ret %d", ret);
>>>> + return ret;
>>>> +}
>>>> +
>>>> +int amdxdna_pm_resume(struct device *dev)
>>>> +{
>>>> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>>>> + int ret = -EOPNOTSUPP;
>>>> + bool rpm;
>>>> +
>>>> + if (xdna->dev_info->ops->resume) {
>>>> + rpm = xdna->rpm_on;
>>>> + xdna->rpm_on = false;
>>>> + ret = xdna->dev_info->ops->resume(xdna);
>>>> + xdna->rpm_on = rpm;
>>>> + }
>>>> +
>>>> + XDNA_DBG(xdna, "Resume done ret %d", ret);
>>>> + return ret;
>>>> +}
>>>> +
>>>> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna)
>>>> +{
>>>> + struct device *dev = xdna->ddev.dev;
>>>> + int ret;
>>>> +
>>>> + if (!xdna->rpm_on)
>>>> + return 0;
>>>> +
>>>> + ret = pm_runtime_resume_and_get(dev);
>>>> + if (ret) {
>>>> + XDNA_ERR(xdna, "Resume failed: %d", ret);
>>>> + pm_runtime_set_suspended(dev);
>>>> + }
>>>> +
>>>> + return ret;
>>>> +}
>>>> +
>>>> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna)
>>>> +{
>>>> + struct device *dev = xdna->ddev.dev;
>>>> +
>>>> + if (!xdna->rpm_on)
>>>> + return;
>>>> +
>>>> + pm_runtime_mark_last_busy(dev);
>>>> + pm_runtime_put_autosuspend(dev);
>>>> +}
>>>> +
>>>> +void amdxdna_rpm_init(struct amdxdna_dev *xdna)
>>>> +{
>>>> + struct device *dev = xdna->ddev.dev;
>>>> +
>>>> + pm_runtime_set_active(dev);
>>>> + pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
>>>> + pm_runtime_use_autosuspend(dev);
>>>> + pm_runtime_allow(dev);
>>>> + pm_runtime_mark_last_busy(dev);
>>>> + pm_runtime_put_autosuspend(dev);
>>>> + xdna->rpm_on = true;
>>>> +}
>>>> +
>>>> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna)
>>>> +{
>>>> + struct device *dev = xdna->ddev.dev;
>>>> +
>>>> + xdna->rpm_on = false;
>>>> + pm_runtime_get_noresume(dev);
>>>> + pm_runtime_forbid(dev);
>>>> +}
>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/
>>>> amdxdna/amdxdna_pm.h
>>>> new file mode 100644
>>>> index 000000000000..11c295b6d64a
>>>> --- /dev/null
>>>> +++ b/drivers/accel/amdxdna/amdxdna_pm.h
>>>> @@ -0,0 +1,18 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +/*
>>>> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
>>>> + */
>>>> +
>>>> +#ifndef _AMDXDNA_PM_H_
>>>> +#define _AMDXDNA_PM_H_
>>>> +
>>>> +#include "amdxdna_pci_drv.h"
>>>> +
>>>> +int amdxdna_pm_suspend(struct device *dev);
>>>> +int amdxdna_pm_resume(struct device *dev);
>>>> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna);
>>>> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna);
>>>> +void amdxdna_rpm_init(struct amdxdna_dev *xdna);
>>>> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna);
>>>
>>> Nit: Why rpm for init/fini and pm for all the others? Wouldn't it
>>> make sense to pick one or the other?
>>
>> Sure. I will change to _pm_init and _pm_fini.
>>
>> Thanks,
>>
>> Lizhi
>>
>>>
>>>> +
>>>> +#endif /* _AMDXDNA_PM_H_ */
>>>
>
On 9/18/2025 1:05 PM, Lizhi Hou wrote:
>
> On 9/18/25 10:43, Mario Limonciello wrote:
>>
>>
>> On 9/18/2025 12:41 PM, Lizhi Hou wrote:
>>>
>>> On 9/18/25 10:31, Mario Limonciello wrote:
>>>>
>>>>
>>>> On 9/18/2025 12:24 PM, Lizhi Hou wrote:
>>>>> Currently, pm_runtime_resume_and_get() is invoked in the driver's open
>>>>> callback, and pm_runtime_put_autosuspend() is called in the close
>>>>> callback. As a result, the device remains active whenever an
>>>>> application
>>>>> opens it, even if no I/O is performed, leading to unnecessary power
>>>>> consumption.
>>>>>
>>>>> Move the runtime PM calls to the AIE2 callbacks that actually interact
>>>>> with the hardware. The device will automatically suspend after 5
>>>>> seconds
>>>>> of inactivity (no hardware accesses and no pending commands), and
>>>>> it will
>>>>> be resumed on the next hardware access.
>>>>>
>>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>>>> ---
>>>>> drivers/accel/amdxdna/Makefile | 1 +
>>>>> drivers/accel/amdxdna/aie2_ctx.c | 42 ++++++++---
>>>>> drivers/accel/amdxdna/aie2_message.c | 28 ++++----
>>>>> drivers/accel/amdxdna/aie2_pci.c | 79 ++++++++++----------
>>>>> drivers/accel/amdxdna/aie2_pci.h | 3 +-
>>>>> drivers/accel/amdxdna/aie2_smu.c | 28 ++++++--
>>>>> drivers/accel/amdxdna/amdxdna_ctx.c | 54 +++++++-------
>>>>> drivers/accel/amdxdna/amdxdna_mailbox.c | 13 ++--
>>>>> drivers/accel/amdxdna/amdxdna_pci_drv.c | 56 ++-------------
>>>>> drivers/accel/amdxdna/amdxdna_pci_drv.h | 2 +
>>>>> drivers/accel/amdxdna/amdxdna_pm.c | 96 ++++++++++++++++++++
>>>>> +++++
>>>>> drivers/accel/amdxdna/amdxdna_pm.h | 18 +++++
>>>>> 12 files changed, 262 insertions(+), 158 deletions(-)
>>>>> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.c
>>>>> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.h
>>>>>
>>>>> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/
>>>>> amdxdna/ Makefile
>>>>> index 6797dac65efa..6344aaf523fa 100644
>>>>> --- a/drivers/accel/amdxdna/Makefile
>>>>> +++ b/drivers/accel/amdxdna/Makefile
>>>>> @@ -14,6 +14,7 @@ amdxdna-y := \
>>>>> amdxdna_mailbox.o \
>>>>> amdxdna_mailbox_helper.o \
>>>>> amdxdna_pci_drv.o \
>>>>> + amdxdna_pm.o \
>>>>> amdxdna_sysfs.o \
>>>>> amdxdna_ubuf.o \
>>>>> npu1_regs.o \
>>>>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/
>>>>> amdxdna/aie2_ctx.c
>>>>> index e9f9b1fa5dc1..691fdb3b008f 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>>>>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>>>>> @@ -21,6 +21,7 @@
>>>>> #include "amdxdna_gem.h"
>>>>> #include "amdxdna_mailbox.h"
>>>>> #include "amdxdna_pci_drv.h"
>>>>> +#include "amdxdna_pm.h"
>>>>> static bool force_cmdlist;
>>>>> module_param(force_cmdlist, bool, 0600);
>>>>> @@ -88,7 +89,7 @@ static int aie2_hwctx_restart(struct amdxdna_dev
>>>>> *xdna, struct amdxdna_hwctx *hw
>>>>> goto out;
>>>>> }
>>>>> - ret = aie2_config_cu(hwctx);
>>>>> + ret = aie2_config_cu(hwctx, NULL);
>>>>> if (ret) {
>>>>> XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
>>>>> goto out;
>>>>> @@ -167,14 +168,11 @@ static int aie2_hwctx_resume_cb(struct
>>>>> amdxdna_hwctx *hwctx, void *arg)
>>>>> int aie2_hwctx_resume(struct amdxdna_client *client)
>>>>> {
>>>>> - struct amdxdna_dev *xdna = client->xdna;
>>>>> -
>>>>> /*
>>>>> * The resume path cannot guarantee that mailbox channel can be
>>>>> * regenerated. If this happen, when submit message to this
>>>>> * mailbox channel, error will return.
>>>>> */
>>>>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>>>> return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
>>>>> }
>>>>> @@ -184,6 +182,8 @@ aie2_sched_notify(struct amdxdna_sched_job *job)
>>>>> struct dma_fence *fence = job->fence;
>>>>> trace_xdna_job(&job->base, job->hwctx->name, "signaled
>>>>> fence", job->seq);
>>>>> +
>>>>> + amdxdna_pm_suspend_put(job->hwctx->client->xdna);
>>>>> job->hwctx->priv->completed++;
>>>>> dma_fence_signal(fence);
>>>>> @@ -531,7 +531,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>>>> .num_rqs = DRM_SCHED_PRIORITY_COUNT,
>>>>> .credit_limit = HWCTX_MAX_CMDS,
>>>>> .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
>>>>> - .name = hwctx->name,
>>>>> + .name = "amdxdna_js",
>>>>> .dev = xdna->ddev.dev,
>>>>> };
>>>>> struct drm_gpu_scheduler *sched;
>>>>> @@ -697,6 +697,14 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
>>>>> kfree(hwctx->cus);
>>>>> }
>>>>> +static int aie2_config_cu_resp_handler(void *handle, void
>>>>> __iomem *data, size_t size)
>>>>> +{
>>>>> + struct amdxdna_hwctx *hwctx = handle;
>>>>> +
>>>>> + amdxdna_pm_suspend_put(hwctx->client->xdna);
>>>>> + return 0;
>>>>> +}
>>>>> +
>>>>> static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void
>>>>> *buf, u32 size)
>>>>> {
>>>>> struct amdxdna_hwctx_param_config_cu *config = buf;
>>>>> @@ -728,10 +736,14 @@ static int aie2_hwctx_cu_config(struct
>>>>> amdxdna_hwctx *hwctx, void *buf, u32 size
>>>>> if (!hwctx->cus)
>>>>> return -ENOMEM;
>>>>> - ret = aie2_config_cu(hwctx);
>>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>>> + if (ret)
>>>>> + goto free_cus;
>>>>> +
>>>>> + ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
>>>>> if (ret) {
>>>>> XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
>>>>> - goto free_cus;
>>>>> + goto pm_suspend_put;
>>>>> }
>>>>> wmb(); /* To avoid locking in command submit when check
>>>>> status */
>>>>> @@ -739,6 +751,8 @@ static int aie2_hwctx_cu_config(struct
>>>>> amdxdna_hwctx *hwctx, void *buf, u32 size
>>>>> return 0;
>>>>> +pm_suspend_put:
>>>>> + amdxdna_pm_suspend_put(xdna);
>>>>> free_cus:
>>>>> kfree(hwctx->cus);
>>>>> hwctx->cus = NULL;
>>>>> @@ -862,11 +876,15 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>>> *hwctx, struct amdxdna_sched_job *job,
>>>>> goto free_chain;
>>>>> }
>>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>>> + if (ret)
>>>>> + goto cleanup_job;
>>>>> +
>>>>> retry:
>>>>> ret = drm_gem_lock_reservations(job->bos, job->bo_cnt,
>>>>> &acquire_ctx);
>>>>> if (ret) {
>>>>> XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
>>>>> - goto cleanup_job;
>>>>> + goto suspend_put;
>>>>> }
>>>>> for (i = 0; i < job->bo_cnt; i++) {
>>>>> @@ -874,7 +892,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>>> *hwctx, struct amdxdna_sched_job *job,
>>>>> if (ret) {
>>>>> XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
>>>>> drm_gem_unlock_reservations(job->bos, job->bo_cnt,
>>>>> &acquire_ctx);
>>>>> - goto cleanup_job;
>>>>> + goto suspend_put;
>>>>> }
>>>>> }
>>>>> @@ -889,12 +907,12 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>>> *hwctx, struct amdxdna_sched_job *job,
>>>>> msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
>>>>> } else if (time_after(jiffies, timeout)) {
>>>>> ret = -ETIME;
>>>>> - goto cleanup_job;
>>>>> + goto suspend_put;
>>>>> }
>>>>> ret = aie2_populate_range(abo);
>>>>> if (ret)
>>>>> - goto cleanup_job;
>>>>> + goto suspend_put;
>>>>> goto retry;
>>>>> }
>>>>> }
>>>>> @@ -920,6 +938,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>>> *hwctx, struct amdxdna_sched_job *job,
>>>>> return 0;
>>>>> +suspend_put:
>>>>> + amdxdna_pm_suspend_put(xdna);
>>>>> cleanup_job:
>>>>> drm_sched_job_cleanup(&job->base);
>>>>> free_chain:
>>>>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/
>>>>> amdxdna/aie2_message.c
>>>>> index 9caad083543d..4660e8297ed8 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>>>> @@ -37,7 +37,7 @@ static int aie2_send_mgmt_msg_wait(struct
>>>>> amdxdna_dev_hdl *ndev,
>>>>> if (!ndev->mgmt_chann)
>>>>> return -ENODEV;
>>>>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>>>> + drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !mutex_is_locked(&xdna->dev_lock));
>>>>> ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
>>>>> if (ret == -ETIME) {
>>>>> xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>>>> @@ -377,15 +377,17 @@ int aie2_register_asyn_event_msg(struct
>>>>> amdxdna_dev_hdl *ndev, dma_addr_t addr,
>>>>> return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg,
>>>>> TX_TIMEOUT);
>>>>> }
>>>>> -int aie2_config_cu(struct amdxdna_hwctx *hwctx)
>>>>> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>>>>> + int (*notify_cb)(void *, void __iomem *, size_t))
>>>>> {
>>>>> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>>>>> struct amdxdna_dev *xdna = hwctx->client->xdna;
>>>>> u32 shift = xdna->dev_info->dev_mem_buf_shift;
>>>>> - DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
>>>>> + struct config_cu_req req = { 0 };
>>>>> + struct xdna_mailbox_msg msg;
>>>>> struct drm_gem_object *gobj;
>>>>> struct amdxdna_gem_obj *abo;
>>>>> - int ret, i;
>>>>> + int i;
>>>>> if (!chann)
>>>>> return -ENODEV;
>>>>> @@ -423,18 +425,12 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
>>>>> }
>>>>> req.num_cus = hwctx->cus->num_cus;
>>>>> - ret = xdna_send_msg_wait(xdna, chann, &msg);
>>>>> - if (ret == -ETIME)
>>>>> - aie2_destroy_context(xdna->dev_handle, hwctx);
>>>>> -
>>>>> - if (resp.status == AIE2_STATUS_SUCCESS) {
>>>>> - XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
>>>>> - return 0;
>>>>> - }
>>>>> -
>>>>> - XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
>>>>> - msg.opcode, resp.status, ret);
>>>>> - return ret;
>>>>> + msg.send_data = (u8 *)&req;
>>>>> + msg.send_size = sizeof(req);
>>>>> + msg.handle = hwctx;
>>>>> + msg.opcode = MSG_OP_CONFIG_CU;
>>>>> + msg.notify_cb = notify_cb;
>>>>> + return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>>>>> }
>>>>> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>>>>> amdxdna_sched_job *job,
>>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/
>>>>> amdxdna/aie2_pci.c
>>>>> index 6e39c769bb6d..f46a3dcd0580 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>>>> @@ -25,6 +25,7 @@
>>>>> #include "amdxdna_gem.h"
>>>>> #include "amdxdna_mailbox.h"
>>>>> #include "amdxdna_pci_drv.h"
>>>>> +#include "amdxdna_pm.h"
>>>>> static int aie2_max_col = XRS_MAX_COL;
>>>>> module_param(aie2_max_col, uint, 0600);
>>>>> @@ -223,15 +224,6 @@ static int aie2_mgmt_fw_init(struct
>>>>> amdxdna_dev_hdl *ndev)
>>>>> return ret;
>>>>> }
>>>>> - if (!ndev->async_events)
>>>>> - return 0;
>>>>> -
>>>>> - ret = aie2_error_async_events_send(ndev);
>>>>> - if (ret) {
>>>>> - XDNA_ERR(ndev->xdna, "Send async events failed");
>>>>> - return ret;
>>>>> - }
>>>>> -
>>>>> return 0;
>>>>> }
>>>>> @@ -257,6 +249,8 @@ static int aie2_mgmt_fw_query(struct
>>>>> amdxdna_dev_hdl *ndev)
>>>>> return ret;
>>>>> }
>>>>> + ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>>>>> +
>>>>> return 0;
>>>>> }
>>>>> @@ -338,6 +332,7 @@ static void aie2_hw_stop(struct amdxdna_dev
>>>>> *xdna)
>>>>> ndev->mbox = NULL;
>>>>> aie2_psp_stop(ndev->psp_hdl);
>>>>> aie2_smu_fini(ndev);
>>>>> + aie2_error_async_events_free(ndev);
>>>>> pci_disable_device(pdev);
>>>>> ndev->dev_status = AIE2_DEV_INIT;
>>>>> @@ -424,6 +419,18 @@ static int aie2_hw_start(struct amdxdna_dev
>>>>> *xdna)
>>>>> goto destroy_mgmt_chann;
>>>>> }
>>>>> + ret = aie2_mgmt_fw_query(ndev);
>>>>> + if (ret) {
>>>>> + XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
>>>>> + goto destroy_mgmt_chann;
>>>>> + }
>>>>> +
>>>>> + ret = aie2_error_async_events_alloc(ndev);
>>>>> + if (ret) {
>>>>> + XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
>>>>> + goto destroy_mgmt_chann;
>>>>> + }
>>>>> +
>>>>> ndev->dev_status = AIE2_DEV_START;
>>>>> return 0;
>>>>> @@ -459,7 +466,6 @@ static int aie2_hw_resume(struct amdxdna_dev
>>>>> *xdna)
>>>>> struct amdxdna_client *client;
>>>>> int ret;
>>>>> - guard(mutex)(&xdna->dev_lock);
>>>>> ret = aie2_hw_start(xdna);
>>>>> if (ret) {
>>>>> XDNA_ERR(xdna, "Start hardware failed, %d", ret);
>>>>> @@ -565,13 +571,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>>>> goto release_fw;
>>>>> }
>>>>> - ret = aie2_mgmt_fw_query(ndev);
>>>>> - if (ret) {
>>>>> - XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
>>>>> - goto stop_hw;
>>>>> - }
>>>>> - ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>>>>> -
>>>>> xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
>>>>> for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
>>>>> xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
>>>>> @@ -587,30 +586,10 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>>>> goto stop_hw;
>>>>> }
>>>>> - ret = aie2_error_async_events_alloc(ndev);
>>>>> - if (ret) {
>>>>> - XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
>>>>> - goto stop_hw;
>>>>> - }
>>>>> -
>>>>> - ret = aie2_error_async_events_send(ndev);
>>>>> - if (ret) {
>>>>> - XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
>>>>> - goto async_event_free;
>>>>> - }
>>>>> -
>>>>> - /* Issue a command to make sure firmware handled async events */
>>>>> - ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
>>>>> - if (ret) {
>>>>> - XDNA_ERR(xdna, "Re-query firmware version failed");
>>>>> - goto async_event_free;
>>>>> - }
>>>>> -
>>>>> release_firmware(fw);
>>>>> + amdxdna_rpm_init(xdna);
>>>>> return 0;
>>>>> -async_event_free:
>>>>> - aie2_error_async_events_free(ndev);
>>>>> stop_hw:
>>>>> aie2_hw_stop(xdna);
>>>>> release_fw:
>>>>> @@ -621,10 +600,8 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>>>> static void aie2_fini(struct amdxdna_dev *xdna)
>>>>> {
>>>>> - struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>>>>> -
>>>>> + amdxdna_rpm_fini(xdna);
>>>>> aie2_hw_stop(xdna);
>>>>> - aie2_error_async_events_free(ndev);
>>>>> }
>>>>> static int aie2_get_aie_status(struct amdxdna_client *client,
>>>>> @@ -856,6 +833,10 @@ static int aie2_get_info(struct amdxdna_client
>>>>> *client, struct amdxdna_drm_get_i
>>>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>>>> return -ENODEV;
>>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>>> + if (ret)
>>>>> + goto dev_exit;
>>>>> +
>>>>> switch (args->param) {
>>>>> case DRM_AMDXDNA_QUERY_AIE_STATUS:
>>>>> ret = aie2_get_aie_status(client, args);
>>>>> @@ -882,8 +863,11 @@ static int aie2_get_info(struct amdxdna_client
>>>>> *client, struct amdxdna_drm_get_i
>>>>> XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
>>>>> ret = -EOPNOTSUPP;
>>>>> }
>>>>> +
>>>>> + amdxdna_pm_suspend_put(xdna);
>>>>> XDNA_DBG(xdna, "Got param %d", args->param);
>>>>> +dev_exit:
>>>>> drm_dev_exit(idx);
>>>>> return ret;
>>>>> }
>>>>> @@ -932,6 +916,10 @@ static int aie2_get_array(struct
>>>>> amdxdna_client *client,
>>>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>>>> return -ENODEV;
>>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>>> + if (ret)
>>>>> + goto dev_exit;
>>>>> +
>>>>> switch (args->param) {
>>>>> case DRM_AMDXDNA_HW_CONTEXT_ALL:
>>>>> ret = aie2_query_ctx_status_array(client, args);
>>>>> @@ -940,8 +928,11 @@ static int aie2_get_array(struct
>>>>> amdxdna_client *client,
>>>>> XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
>>>>> ret = -EOPNOTSUPP;
>>>>> }
>>>>> +
>>>>> + amdxdna_pm_suspend_put(xdna);
>>>>> XDNA_DBG(xdna, "Got param %d", args->param);
>>>>> +dev_exit:
>>>>> drm_dev_exit(idx);
>>>>> return ret;
>>>>> }
>>>>> @@ -980,6 +971,10 @@ static int aie2_set_state(struct
>>>>> amdxdna_client *client,
>>>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>>>> return -ENODEV;
>>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>>> + if (ret)
>>>>> + goto dev_exit;
>>>>> +
>>>>> switch (args->param) {
>>>>> case DRM_AMDXDNA_SET_POWER_MODE:
>>>>> ret = aie2_set_power_mode(client, args);
>>>>> @@ -990,6 +985,8 @@ static int aie2_set_state(struct amdxdna_client
>>>>> *client,
>>>>> break;
>>>>> }
>>>>> + amdxdna_pm_suspend_put(xdna);
>>>>> +dev_exit:
>>>>> drm_dev_exit(idx);
>>>>> return ret;
>>>>> }
>>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/
>>>>> amdxdna/aie2_pci.h
>>>>> index 91a8e948f82a..289a23ecd5f1 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>>>> @@ -272,7 +272,8 @@ int aie2_map_host_buf(struct amdxdna_dev_hdl
>>>>> *ndev, u32 context_id, u64 addr, u6
>>>>> int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user
>>>>> *buf, u32 size, u32 *cols_filled);
>>>>> int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev,
>>>>> dma_addr_t addr, u32 size,
>>>>> void *handle, int (*cb)(void*, void __iomem *,
>>>>> size_t));
>>>>> -int aie2_config_cu(struct amdxdna_hwctx *hwctx);
>>>>> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>>>>> + int (*notify_cb)(void *, void __iomem *, size_t));
>>>>> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>>>>> amdxdna_sched_job *job,
>>>>> int (*notify_cb)(void *, void __iomem *, size_t));
>>>>> int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
>>>>> diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/
>>>>> amdxdna/aie2_smu.c
>>>>> index d303701b0ded..7f292a615ed8 100644
>>>>> --- a/drivers/accel/amdxdna/aie2_smu.c
>>>>> +++ b/drivers/accel/amdxdna/aie2_smu.c
>>>>> @@ -11,6 +11,7 @@
>>>>> #include "aie2_pci.h"
>>>>> #include "amdxdna_pci_drv.h"
>>>>> +#include "amdxdna_pm.h"
>>>>> #define SMU_RESULT_OK 1
>>>>> @@ -59,12 +60,16 @@ int npu1_set_dpm(struct amdxdna_dev_hdl
>>>>> *ndev, u32 dpm_level)
>>>>> u32 freq;
>>>>> int ret;
>>>>> + ret = amdxdna_pm_resume_get(ndev->xdna);
>>>>> + if (ret)
>>>>> + return ret;
>>>>> +
>>>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
>>>>> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
>>>>> if (ret) {
>>>>> XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
>>>>> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
>>>>> - return ret;
>>>>> + goto suspend_put;
>>>>> }
>>>>> ndev->npuclk_freq = freq;
>>>>> @@ -73,8 +78,10 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev,
>>>>> u32 dpm_level)
>>>>> if (ret) {
>>>>> XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
>>>>> ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
>>>>> - return ret;
>>>>> + goto suspend_put;
>>>>> }
>>>>> +
>>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>>> ndev->hclk_freq = freq;
>>>>> ndev->dpm_level = dpm_level;
>>>>> @@ -82,26 +89,35 @@ int npu1_set_dpm(struct amdxdna_dev_hdl
>>>>> *ndev, u32 dpm_level)
>>>>> ndev->npuclk_freq, ndev->hclk_freq);
>>>>> return 0;
>>>>> +
>>>>> +suspend_put:
>>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>>> + return ret;
>>>>> }
>>>>> int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>>>> {
>>>>> int ret;
>>>>> + ret = amdxdna_pm_resume_get(ndev->xdna);
>>>>> + if (ret)
>>>>> + return ret;
>>>>> +
>>>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL,
>>>>> dpm_level, NULL);
>>>>> if (ret) {
>>>>> XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d",
>>>>> dpm_level, ret);
>>>>> - return ret;
>>>>> + goto suspend_put;
>>>>> }
>>>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL,
>>>>> dpm_level, NULL);
>>>>> if (ret) {
>>>>> XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
>>>>> dpm_level, ret);
>>>>> - return ret;
>>>>> + goto suspend_put;
>>>>> }
>>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>>> ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>>>>> ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
>>>>> ndev->dpm_level = dpm_level;
>>>>> @@ -110,6 +126,10 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev,
>>>>> u32 dpm_level)
>>>>> ndev->npuclk_freq, ndev->hclk_freq);
>>>>> return 0;
>>>>> +
>>>>> +suspend_put:
>>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>>> + return ret;
>>>>> }
>>>>> int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/
>>>>> amdxdna/amdxdna_ctx.c
>>>>> index 4bfe4ef20550..c3300eff7460 100644
>>>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>>>> @@ -161,14 +161,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>>>> drm_device *dev, void *data, struct dr
>>>>> if (args->ext || args->ext_flags)
>>>>> return -EINVAL;
>>>>> - if (!drm_dev_enter(dev, &idx))
>>>>> - return -ENODEV;
>>>>> -
>>>>> hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
>>>>> - if (!hwctx) {
>>>>> - ret = -ENOMEM;
>>>>> - goto exit;
>>>>> - }
>>>>> + if (!hwctx)
>>>>> + return -ENOMEM;
>>>>> if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
>>>>> XDNA_ERR(xdna, "Access QoS info failed");
>>>>> @@ -181,30 +176,36 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>>>> drm_device *dev, void *data, struct dr
>>>>> hwctx->num_tiles = args->num_tiles;
>>>>> hwctx->mem_size = args->mem_size;
>>>>> hwctx->max_opc = args->max_opc;
>>>>> - ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
>>>>> - XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1,
>>>>> MAX_HWCTX_ID),
>>>>> - &client->next_hwctxid, GFP_KERNEL);
>>>>> - if (ret < 0) {
>>>>> - XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
>>>>> +
>>>>> + guard(mutex)(&xdna->dev_lock);
>>>>> +
>>>>> + if (!drm_dev_enter(dev, &idx)) {
>>>>> + ret = -ENODEV;
>>>>> goto free_hwctx;
>>>>> }
>>>>> - hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
>>>>> + ret = xdna->dev_info->ops->hwctx_init(hwctx);
>>>>> + if (ret) {
>>>>> + XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
>>>>> + goto dev_exit;
>>>>> + }
>>>>> +
>>>>> + hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->fw_ctx_id);
>>>>> if (!hwctx->name) {
>>>>> ret = -ENOMEM;
>>>>> - goto rm_id;
>>>>> + goto fini_hwctx;
>>>>> }
>>>>> - mutex_lock(&xdna->dev_lock);
>>>>> - ret = xdna->dev_info->ops->hwctx_init(hwctx);
>>>>> - if (ret) {
>>>>> - mutex_unlock(&xdna->dev_lock);
>>>>> - XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
>>>>> + ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
>>>>> + XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1,
>>>>> MAX_HWCTX_ID),
>>>>> + &client->next_hwctxid, GFP_KERNEL);
>>>>> + if (ret < 0) {
>>>>> + XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
>>>>> goto free_name;
>>>>> }
>>>>> +
>>>>> args->handle = hwctx->id;
>>>>> args->syncobj_handle = hwctx->syncobj_hdl;
>>>>> - mutex_unlock(&xdna->dev_lock);
>>>>> atomic64_set(&hwctx->job_submit_cnt, 0);
>>>>> atomic64_set(&hwctx->job_free_cnt, 0);
>>>>> @@ -214,12 +215,12 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>>>> drm_device *dev, void *data, struct dr
>>>>> free_name:
>>>>> kfree(hwctx->name);
>>>>> -rm_id:
>>>>> - xa_erase(&client->hwctx_xa, hwctx->id);
>>>>> +fini_hwctx:
>>>>> + xdna->dev_info->ops->hwctx_fini(hwctx);
>>>>> +dev_exit:
>>>>> + drm_dev_exit(idx);
>>>>> free_hwctx:
>>>>> kfree(hwctx);
>>>>> -exit:
>>>>> - drm_dev_exit(idx);
>>>>> return ret;
>>>>> }
>>>>> @@ -431,11 +432,6 @@ int amdxdna_cmd_submit(struct amdxdna_client
>>>>> *client,
>>>>> goto unlock_srcu;
>>>>> }
>>>>> - if (hwctx->status != HWCTX_STAT_READY) {
>>>>> - XDNA_ERR(xdna, "HW Context is not ready");
>>>>> - ret = -EINVAL;
>>>>> - goto unlock_srcu;
>>>>> - }
>>>>> job->hwctx = hwctx;
>>>>> job->mm = current->mm;
>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/
>>>>> accel/ amdxdna/amdxdna_mailbox.c
>>>>> index da1ac89bb78f..24258dcc18eb 100644
>>>>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>>> @@ -194,7 +194,8 @@ static void mailbox_release_msg(struct
>>>>> mailbox_channel *mb_chann,
>>>>> {
>>>>> MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
>>>>> mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
>>>>> - mb_msg->notify_cb(mb_msg->handle, NULL, 0);
>>>>> + if (mb_msg->notify_cb)
>>>>> + mb_msg->notify_cb(mb_msg->handle, NULL, 0);
>>>>> kfree(mb_msg);
>>>>> }
>>>>> @@ -248,7 +249,7 @@ mailbox_get_resp(struct mailbox_channel
>>>>> *mb_chann, struct xdna_msg_header *heade
>>>>> {
>>>>> struct mailbox_msg *mb_msg;
>>>>> int msg_id;
>>>>> - int ret;
>>>>> + int ret = 0;
>>>>> msg_id = header->id;
>>>>> if (!mailbox_validate_msgid(msg_id)) {
>>>>> @@ -265,9 +266,11 @@ mailbox_get_resp(struct mailbox_channel
>>>>> *mb_chann, struct xdna_msg_header *heade
>>>>> MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
>>>>> header->opcode, header->total_size, header->id);
>>>>> - ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
>>>>> - if (unlikely(ret))
>>>>> - MB_ERR(mb_chann, "Message callback ret %d", ret);
>>>>> + if (mb_msg->notify_cb) {
>>>>> + ret = mb_msg->notify_cb(mb_msg->handle, data, header->total_size);
>>>>> + if (unlikely(ret))
>>>>> + MB_ERR(mb_chann, "Message callback ret %d", ret);
>>>>> + }
>>>>> kfree(mb_msg);
>>>>> return ret;
>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/
>>>>> accel/ amdxdna/amdxdna_pci_drv.c
>>>>> index 569cd703729d..aa04452310e5 100644
>>>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>>>> @@ -13,13 +13,11 @@
>>>>> #include <drm/gpu_scheduler.h>
>>>>> #include <linux/iommu.h>
>>>>> #include <linux/pci.h>
>>>>> -#include <linux/pm_runtime.h>
>>>>> #include "amdxdna_ctx.h"
>>>>> #include "amdxdna_gem.h"
>>>>> #include "amdxdna_pci_drv.h"
>>>>> -
>>>>> -#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
>>>>> +#include "amdxdna_pm.h"
>>>>> MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
>>>>> MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
>>>>> @@ -61,17 +59,9 @@ static int amdxdna_drm_open(struct drm_device
>>>>> *ddev, struct drm_file *filp)
>>>>> struct amdxdna_client *client;
>>>>> int ret;
>>>>> - ret = pm_runtime_resume_and_get(ddev->dev);
>>>>> - if (ret) {
>>>>> - XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
>>>>> - return ret;
>>>>> - }
>>>>> -
>>>>> client = kzalloc(sizeof(*client), GFP_KERNEL);
>>>>> - if (!client) {
>>>>> - ret = -ENOMEM;
>>>>> - goto put_rpm;
>>>>> - }
>>>>> + if (!client)
>>>>> + return -ENOMEM;
>>>>> client->pid = pid_nr(rcu_access_pointer(filp->pid));
>>>>> client->xdna = xdna;
>>>>> @@ -106,9 +96,6 @@ static int amdxdna_drm_open(struct drm_device
>>>>> *ddev, struct drm_file *filp)
>>>>> iommu_sva_unbind_device(client->sva);
>>>>> failed:
>>>>> kfree(client);
>>>>> -put_rpm:
>>>>> - pm_runtime_mark_last_busy(ddev->dev);
>>>>> - pm_runtime_put_autosuspend(ddev->dev);
>>>>> return ret;
>>>>> }
>>>>> @@ -130,8 +117,6 @@ static void amdxdna_drm_close(struct drm_device
>>>>> *ddev, struct drm_file *filp)
>>>>> XDNA_DBG(xdna, "pid %d closed", client->pid);
>>>>> kfree(client);
>>>>> - pm_runtime_mark_last_busy(ddev->dev);
>>>>> - pm_runtime_put_autosuspend(ddev->dev);
>>>>> }
>>>>> static int amdxdna_flush(struct file *f, fl_owner_t id)
>>>>> @@ -310,19 +295,12 @@ static int amdxdna_probe(struct pci_dev
>>>>> *pdev, const struct pci_device_id *id)
>>>>> goto failed_dev_fini;
>>>>> }
>>>>> - pm_runtime_set_autosuspend_delay(dev,
>>>>> AMDXDNA_AUTOSUSPEND_DELAY);
>>>>> - pm_runtime_use_autosuspend(dev);
>>>>> - pm_runtime_allow(dev);
>>>>> -
>>>>> ret = drm_dev_register(&xdna->ddev, 0);
>>>>> if (ret) {
>>>>> XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
>>>>> - pm_runtime_forbid(dev);
>>>>> goto failed_sysfs_fini;
>>>>> }
>>>>> - pm_runtime_mark_last_busy(dev);
>>>>> - pm_runtime_put_autosuspend(dev);
>>>>> return 0;
>>>>> failed_sysfs_fini:
>>>>> @@ -339,14 +317,10 @@ static int amdxdna_probe(struct pci_dev
>>>>> *pdev, const struct pci_device_id *id)
>>>>> static void amdxdna_remove(struct pci_dev *pdev)
>>>>> {
>>>>> struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
>>>>> - struct device *dev = &pdev->dev;
>>>>> struct amdxdna_client *client;
>>>>> destroy_workqueue(xdna->notifier_wq);
>>>>> - pm_runtime_get_noresume(dev);
>>>>> - pm_runtime_forbid(dev);
>>>>> -
>>>>> drm_dev_unplug(&xdna->ddev);
>>>>> amdxdna_sysfs_fini(xdna);
>>>>> @@ -365,29 +339,9 @@ static void amdxdna_remove(struct pci_dev
>>>>> *pdev)
>>>>> mutex_unlock(&xdna->dev_lock);
>>>>> }
>>>>> -static int amdxdna_pmops_suspend(struct device *dev)
>>>>> -{
>>>>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>>>>> -
>>>>> - if (!xdna->dev_info->ops->suspend)
>>>>> - return -EOPNOTSUPP;
>>>>> -
>>>>> - return xdna->dev_info->ops->suspend(xdna);
>>>>> -}
>>>>> -
>>>>> -static int amdxdna_pmops_resume(struct device *dev)
>>>>> -{
>>>>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>>>>> -
>>>>> - if (!xdna->dev_info->ops->resume)
>>>>> - return -EOPNOTSUPP;
>>>>> -
>>>>> - return xdna->dev_info->ops->resume(xdna);
>>>>> -}
>>>>> -
>>>>> static const struct dev_pm_ops amdxdna_pm_ops = {
>>>>> - SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
>>>>> - RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume, NULL)
>>>>> + SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume)
>>>>> + RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL)
>>>>> };
>>>>> static struct pci_driver amdxdna_pci_driver = {
>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/
>>>>> accel/ amdxdna/amdxdna_pci_drv.h
>>>>> index 72d6696d49da..626beebf730e 100644
>>>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>>>> @@ -6,6 +6,7 @@
>>>>> #ifndef _AMDXDNA_PCI_DRV_H_
>>>>> #define _AMDXDNA_PCI_DRV_H_
>>>>> +#include <drm/drm_print.h>
>>>>
>>>> This seems like a spurious header inclusion. It shouldn't be needed
>>>> for adding a bool to the struct.
>>>>
>>>>
>>>>> #include <linux/workqueue.h>
>>>>> #include <linux/xarray.h>
>>>>> @@ -99,6 +100,7 @@ struct amdxdna_dev {
>>>>> struct amdxdna_fw_ver fw_ver;
>>>>> struct rw_semaphore notifier_lock; /* for mmu notifier*/
>>>>> struct workqueue_struct *notifier_wq;
>>>>> + bool rpm_on;
>>>>
>>>> I'm wondering do you really need a new variable to track the runtime
>>>> PM status? Can't you just use pm_runtime_active() and the
>>>> appropriate locking when checking it?
>>>>
>>
>> Just make sure you didn't miss the two above comments when scanning
>> the email response since you didn't reply them.
>
> Sorry, I indeed missed this question. Thanks for your reminding.
>
> rpm_on is used to check if suspend/resume is in-progress. So it will not
> recursively call into suspend and resume. I did not see any function to
> check if suspend/resume is in-progress.
Ah yes there are RPM_SUSPENDING and RPM_RESUMING. You could do
something like this:
dev->power.runtime_status == RPM_SUSPENDING ||
dev->power.runtime_status == RPM_RESUMING ||
pm_suspend_in_progress();
But used without locking it could be a bit inconsistent. Your variable
approach probably is better.
>
>
> Thanks,
>
> Lizhi
>
>>
>>>>> };
>>>>> /*
>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/
>>>>> amdxdna/amdxdna_pm.c
>>>>> new file mode 100644
>>>>> index 000000000000..fad14f60b99b
>>>>> --- /dev/null
>>>>> +++ b/drivers/accel/amdxdna/amdxdna_pm.c
>>>>> @@ -0,0 +1,96 @@
>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>> +/*
>>>>> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
>>>>> + */
>>>>> +
>>>>> +#include <drm/amdxdna_accel.h>
>>>>> +#include <drm/drm_drv.h>
>>>>> +#include <linux/pm_runtime.h>
>>>>> +
>>>>> +#include "amdxdna_pm.h"
>>>>> +
>>>>> +#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
>>>>> +
>>>>> +int amdxdna_pm_suspend(struct device *dev)
>>>>> +{
>>>>> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>>>>> + int ret = -EOPNOTSUPP;
>>>>> + bool rpm;
>>>>> +
>>>>> + if (xdna->dev_info->ops->suspend) {
>>>>> + rpm = xdna->rpm_on;
>>>>> + xdna->rpm_on = false;
>>>>> + ret = xdna->dev_info->ops->suspend(xdna);
>>>>> + xdna->rpm_on = rpm;
>>>>> + }
>>>>> +
>>>>> + XDNA_DBG(xdna, "Suspend done ret %d", ret);
>>>>> + return ret;
>>>>> +}
>>>>> +
>>>>> +int amdxdna_pm_resume(struct device *dev)
>>>>> +{
>>>>> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>>>>> + int ret = -EOPNOTSUPP;
>>>>> + bool rpm;
>>>>> +
>>>>> + if (xdna->dev_info->ops->resume) {
>>>>> + rpm = xdna->rpm_on;
>>>>> + xdna->rpm_on = false;
>>>>> + ret = xdna->dev_info->ops->resume(xdna);
>>>>> + xdna->rpm_on = rpm;
>>>>> + }
>>>>> +
>>>>> + XDNA_DBG(xdna, "Resume done ret %d", ret);
>>>>> + return ret;
>>>>> +}
>>>>> +
>>>>> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna)
>>>>> +{
>>>>> + struct device *dev = xdna->ddev.dev;
>>>>> + int ret;
>>>>> +
>>>>> + if (!xdna->rpm_on)
>>>>> + return 0;
>>>>> +
>>>>> + ret = pm_runtime_resume_and_get(dev);
>>>>> + if (ret) {
>>>>> + XDNA_ERR(xdna, "Resume failed: %d", ret);
>>>>> + pm_runtime_set_suspended(dev);
>>>>> + }
>>>>> +
>>>>> + return ret;
>>>>> +}
>>>>> +
>>>>> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna)
>>>>> +{
>>>>> + struct device *dev = xdna->ddev.dev;
>>>>> +
>>>>> + if (!xdna->rpm_on)
>>>>> + return;
>>>>> +
>>>>> + pm_runtime_mark_last_busy(dev);
>>>>> + pm_runtime_put_autosuspend(dev);
>>>>> +}
>>>>> +
>>>>> +void amdxdna_rpm_init(struct amdxdna_dev *xdna)
>>>>> +{
>>>>> + struct device *dev = xdna->ddev.dev;
>>>>> +
>>>>> + pm_runtime_set_active(dev);
>>>>> + pm_runtime_set_autosuspend_delay(dev, AMDXDNA_AUTOSUSPEND_DELAY);
>>>>> + pm_runtime_use_autosuspend(dev);
>>>>> + pm_runtime_allow(dev);
>>>>> + pm_runtime_mark_last_busy(dev);
>>>>> + pm_runtime_put_autosuspend(dev);
>>>>> + xdna->rpm_on = true;
>>>>> +}
>>>>> +
>>>>> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna)
>>>>> +{
>>>>> + struct device *dev = xdna->ddev.dev;
>>>>> +
>>>>> + xdna->rpm_on = false;
>>>>> + pm_runtime_get_noresume(dev);
>>>>> + pm_runtime_forbid(dev);
>>>>> +}
>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/
>>>>> amdxdna/amdxdna_pm.h
>>>>> new file mode 100644
>>>>> index 000000000000..11c295b6d64a
>>>>> --- /dev/null
>>>>> +++ b/drivers/accel/amdxdna/amdxdna_pm.h
>>>>> @@ -0,0 +1,18 @@
>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>>> +/*
>>>>> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
>>>>> + */
>>>>> +
>>>>> +#ifndef _AMDXDNA_PM_H_
>>>>> +#define _AMDXDNA_PM_H_
>>>>> +
>>>>> +#include "amdxdna_pci_drv.h"
>>>>> +
>>>>> +int amdxdna_pm_suspend(struct device *dev);
>>>>> +int amdxdna_pm_resume(struct device *dev);
>>>>> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna);
>>>>> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna);
>>>>> +void amdxdna_rpm_init(struct amdxdna_dev *xdna);
>>>>> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna);
>>>>
>>>> Nit: Why rpm for init/fini and pm for all the others? Wouldn't it
>>>> make sense to pick one or the other?
>>>
>>> Sure. I will change to _pm_init and _pm_fini.
>>>
>>> Thanks,
>>>
>>> Lizhi
>>>
>>>>
>>>>> +
>>>>> +#endif /* _AMDXDNA_PM_H_ */
>>>>
>>
On 9/18/25 11:23, Mario Limonciello wrote:
>
>
> On 9/18/2025 1:05 PM, Lizhi Hou wrote:
>>
>> On 9/18/25 10:43, Mario Limonciello wrote:
>>>
>>>
>>> On 9/18/2025 12:41 PM, Lizhi Hou wrote:
>>>>
>>>> On 9/18/25 10:31, Mario Limonciello wrote:
>>>>>
>>>>>
>>>>> On 9/18/2025 12:24 PM, Lizhi Hou wrote:
>>>>>> Currently, pm_runtime_resume_and_get() is invoked in the driver's
>>>>>> open
>>>>>> callback, and pm_runtime_put_autosuspend() is called in the close
>>>>>> callback. As a result, the device remains active whenever an
>>>>>> application
>>>>>> opens it, even if no I/O is performed, leading to unnecessary power
>>>>>> consumption.
>>>>>>
>>>>>> Move the runtime PM calls to the AIE2 callbacks that actually
>>>>>> interact
>>>>>> with the hardware. The device will automatically suspend after 5
>>>>>> seconds
>>>>>> of inactivity (no hardware accesses and no pending commands), and
>>>>>> it will
>>>>>> be resumed on the next hardware access.
>>>>>>
>>>>>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>>>>>> ---
>>>>>> drivers/accel/amdxdna/Makefile | 1 +
>>>>>> drivers/accel/amdxdna/aie2_ctx.c | 42 ++++++++---
>>>>>> drivers/accel/amdxdna/aie2_message.c | 28 ++++----
>>>>>> drivers/accel/amdxdna/aie2_pci.c | 79 ++++++++++----------
>>>>>> drivers/accel/amdxdna/aie2_pci.h | 3 +-
>>>>>> drivers/accel/amdxdna/aie2_smu.c | 28 ++++++--
>>>>>> drivers/accel/amdxdna/amdxdna_ctx.c | 54 +++++++-------
>>>>>> drivers/accel/amdxdna/amdxdna_mailbox.c | 13 ++--
>>>>>> drivers/accel/amdxdna/amdxdna_pci_drv.c | 56 ++-------------
>>>>>> drivers/accel/amdxdna/amdxdna_pci_drv.h | 2 +
>>>>>> drivers/accel/amdxdna/amdxdna_pm.c | 96
>>>>>> ++++++++++++++++++++ +++++
>>>>>> drivers/accel/amdxdna/amdxdna_pm.h | 18 +++++
>>>>>> 12 files changed, 262 insertions(+), 158 deletions(-)
>>>>>> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.c
>>>>>> create mode 100644 drivers/accel/amdxdna/amdxdna_pm.h
>>>>>>
>>>>>> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/
>>>>>> amdxdna/ Makefile
>>>>>> index 6797dac65efa..6344aaf523fa 100644
>>>>>> --- a/drivers/accel/amdxdna/Makefile
>>>>>> +++ b/drivers/accel/amdxdna/Makefile
>>>>>> @@ -14,6 +14,7 @@ amdxdna-y := \
>>>>>> amdxdna_mailbox.o \
>>>>>> amdxdna_mailbox_helper.o \
>>>>>> amdxdna_pci_drv.o \
>>>>>> + amdxdna_pm.o \
>>>>>> amdxdna_sysfs.o \
>>>>>> amdxdna_ubuf.o \
>>>>>> npu1_regs.o \
>>>>>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/
>>>>>> amdxdna/aie2_ctx.c
>>>>>> index e9f9b1fa5dc1..691fdb3b008f 100644
>>>>>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>>>>>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>>>>>> @@ -21,6 +21,7 @@
>>>>>> #include "amdxdna_gem.h"
>>>>>> #include "amdxdna_mailbox.h"
>>>>>> #include "amdxdna_pci_drv.h"
>>>>>> +#include "amdxdna_pm.h"
>>>>>> static bool force_cmdlist;
>>>>>> module_param(force_cmdlist, bool, 0600);
>>>>>> @@ -88,7 +89,7 @@ static int aie2_hwctx_restart(struct
>>>>>> amdxdna_dev *xdna, struct amdxdna_hwctx *hw
>>>>>> goto out;
>>>>>> }
>>>>>> - ret = aie2_config_cu(hwctx);
>>>>>> + ret = aie2_config_cu(hwctx, NULL);
>>>>>> if (ret) {
>>>>>> XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
>>>>>> goto out;
>>>>>> @@ -167,14 +168,11 @@ static int aie2_hwctx_resume_cb(struct
>>>>>> amdxdna_hwctx *hwctx, void *arg)
>>>>>> int aie2_hwctx_resume(struct amdxdna_client *client)
>>>>>> {
>>>>>> - struct amdxdna_dev *xdna = client->xdna;
>>>>>> -
>>>>>> /*
>>>>>> * The resume path cannot guarantee that mailbox channel
>>>>>> can be
>>>>>> * regenerated. If this happen, when submit message to this
>>>>>> * mailbox channel, error will return.
>>>>>> */
>>>>>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>>>>> return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
>>>>>> }
>>>>>> @@ -184,6 +182,8 @@ aie2_sched_notify(struct amdxdna_sched_job
>>>>>> *job)
>>>>>> struct dma_fence *fence = job->fence;
>>>>>> trace_xdna_job(&job->base, job->hwctx->name, "signaled
>>>>>> fence", job->seq);
>>>>>> +
>>>>>> + amdxdna_pm_suspend_put(job->hwctx->client->xdna);
>>>>>> job->hwctx->priv->completed++;
>>>>>> dma_fence_signal(fence);
>>>>>> @@ -531,7 +531,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx
>>>>>> *hwctx)
>>>>>> .num_rqs = DRM_SCHED_PRIORITY_COUNT,
>>>>>> .credit_limit = HWCTX_MAX_CMDS,
>>>>>> .timeout = msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
>>>>>> - .name = hwctx->name,
>>>>>> + .name = "amdxdna_js",
>>>>>> .dev = xdna->ddev.dev,
>>>>>> };
>>>>>> struct drm_gpu_scheduler *sched;
>>>>>> @@ -697,6 +697,14 @@ void aie2_hwctx_fini(struct amdxdna_hwctx
>>>>>> *hwctx)
>>>>>> kfree(hwctx->cus);
>>>>>> }
>>>>>> +static int aie2_config_cu_resp_handler(void *handle, void
>>>>>> __iomem *data, size_t size)
>>>>>> +{
>>>>>> + struct amdxdna_hwctx *hwctx = handle;
>>>>>> +
>>>>>> + amdxdna_pm_suspend_put(hwctx->client->xdna);
>>>>>> + return 0;
>>>>>> +}
>>>>>> +
>>>>>> static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx,
>>>>>> void *buf, u32 size)
>>>>>> {
>>>>>> struct amdxdna_hwctx_param_config_cu *config = buf;
>>>>>> @@ -728,10 +736,14 @@ static int aie2_hwctx_cu_config(struct
>>>>>> amdxdna_hwctx *hwctx, void *buf, u32 size
>>>>>> if (!hwctx->cus)
>>>>>> return -ENOMEM;
>>>>>> - ret = aie2_config_cu(hwctx);
>>>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>>>> + if (ret)
>>>>>> + goto free_cus;
>>>>>> +
>>>>>> + ret = aie2_config_cu(hwctx, aie2_config_cu_resp_handler);
>>>>>> if (ret) {
>>>>>> XDNA_ERR(xdna, "Config CU to firmware failed, ret %d",
>>>>>> ret);
>>>>>> - goto free_cus;
>>>>>> + goto pm_suspend_put;
>>>>>> }
>>>>>> wmb(); /* To avoid locking in command submit when check
>>>>>> status */
>>>>>> @@ -739,6 +751,8 @@ static int aie2_hwctx_cu_config(struct
>>>>>> amdxdna_hwctx *hwctx, void *buf, u32 size
>>>>>> return 0;
>>>>>> +pm_suspend_put:
>>>>>> + amdxdna_pm_suspend_put(xdna);
>>>>>> free_cus:
>>>>>> kfree(hwctx->cus);
>>>>>> hwctx->cus = NULL;
>>>>>> @@ -862,11 +876,15 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>>>> *hwctx, struct amdxdna_sched_job *job,
>>>>>> goto free_chain;
>>>>>> }
>>>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>>>> + if (ret)
>>>>>> + goto cleanup_job;
>>>>>> +
>>>>>> retry:
>>>>>> ret = drm_gem_lock_reservations(job->bos, job->bo_cnt,
>>>>>> &acquire_ctx);
>>>>>> if (ret) {
>>>>>> XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
>>>>>> - goto cleanup_job;
>>>>>> + goto suspend_put;
>>>>>> }
>>>>>> for (i = 0; i < job->bo_cnt; i++) {
>>>>>> @@ -874,7 +892,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>>>> *hwctx, struct amdxdna_sched_job *job,
>>>>>> if (ret) {
>>>>>> XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
>>>>>> drm_gem_unlock_reservations(job->bos, job->bo_cnt,
>>>>>> &acquire_ctx);
>>>>>> - goto cleanup_job;
>>>>>> + goto suspend_put;
>>>>>> }
>>>>>> }
>>>>>> @@ -889,12 +907,12 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>>>> *hwctx, struct amdxdna_sched_job *job,
>>>>>> msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
>>>>>> } else if (time_after(jiffies, timeout)) {
>>>>>> ret = -ETIME;
>>>>>> - goto cleanup_job;
>>>>>> + goto suspend_put;
>>>>>> }
>>>>>> ret = aie2_populate_range(abo);
>>>>>> if (ret)
>>>>>> - goto cleanup_job;
>>>>>> + goto suspend_put;
>>>>>> goto retry;
>>>>>> }
>>>>>> }
>>>>>> @@ -920,6 +938,8 @@ int aie2_cmd_submit(struct amdxdna_hwctx
>>>>>> *hwctx, struct amdxdna_sched_job *job,
>>>>>> return 0;
>>>>>> +suspend_put:
>>>>>> + amdxdna_pm_suspend_put(xdna);
>>>>>> cleanup_job:
>>>>>> drm_sched_job_cleanup(&job->base);
>>>>>> free_chain:
>>>>>> diff --git a/drivers/accel/amdxdna/aie2_message.c
>>>>>> b/drivers/accel/ amdxdna/aie2_message.c
>>>>>> index 9caad083543d..4660e8297ed8 100644
>>>>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>>>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>>>>> @@ -37,7 +37,7 @@ static int aie2_send_mgmt_msg_wait(struct
>>>>>> amdxdna_dev_hdl *ndev,
>>>>>> if (!ndev->mgmt_chann)
>>>>>> return -ENODEV;
>>>>>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>>>>> + drm_WARN_ON(&xdna->ddev, xdna->rpm_on && !
>>>>>> mutex_is_locked(&xdna- >dev_lock));
>>>>>> ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
>>>>>> if (ret == -ETIME) {
>>>>>> xdna_mailbox_stop_channel(ndev->mgmt_chann);
>>>>>> @@ -377,15 +377,17 @@ int aie2_register_asyn_event_msg(struct
>>>>>> amdxdna_dev_hdl *ndev, dma_addr_t addr,
>>>>>> return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg,
>>>>>> TX_TIMEOUT);
>>>>>> }
>>>>>> -int aie2_config_cu(struct amdxdna_hwctx *hwctx)
>>>>>> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>>>>>> + int (*notify_cb)(void *, void __iomem *, size_t))
>>>>>> {
>>>>>> struct mailbox_channel *chann = hwctx->priv->mbox_chann;
>>>>>> struct amdxdna_dev *xdna = hwctx->client->xdna;
>>>>>> u32 shift = xdna->dev_info->dev_mem_buf_shift;
>>>>>> - DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
>>>>>> + struct config_cu_req req = { 0 };
>>>>>> + struct xdna_mailbox_msg msg;
>>>>>> struct drm_gem_object *gobj;
>>>>>> struct amdxdna_gem_obj *abo;
>>>>>> - int ret, i;
>>>>>> + int i;
>>>>>> if (!chann)
>>>>>> return -ENODEV;
>>>>>> @@ -423,18 +425,12 @@ int aie2_config_cu(struct amdxdna_hwctx
>>>>>> *hwctx)
>>>>>> }
>>>>>> req.num_cus = hwctx->cus->num_cus;
>>>>>> - ret = xdna_send_msg_wait(xdna, chann, &msg);
>>>>>> - if (ret == -ETIME)
>>>>>> - aie2_destroy_context(xdna->dev_handle, hwctx);
>>>>>> -
>>>>>> - if (resp.status == AIE2_STATUS_SUCCESS) {
>>>>>> - XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus,
>>>>>> ret);
>>>>>> - return 0;
>>>>>> - }
>>>>>> -
>>>>>> - XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret
>>>>>> %d",
>>>>>> - msg.opcode, resp.status, ret);
>>>>>> - return ret;
>>>>>> + msg.send_data = (u8 *)&req;
>>>>>> + msg.send_size = sizeof(req);
>>>>>> + msg.handle = hwctx;
>>>>>> + msg.opcode = MSG_OP_CONFIG_CU;
>>>>>> + msg.notify_cb = notify_cb;
>>>>>> + return xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
>>>>>> }
>>>>>> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>>>>>> amdxdna_sched_job *job,
>>>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/
>>>>>> amdxdna/aie2_pci.c
>>>>>> index 6e39c769bb6d..f46a3dcd0580 100644
>>>>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>>>>> @@ -25,6 +25,7 @@
>>>>>> #include "amdxdna_gem.h"
>>>>>> #include "amdxdna_mailbox.h"
>>>>>> #include "amdxdna_pci_drv.h"
>>>>>> +#include "amdxdna_pm.h"
>>>>>> static int aie2_max_col = XRS_MAX_COL;
>>>>>> module_param(aie2_max_col, uint, 0600);
>>>>>> @@ -223,15 +224,6 @@ static int aie2_mgmt_fw_init(struct
>>>>>> amdxdna_dev_hdl *ndev)
>>>>>> return ret;
>>>>>> }
>>>>>> - if (!ndev->async_events)
>>>>>> - return 0;
>>>>>> -
>>>>>> - ret = aie2_error_async_events_send(ndev);
>>>>>> - if (ret) {
>>>>>> - XDNA_ERR(ndev->xdna, "Send async events failed");
>>>>>> - return ret;
>>>>>> - }
>>>>>> -
>>>>>> return 0;
>>>>>> }
>>>>>> @@ -257,6 +249,8 @@ static int aie2_mgmt_fw_query(struct
>>>>>> amdxdna_dev_hdl *ndev)
>>>>>> return ret;
>>>>>> }
>>>>>> + ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>>>>>> +
>>>>>> return 0;
>>>>>> }
>>>>>> @@ -338,6 +332,7 @@ static void aie2_hw_stop(struct amdxdna_dev
>>>>>> *xdna)
>>>>>> ndev->mbox = NULL;
>>>>>> aie2_psp_stop(ndev->psp_hdl);
>>>>>> aie2_smu_fini(ndev);
>>>>>> + aie2_error_async_events_free(ndev);
>>>>>> pci_disable_device(pdev);
>>>>>> ndev->dev_status = AIE2_DEV_INIT;
>>>>>> @@ -424,6 +419,18 @@ static int aie2_hw_start(struct amdxdna_dev
>>>>>> *xdna)
>>>>>> goto destroy_mgmt_chann;
>>>>>> }
>>>>>> + ret = aie2_mgmt_fw_query(ndev);
>>>>>> + if (ret) {
>>>>>> + XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
>>>>>> + goto destroy_mgmt_chann;
>>>>>> + }
>>>>>> +
>>>>>> + ret = aie2_error_async_events_alloc(ndev);
>>>>>> + if (ret) {
>>>>>> + XDNA_ERR(xdna, "Allocate async events failed, ret %d",
>>>>>> ret);
>>>>>> + goto destroy_mgmt_chann;
>>>>>> + }
>>>>>> +
>>>>>> ndev->dev_status = AIE2_DEV_START;
>>>>>> return 0;
>>>>>> @@ -459,7 +466,6 @@ static int aie2_hw_resume(struct amdxdna_dev
>>>>>> *xdna)
>>>>>> struct amdxdna_client *client;
>>>>>> int ret;
>>>>>> - guard(mutex)(&xdna->dev_lock);
>>>>>> ret = aie2_hw_start(xdna);
>>>>>> if (ret) {
>>>>>> XDNA_ERR(xdna, "Start hardware failed, %d", ret);
>>>>>> @@ -565,13 +571,6 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>>>>> goto release_fw;
>>>>>> }
>>>>>> - ret = aie2_mgmt_fw_query(ndev);
>>>>>> - if (ret) {
>>>>>> - XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
>>>>>> - goto stop_hw;
>>>>>> - }
>>>>>> - ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>>>>>> -
>>>>>> xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
>>>>>> for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
>>>>>> xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv-
>>>>>> >dpm_clk_tbl[i].hclk;
>>>>>> @@ -587,30 +586,10 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>>>>> goto stop_hw;
>>>>>> }
>>>>>> - ret = aie2_error_async_events_alloc(ndev);
>>>>>> - if (ret) {
>>>>>> - XDNA_ERR(xdna, "Allocate async events failed, ret %d",
>>>>>> ret);
>>>>>> - goto stop_hw;
>>>>>> - }
>>>>>> -
>>>>>> - ret = aie2_error_async_events_send(ndev);
>>>>>> - if (ret) {
>>>>>> - XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
>>>>>> - goto async_event_free;
>>>>>> - }
>>>>>> -
>>>>>> - /* Issue a command to make sure firmware handled async
>>>>>> events */
>>>>>> - ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
>>>>>> - if (ret) {
>>>>>> - XDNA_ERR(xdna, "Re-query firmware version failed");
>>>>>> - goto async_event_free;
>>>>>> - }
>>>>>> -
>>>>>> release_firmware(fw);
>>>>>> + amdxdna_rpm_init(xdna);
>>>>>> return 0;
>>>>>> -async_event_free:
>>>>>> - aie2_error_async_events_free(ndev);
>>>>>> stop_hw:
>>>>>> aie2_hw_stop(xdna);
>>>>>> release_fw:
>>>>>> @@ -621,10 +600,8 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>>>>> static void aie2_fini(struct amdxdna_dev *xdna)
>>>>>> {
>>>>>> - struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
>>>>>> -
>>>>>> + amdxdna_rpm_fini(xdna);
>>>>>> aie2_hw_stop(xdna);
>>>>>> - aie2_error_async_events_free(ndev);
>>>>>> }
>>>>>> static int aie2_get_aie_status(struct amdxdna_client *client,
>>>>>> @@ -856,6 +833,10 @@ static int aie2_get_info(struct
>>>>>> amdxdna_client *client, struct amdxdna_drm_get_i
>>>>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>>>>> return -ENODEV;
>>>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>>>> + if (ret)
>>>>>> + goto dev_exit;
>>>>>> +
>>>>>> switch (args->param) {
>>>>>> case DRM_AMDXDNA_QUERY_AIE_STATUS:
>>>>>> ret = aie2_get_aie_status(client, args);
>>>>>> @@ -882,8 +863,11 @@ static int aie2_get_info(struct
>>>>>> amdxdna_client *client, struct amdxdna_drm_get_i
>>>>>> XDNA_ERR(xdna, "Not supported request parameter %u",
>>>>>> args- >param);
>>>>>> ret = -EOPNOTSUPP;
>>>>>> }
>>>>>> +
>>>>>> + amdxdna_pm_suspend_put(xdna);
>>>>>> XDNA_DBG(xdna, "Got param %d", args->param);
>>>>>> +dev_exit:
>>>>>> drm_dev_exit(idx);
>>>>>> return ret;
>>>>>> }
>>>>>> @@ -932,6 +916,10 @@ static int aie2_get_array(struct
>>>>>> amdxdna_client *client,
>>>>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>>>>> return -ENODEV;
>>>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>>>> + if (ret)
>>>>>> + goto dev_exit;
>>>>>> +
>>>>>> switch (args->param) {
>>>>>> case DRM_AMDXDNA_HW_CONTEXT_ALL:
>>>>>> ret = aie2_query_ctx_status_array(client, args);
>>>>>> @@ -940,8 +928,11 @@ static int aie2_get_array(struct
>>>>>> amdxdna_client *client,
>>>>>> XDNA_ERR(xdna, "Not supported request parameter %u",
>>>>>> args- >param);
>>>>>> ret = -EOPNOTSUPP;
>>>>>> }
>>>>>> +
>>>>>> + amdxdna_pm_suspend_put(xdna);
>>>>>> XDNA_DBG(xdna, "Got param %d", args->param);
>>>>>> +dev_exit:
>>>>>> drm_dev_exit(idx);
>>>>>> return ret;
>>>>>> }
>>>>>> @@ -980,6 +971,10 @@ static int aie2_set_state(struct
>>>>>> amdxdna_client *client,
>>>>>> if (!drm_dev_enter(&xdna->ddev, &idx))
>>>>>> return -ENODEV;
>>>>>> + ret = amdxdna_pm_resume_get(xdna);
>>>>>> + if (ret)
>>>>>> + goto dev_exit;
>>>>>> +
>>>>>> switch (args->param) {
>>>>>> case DRM_AMDXDNA_SET_POWER_MODE:
>>>>>> ret = aie2_set_power_mode(client, args);
>>>>>> @@ -990,6 +985,8 @@ static int aie2_set_state(struct
>>>>>> amdxdna_client *client,
>>>>>> break;
>>>>>> }
>>>>>> + amdxdna_pm_suspend_put(xdna);
>>>>>> +dev_exit:
>>>>>> drm_dev_exit(idx);
>>>>>> return ret;
>>>>>> }
>>>>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/
>>>>>> amdxdna/aie2_pci.h
>>>>>> index 91a8e948f82a..289a23ecd5f1 100644
>>>>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>>>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>>>>> @@ -272,7 +272,8 @@ int aie2_map_host_buf(struct amdxdna_dev_hdl
>>>>>> *ndev, u32 context_id, u64 addr, u6
>>>>>> int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user
>>>>>> *buf, u32 size, u32 *cols_filled);
>>>>>> int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev,
>>>>>> dma_addr_t addr, u32 size,
>>>>>> void *handle, int (*cb)(void*, void __iomem *,
>>>>>> size_t));
>>>>>> -int aie2_config_cu(struct amdxdna_hwctx *hwctx);
>>>>>> +int aie2_config_cu(struct amdxdna_hwctx *hwctx,
>>>>>> + int (*notify_cb)(void *, void __iomem *, size_t));
>>>>>> int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct
>>>>>> amdxdna_sched_job *job,
>>>>>> int (*notify_cb)(void *, void __iomem *, size_t));
>>>>>> int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
>>>>>> diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/
>>>>>> amdxdna/aie2_smu.c
>>>>>> index d303701b0ded..7f292a615ed8 100644
>>>>>> --- a/drivers/accel/amdxdna/aie2_smu.c
>>>>>> +++ b/drivers/accel/amdxdna/aie2_smu.c
>>>>>> @@ -11,6 +11,7 @@
>>>>>> #include "aie2_pci.h"
>>>>>> #include "amdxdna_pci_drv.h"
>>>>>> +#include "amdxdna_pm.h"
>>>>>> #define SMU_RESULT_OK 1
>>>>>> @@ -59,12 +60,16 @@ int npu1_set_dpm(struct amdxdna_dev_hdl
>>>>>> *ndev, u32 dpm_level)
>>>>>> u32 freq;
>>>>>> int ret;
>>>>>> + ret = amdxdna_pm_resume_get(ndev->xdna);
>>>>>> + if (ret)
>>>>>> + return ret;
>>>>>> +
>>>>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
>>>>>> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
>>>>>> if (ret) {
>>>>>> XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret
>>>>>> %d\n",
>>>>>> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
>>>>>> - return ret;
>>>>>> + goto suspend_put;
>>>>>> }
>>>>>> ndev->npuclk_freq = freq;
>>>>>> @@ -73,8 +78,10 @@ int npu1_set_dpm(struct amdxdna_dev_hdl
>>>>>> *ndev, u32 dpm_level)
>>>>>> if (ret) {
>>>>>> XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
>>>>>> ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
>>>>>> - return ret;
>>>>>> + goto suspend_put;
>>>>>> }
>>>>>> +
>>>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>>>> ndev->hclk_freq = freq;
>>>>>> ndev->dpm_level = dpm_level;
>>>>>> @@ -82,26 +89,35 @@ int npu1_set_dpm(struct amdxdna_dev_hdl
>>>>>> *ndev, u32 dpm_level)
>>>>>> ndev->npuclk_freq, ndev->hclk_freq);
>>>>>> return 0;
>>>>>> +
>>>>>> +suspend_put:
>>>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>>>> + return ret;
>>>>>> }
>>>>>> int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>>>>> {
>>>>>> int ret;
>>>>>> + ret = amdxdna_pm_resume_get(ndev->xdna);
>>>>>> + if (ret)
>>>>>> + return ret;
>>>>>> +
>>>>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL,
>>>>>> dpm_level, NULL);
>>>>>> if (ret) {
>>>>>> XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret
>>>>>> %d ",
>>>>>> dpm_level, ret);
>>>>>> - return ret;
>>>>>> + goto suspend_put;
>>>>>> }
>>>>>> ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL,
>>>>>> dpm_level, NULL);
>>>>>> if (ret) {
>>>>>> XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret
>>>>>> %d",
>>>>>> dpm_level, ret);
>>>>>> - return ret;
>>>>>> + goto suspend_put;
>>>>>> }
>>>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>>>> ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>>>>>> ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
>>>>>> ndev->dpm_level = dpm_level;
>>>>>> @@ -110,6 +126,10 @@ int npu4_set_dpm(struct amdxdna_dev_hdl
>>>>>> *ndev, u32 dpm_level)
>>>>>> ndev->npuclk_freq, ndev->hclk_freq);
>>>>>> return 0;
>>>>>> +
>>>>>> +suspend_put:
>>>>>> + amdxdna_pm_suspend_put(ndev->xdna);
>>>>>> + return ret;
>>>>>> }
>>>>>> int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/
>>>>>> amdxdna/amdxdna_ctx.c
>>>>>> index 4bfe4ef20550..c3300eff7460 100644
>>>>>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>>>>>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>>>>>> @@ -161,14 +161,9 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>>>>> drm_device *dev, void *data, struct dr
>>>>>> if (args->ext || args->ext_flags)
>>>>>> return -EINVAL;
>>>>>> - if (!drm_dev_enter(dev, &idx))
>>>>>> - return -ENODEV;
>>>>>> -
>>>>>> hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
>>>>>> - if (!hwctx) {
>>>>>> - ret = -ENOMEM;
>>>>>> - goto exit;
>>>>>> - }
>>>>>> + if (!hwctx)
>>>>>> + return -ENOMEM;
>>>>>> if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args-
>>>>>> >qos_p), sizeof(hwctx->qos))) {
>>>>>> XDNA_ERR(xdna, "Access QoS info failed");
>>>>>> @@ -181,30 +176,36 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>>>>> drm_device *dev, void *data, struct dr
>>>>>> hwctx->num_tiles = args->num_tiles;
>>>>>> hwctx->mem_size = args->mem_size;
>>>>>> hwctx->max_opc = args->max_opc;
>>>>>> - ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
>>>>>> - XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1,
>>>>>> MAX_HWCTX_ID),
>>>>>> - &client->next_hwctxid, GFP_KERNEL);
>>>>>> - if (ret < 0) {
>>>>>> - XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
>>>>>> +
>>>>>> + guard(mutex)(&xdna->dev_lock);
>>>>>> +
>>>>>> + if (!drm_dev_enter(dev, &idx)) {
>>>>>> + ret = -ENODEV;
>>>>>> goto free_hwctx;
>>>>>> }
>>>>>> - hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client-
>>>>>> >pid, hwctx->id);
>>>>>> + ret = xdna->dev_info->ops->hwctx_init(hwctx);
>>>>>> + if (ret) {
>>>>>> + XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
>>>>>> + goto dev_exit;
>>>>>> + }
>>>>>> +
>>>>>> + hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client-
>>>>>> >pid, hwctx->fw_ctx_id);
>>>>>> if (!hwctx->name) {
>>>>>> ret = -ENOMEM;
>>>>>> - goto rm_id;
>>>>>> + goto fini_hwctx;
>>>>>> }
>>>>>> - mutex_lock(&xdna->dev_lock);
>>>>>> - ret = xdna->dev_info->ops->hwctx_init(hwctx);
>>>>>> - if (ret) {
>>>>>> - mutex_unlock(&xdna->dev_lock);
>>>>>> - XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
>>>>>> + ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
>>>>>> + XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1,
>>>>>> MAX_HWCTX_ID),
>>>>>> + &client->next_hwctxid, GFP_KERNEL);
>>>>>> + if (ret < 0) {
>>>>>> + XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
>>>>>> goto free_name;
>>>>>> }
>>>>>> +
>>>>>> args->handle = hwctx->id;
>>>>>> args->syncobj_handle = hwctx->syncobj_hdl;
>>>>>> - mutex_unlock(&xdna->dev_lock);
>>>>>> atomic64_set(&hwctx->job_submit_cnt, 0);
>>>>>> atomic64_set(&hwctx->job_free_cnt, 0);
>>>>>> @@ -214,12 +215,12 @@ int amdxdna_drm_create_hwctx_ioctl(struct
>>>>>> drm_device *dev, void *data, struct dr
>>>>>> free_name:
>>>>>> kfree(hwctx->name);
>>>>>> -rm_id:
>>>>>> - xa_erase(&client->hwctx_xa, hwctx->id);
>>>>>> +fini_hwctx:
>>>>>> + xdna->dev_info->ops->hwctx_fini(hwctx);
>>>>>> +dev_exit:
>>>>>> + drm_dev_exit(idx);
>>>>>> free_hwctx:
>>>>>> kfree(hwctx);
>>>>>> -exit:
>>>>>> - drm_dev_exit(idx);
>>>>>> return ret;
>>>>>> }
>>>>>> @@ -431,11 +432,6 @@ int amdxdna_cmd_submit(struct
>>>>>> amdxdna_client *client,
>>>>>> goto unlock_srcu;
>>>>>> }
>>>>>> - if (hwctx->status != HWCTX_STAT_READY) {
>>>>>> - XDNA_ERR(xdna, "HW Context is not ready");
>>>>>> - ret = -EINVAL;
>>>>>> - goto unlock_srcu;
>>>>>> - }
>>>>>> job->hwctx = hwctx;
>>>>>> job->mm = current->mm;
>>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/
>>>>>> accel/ amdxdna/amdxdna_mailbox.c
>>>>>> index da1ac89bb78f..24258dcc18eb 100644
>>>>>> --- a/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>>>> +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c
>>>>>> @@ -194,7 +194,8 @@ static void mailbox_release_msg(struct
>>>>>> mailbox_channel *mb_chann,
>>>>>> {
>>>>>> MB_DBG(mb_chann, "msg_id 0x%x msg opcode 0x%x",
>>>>>> mb_msg->pkg.header.id, mb_msg->pkg.header.opcode);
>>>>>> - mb_msg->notify_cb(mb_msg->handle, NULL, 0);
>>>>>> + if (mb_msg->notify_cb)
>>>>>> + mb_msg->notify_cb(mb_msg->handle, NULL, 0);
>>>>>> kfree(mb_msg);
>>>>>> }
>>>>>> @@ -248,7 +249,7 @@ mailbox_get_resp(struct mailbox_channel
>>>>>> *mb_chann, struct xdna_msg_header *heade
>>>>>> {
>>>>>> struct mailbox_msg *mb_msg;
>>>>>> int msg_id;
>>>>>> - int ret;
>>>>>> + int ret = 0;
>>>>>> msg_id = header->id;
>>>>>> if (!mailbox_validate_msgid(msg_id)) {
>>>>>> @@ -265,9 +266,11 @@ mailbox_get_resp(struct mailbox_channel
>>>>>> *mb_chann, struct xdna_msg_header *heade
>>>>>> MB_DBG(mb_chann, "opcode 0x%x size %d id 0x%x",
>>>>>> header->opcode, header->total_size, header->id);
>>>>>> - ret = mb_msg->notify_cb(mb_msg->handle, data, header-
>>>>>> >total_size);
>>>>>> - if (unlikely(ret))
>>>>>> - MB_ERR(mb_chann, "Message callback ret %d", ret);
>>>>>> + if (mb_msg->notify_cb) {
>>>>>> + ret = mb_msg->notify_cb(mb_msg->handle, data, header-
>>>>>> >total_size);
>>>>>> + if (unlikely(ret))
>>>>>> + MB_ERR(mb_chann, "Message callback ret %d", ret);
>>>>>> + }
>>>>>> kfree(mb_msg);
>>>>>> return ret;
>>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/
>>>>>> accel/ amdxdna/amdxdna_pci_drv.c
>>>>>> index 569cd703729d..aa04452310e5 100644
>>>>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>>>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>>>>> @@ -13,13 +13,11 @@
>>>>>> #include <drm/gpu_scheduler.h>
>>>>>> #include <linux/iommu.h>
>>>>>> #include <linux/pci.h>
>>>>>> -#include <linux/pm_runtime.h>
>>>>>> #include "amdxdna_ctx.h"
>>>>>> #include "amdxdna_gem.h"
>>>>>> #include "amdxdna_pci_drv.h"
>>>>>> -
>>>>>> -#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
>>>>>> +#include "amdxdna_pm.h"
>>>>>> MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin");
>>>>>> MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
>>>>>> @@ -61,17 +59,9 @@ static int amdxdna_drm_open(struct drm_device
>>>>>> *ddev, struct drm_file *filp)
>>>>>> struct amdxdna_client *client;
>>>>>> int ret;
>>>>>> - ret = pm_runtime_resume_and_get(ddev->dev);
>>>>>> - if (ret) {
>>>>>> - XDNA_ERR(xdna, "Failed to get rpm, ret %d", ret);
>>>>>> - return ret;
>>>>>> - }
>>>>>> -
>>>>>> client = kzalloc(sizeof(*client), GFP_KERNEL);
>>>>>> - if (!client) {
>>>>>> - ret = -ENOMEM;
>>>>>> - goto put_rpm;
>>>>>> - }
>>>>>> + if (!client)
>>>>>> + return -ENOMEM;
>>>>>> client->pid = pid_nr(rcu_access_pointer(filp->pid));
>>>>>> client->xdna = xdna;
>>>>>> @@ -106,9 +96,6 @@ static int amdxdna_drm_open(struct drm_device
>>>>>> *ddev, struct drm_file *filp)
>>>>>> iommu_sva_unbind_device(client->sva);
>>>>>> failed:
>>>>>> kfree(client);
>>>>>> -put_rpm:
>>>>>> - pm_runtime_mark_last_busy(ddev->dev);
>>>>>> - pm_runtime_put_autosuspend(ddev->dev);
>>>>>> return ret;
>>>>>> }
>>>>>> @@ -130,8 +117,6 @@ static void amdxdna_drm_close(struct
>>>>>> drm_device *ddev, struct drm_file *filp)
>>>>>> XDNA_DBG(xdna, "pid %d closed", client->pid);
>>>>>> kfree(client);
>>>>>> - pm_runtime_mark_last_busy(ddev->dev);
>>>>>> - pm_runtime_put_autosuspend(ddev->dev);
>>>>>> }
>>>>>> static int amdxdna_flush(struct file *f, fl_owner_t id)
>>>>>> @@ -310,19 +295,12 @@ static int amdxdna_probe(struct pci_dev
>>>>>> *pdev, const struct pci_device_id *id)
>>>>>> goto failed_dev_fini;
>>>>>> }
>>>>>> - pm_runtime_set_autosuspend_delay(dev,
>>>>>> AMDXDNA_AUTOSUSPEND_DELAY);
>>>>>> - pm_runtime_use_autosuspend(dev);
>>>>>> - pm_runtime_allow(dev);
>>>>>> -
>>>>>> ret = drm_dev_register(&xdna->ddev, 0);
>>>>>> if (ret) {
>>>>>> XDNA_ERR(xdna, "DRM register failed, ret %d", ret);
>>>>>> - pm_runtime_forbid(dev);
>>>>>> goto failed_sysfs_fini;
>>>>>> }
>>>>>> - pm_runtime_mark_last_busy(dev);
>>>>>> - pm_runtime_put_autosuspend(dev);
>>>>>> return 0;
>>>>>> failed_sysfs_fini:
>>>>>> @@ -339,14 +317,10 @@ static int amdxdna_probe(struct pci_dev
>>>>>> *pdev, const struct pci_device_id *id)
>>>>>> static void amdxdna_remove(struct pci_dev *pdev)
>>>>>> {
>>>>>> struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
>>>>>> - struct device *dev = &pdev->dev;
>>>>>> struct amdxdna_client *client;
>>>>>> destroy_workqueue(xdna->notifier_wq);
>>>>>> - pm_runtime_get_noresume(dev);
>>>>>> - pm_runtime_forbid(dev);
>>>>>> -
>>>>>> drm_dev_unplug(&xdna->ddev);
>>>>>> amdxdna_sysfs_fini(xdna);
>>>>>> @@ -365,29 +339,9 @@ static void amdxdna_remove(struct pci_dev
>>>>>> *pdev)
>>>>>> mutex_unlock(&xdna->dev_lock);
>>>>>> }
>>>>>> -static int amdxdna_pmops_suspend(struct device *dev)
>>>>>> -{
>>>>>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>>>>>> -
>>>>>> - if (!xdna->dev_info->ops->suspend)
>>>>>> - return -EOPNOTSUPP;
>>>>>> -
>>>>>> - return xdna->dev_info->ops->suspend(xdna);
>>>>>> -}
>>>>>> -
>>>>>> -static int amdxdna_pmops_resume(struct device *dev)
>>>>>> -{
>>>>>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>>>>>> -
>>>>>> - if (!xdna->dev_info->ops->resume)
>>>>>> - return -EOPNOTSUPP;
>>>>>> -
>>>>>> - return xdna->dev_info->ops->resume(xdna);
>>>>>> -}
>>>>>> -
>>>>>> static const struct dev_pm_ops amdxdna_pm_ops = {
>>>>>> - SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend,
>>>>>> amdxdna_pmops_resume)
>>>>>> - RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume,
>>>>>> NULL)
>>>>>> + SYSTEM_SLEEP_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume)
>>>>>> + RUNTIME_PM_OPS(amdxdna_pm_suspend, amdxdna_pm_resume, NULL)
>>>>>> };
>>>>>> static struct pci_driver amdxdna_pci_driver = {
>>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/
>>>>>> accel/ amdxdna/amdxdna_pci_drv.h
>>>>>> index 72d6696d49da..626beebf730e 100644
>>>>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>>>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>>>>> @@ -6,6 +6,7 @@
>>>>>> #ifndef _AMDXDNA_PCI_DRV_H_
>>>>>> #define _AMDXDNA_PCI_DRV_H_
>>>>>> +#include <drm/drm_print.h>
>>>>>
>>>>> This seems like a spurious header inclusion. It shouldn't be
>>>>> needed for adding a bool to the struct.
>>>>>
>>>>>
>>>>>> #include <linux/workqueue.h>
>>>>>> #include <linux/xarray.h>
>>>>>> @@ -99,6 +100,7 @@ struct amdxdna_dev {
>>>>>> struct amdxdna_fw_ver fw_ver;
>>>>>> struct rw_semaphore notifier_lock; /* for mmu
>>>>>> notifier*/
>>>>>> struct workqueue_struct *notifier_wq;
>>>>>> + bool rpm_on;
>>>>>
>>>>> I'm wondering do you really need a new variable to track the
>>>>> runtime PM status? Can't you just use pm_runtime_active() and the
>>>>> appropriate locking when checking it?
>>>>>
>>>
>>> Just make sure you didn't miss the two above comments when scanning
>>> the email response since you didn't reply them.
>>
>> Sorry, I indeed missed this question. Thanks for the reminder.
>>
>> rpm_on is used to check if suspend/resume is in-progress. So it will
>> not recursively call into suspend and resume. I did not see any
>> function to check if suspend/resume is in-progress.
>
> Ah, yes — there are RPM_SUSPENDING and RPM_RESUMING. You could do
> something like this:
>
> dev->power.runtime_status == RPM_SUSPENDING ||
> dev->power.runtime_status == RPM_RESUMING ||
> pm_suspend_in_progress();
>
> But used without locking it could be a bit inconsistent. Your
> variable approach is probably better.
Yes. I saw this piece of code in the xe driver, and it was marked "very
unreliable" in a code comment. :)
I will send a new patch to address the other comment and keep this
variable approach.
Thanks,
Lizhi
>
>>
>>
>> Thanks,
>>
>> Lizhi
>>
>>>
>>>>>> };
>>>>>> /*
>>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pm.c b/drivers/accel/
>>>>>> amdxdna/amdxdna_pm.c
>>>>>> new file mode 100644
>>>>>> index 000000000000..fad14f60b99b
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/accel/amdxdna/amdxdna_pm.c
>>>>>> @@ -0,0 +1,96 @@
>>>>>> +// SPDX-License-Identifier: GPL-2.0
>>>>>> +/*
>>>>>> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
>>>>>> + */
>>>>>> +
>>>>>> +#include <drm/amdxdna_accel.h>
>>>>>> +#include <drm/drm_drv.h>
>>>>>> +#include <linux/pm_runtime.h>
>>>>>> +
>>>>>> +#include "amdxdna_pm.h"
>>>>>> +
>>>>>> +#define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */
>>>>>> +
>>>>>> +int amdxdna_pm_suspend(struct device *dev)
>>>>>> +{
>>>>>> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>>>>>> + int ret = -EOPNOTSUPP;
>>>>>> + bool rpm;
>>>>>> +
>>>>>> + if (xdna->dev_info->ops->suspend) {
>>>>>> + rpm = xdna->rpm_on;
>>>>>> + xdna->rpm_on = false;
>>>>>> + ret = xdna->dev_info->ops->suspend(xdna);
>>>>>> + xdna->rpm_on = rpm;
>>>>>> + }
>>>>>> +
>>>>>> + XDNA_DBG(xdna, "Suspend done ret %d", ret);
>>>>>> + return ret;
>>>>>> +}
>>>>>> +
>>>>>> +int amdxdna_pm_resume(struct device *dev)
>>>>>> +{
>>>>>> + struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>>>>>> + int ret = -EOPNOTSUPP;
>>>>>> + bool rpm;
>>>>>> +
>>>>>> + if (xdna->dev_info->ops->resume) {
>>>>>> + rpm = xdna->rpm_on;
>>>>>> + xdna->rpm_on = false;
>>>>>> + ret = xdna->dev_info->ops->resume(xdna);
>>>>>> + xdna->rpm_on = rpm;
>>>>>> + }
>>>>>> +
>>>>>> + XDNA_DBG(xdna, "Resume done ret %d", ret);
>>>>>> + return ret;
>>>>>> +}
>>>>>> +
>>>>>> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna)
>>>>>> +{
>>>>>> + struct device *dev = xdna->ddev.dev;
>>>>>> + int ret;
>>>>>> +
>>>>>> + if (!xdna->rpm_on)
>>>>>> + return 0;
>>>>>> +
>>>>>> + ret = pm_runtime_resume_and_get(dev);
>>>>>> + if (ret) {
>>>>>> + XDNA_ERR(xdna, "Resume failed: %d", ret);
>>>>>> + pm_runtime_set_suspended(dev);
>>>>>> + }
>>>>>> +
>>>>>> + return ret;
>>>>>> +}
>>>>>> +
>>>>>> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna)
>>>>>> +{
>>>>>> + struct device *dev = xdna->ddev.dev;
>>>>>> +
>>>>>> + if (!xdna->rpm_on)
>>>>>> + return;
>>>>>> +
>>>>>> + pm_runtime_mark_last_busy(dev);
>>>>>> + pm_runtime_put_autosuspend(dev);
>>>>>> +}
>>>>>> +
>>>>>> +void amdxdna_rpm_init(struct amdxdna_dev *xdna)
>>>>>> +{
>>>>>> + struct device *dev = xdna->ddev.dev;
>>>>>> +
>>>>>> + pm_runtime_set_active(dev);
>>>>>> + pm_runtime_set_autosuspend_delay(dev,
>>>>>> AMDXDNA_AUTOSUSPEND_DELAY);
>>>>>> + pm_runtime_use_autosuspend(dev);
>>>>>> + pm_runtime_allow(dev);
>>>>>> + pm_runtime_mark_last_busy(dev);
>>>>>> + pm_runtime_put_autosuspend(dev);
>>>>>> + xdna->rpm_on = true;
>>>>>> +}
>>>>>> +
>>>>>> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna)
>>>>>> +{
>>>>>> + struct device *dev = xdna->ddev.dev;
>>>>>> +
>>>>>> + xdna->rpm_on = false;
>>>>>> + pm_runtime_get_noresume(dev);
>>>>>> + pm_runtime_forbid(dev);
>>>>>> +}
>>>>>> diff --git a/drivers/accel/amdxdna/amdxdna_pm.h b/drivers/accel/
>>>>>> amdxdna/amdxdna_pm.h
>>>>>> new file mode 100644
>>>>>> index 000000000000..11c295b6d64a
>>>>>> --- /dev/null
>>>>>> +++ b/drivers/accel/amdxdna/amdxdna_pm.h
>>>>>> @@ -0,0 +1,18 @@
>>>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>>>> +/*
>>>>>> + * Copyright (C) 2025, Advanced Micro Devices, Inc.
>>>>>> + */
>>>>>> +
>>>>>> +#ifndef _AMDXDNA_PM_H_
>>>>>> +#define _AMDXDNA_PM_H_
>>>>>> +
>>>>>> +#include "amdxdna_pci_drv.h"
>>>>>> +
>>>>>> +int amdxdna_pm_suspend(struct device *dev);
>>>>>> +int amdxdna_pm_resume(struct device *dev);
>>>>>> +int amdxdna_pm_resume_get(struct amdxdna_dev *xdna);
>>>>>> +void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna);
>>>>>> +void amdxdna_rpm_init(struct amdxdna_dev *xdna);
>>>>>> +void amdxdna_rpm_fini(struct amdxdna_dev *xdna);
>>>>>
>>>>> Nit: Why rpm for init/fini and pm for all the others? Wouldn't it
>>>>> make sense to pick one or the other?
>>>>
>>>> Sure. I will change to _pm_init and _pm_fini.
>>>>
>>>> Thanks,
>>>>
>>>> Lizhi
>>>>
>>>>>
>>>>>> +
>>>>>> +#endif /* _AMDXDNA_PM_H_ */
>>>>>
>>>
>
© 2016 - 2026 Red Hat, Inc.