Add interfaces for user application to submit command and wait for its
completion.
Co-developed-by: Min Ma <min.ma@amd.com>
Signed-off-by: Min Ma <min.ma@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie2_ctx.c | 634 +++++++++++++++++-
drivers/accel/amdxdna/aie2_message.c | 343 ++++++++++
drivers/accel/amdxdna/aie2_pci.c | 5 +
drivers/accel/amdxdna/aie2_pci.h | 40 ++
drivers/accel/amdxdna/aie2_psp.c | 2 +
drivers/accel/amdxdna/aie2_smu.c | 2 +
drivers/accel/amdxdna/amdxdna_ctx.c | 320 ++++++++-
drivers/accel/amdxdna/amdxdna_ctx.h | 111 +++
drivers/accel/amdxdna/amdxdna_gem.c | 1 +
.../accel/amdxdna/amdxdna_mailbox_helper.c | 5 +
drivers/accel/amdxdna/amdxdna_pci_drv.c | 12 +
drivers/accel/amdxdna/amdxdna_pci_drv.h | 5 +
drivers/accel/amdxdna/amdxdna_sysfs.c | 5 +
drivers/accel/amdxdna/npu1_regs.c | 1 +
drivers/accel/amdxdna/npu2_regs.c | 1 +
drivers/accel/amdxdna/npu4_regs.c | 1 +
drivers/accel/amdxdna/npu5_regs.c | 1 +
include/trace/events/amdxdna.h | 41 ++
include/uapi/drm/amdxdna_accel.h | 38 ++
19 files changed, 1559 insertions(+), 9 deletions(-)
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index ae8a91dad042..4641e52b59e2 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -8,8 +8,12 @@
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
+#include <drm/drm_syncobj.h>
+#include <linux/hmm.h>
#include <linux/types.h>
+#include <trace/events/amdxdna.h>
+#include "aie2_msg_priv.h"
#include "aie2_pci.h"
#include "aie2_solver.h"
#include "amdxdna_ctx.h"
@@ -17,6 +21,342 @@
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
+bool force_cmdlist;
+module_param(force_cmdlist, bool, 0600);
+MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");
+
+#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */
+
+static void aie2_job_release(struct kref *ref)
+{
+ struct amdxdna_sched_job *job;
+
+ job = container_of(ref, struct amdxdna_sched_job, refcnt);
+ amdxdna_sched_job_cleanup(job);
+ kfree(job);
+}
+
+static void aie2_job_put(struct amdxdna_sched_job *job)
+{
+ kref_put(&job->refcnt, aie2_job_release);
+}
+
+/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
+static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
+ struct drm_sched_job *bad_job)
+{
+ drm_sched_stop(&hwctx->priv->sched, bad_job);
+ aie2_destroy_context(xdna->dev_handle, hwctx);
+}
+
+static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_gem_obj *heap = hwctx->priv->heap;
+ int ret;
+
+ ret = aie2_create_context(xdna->dev_handle, hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
+ goto out;
+ }
+
+ ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
+ heap->mem.userptr, heap->mem.size);
+ if (ret) {
+ XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
+ goto out;
+ }
+
+ if (hwctx->status != HWCTX_STAT_READY) {
+ XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
+ goto out;
+ }
+
+ ret = aie2_config_cu(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
+ goto out;
+ }
+
+out:
+ drm_sched_start(&hwctx->priv->sched);
+ XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
+ return ret;
+}
+
+void aie2_stop_ctx_by_col_map(struct amdxdna_client *client, u32 col_map)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_hwctx *hwctx;
+ int next = 0;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ mutex_lock(&client->hwctx_lock);
+ idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
+ /* check if the HW context uses the error column */
+ if (!(col_map & amdxdna_hwctx_col_map(hwctx)))
+ continue;
+
+ aie2_hwctx_stop(xdna, hwctx, NULL);
+ hwctx->old_status = hwctx->status;
+ hwctx->status = HWCTX_STAT_STOP;
+ XDNA_DBG(xdna, "Stop %s", hwctx->name);
+ }
+ mutex_unlock(&client->hwctx_lock);
+}
+
+void aie2_restart_ctx(struct amdxdna_client *client)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_hwctx *hwctx;
+ int next = 0;
+
+ drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+ mutex_lock(&client->hwctx_lock);
+ idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
+ if (hwctx->status != HWCTX_STAT_STOP)
+ continue;
+
+ hwctx->status = hwctx->old_status;
+ XDNA_DBG(xdna, "Resetting %s", hwctx->name);
+ aie2_hwctx_restart(xdna, hwctx);
+ }
+ mutex_unlock(&client->hwctx_lock);
+}
+
+static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+ struct dma_fence *fence, *out_fence = NULL;
+ int ret;
+
+ fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
+ if (!fence)
+ return NULL;
+
+ ret = dma_fence_chain_find_seqno(&fence, seq);
+ if (ret)
+ goto out;
+
+ out_fence = dma_fence_get(dma_fence_chain_contained(fence));
+
+out:
+ dma_fence_put(fence);
+ return out_fence;
+}
+
+static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
+{
+ struct dma_fence *fence;
+
+ fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
+ if (!fence)
+ return;
+
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+}
+
+static void
+aie2_sched_notify(struct amdxdna_sched_job *job)
+{
+ struct dma_fence *fence = job->fence;
+
+ trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+ job->hwctx->priv->completed++;
+ dma_fence_signal(fence);
+
+ up(&job->hwctx->priv->job_sem);
+ job->job_done = true;
+ dma_fence_put(fence);
+ mmput(job->mm);
+ aie2_job_put(job);
+}
+
+static int
+aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
+{
+ struct amdxdna_sched_job *job = handle;
+ struct amdxdna_gem_obj *cmd_abo;
+ u32 ret = 0;
+ u32 status;
+
+ cmd_abo = job->cmd_bo;
+
+ if (unlikely(!data))
+ goto out;
+
+ if (unlikely(size != sizeof(u32))) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ status = *data;
+ XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+ if (status == AIE2_STATUS_SUCCESS)
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+ else
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
+
+out:
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static int
+aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
+{
+ struct amdxdna_sched_job *job = handle;
+ u32 ret = 0;
+ u32 status;
+
+ if (unlikely(!data))
+ goto out;
+
+ if (unlikely(size != sizeof(u32))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ status = *data;
+ XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+
+out:
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static int
+aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size)
+{
+ struct amdxdna_sched_job *job = handle;
+ struct amdxdna_gem_obj *cmd_abo;
+ struct cmd_chain_resp *resp;
+ struct amdxdna_dev *xdna;
+ u32 fail_cmd_status;
+ u32 fail_cmd_idx;
+ u32 ret = 0;
+
+ cmd_abo = job->cmd_bo;
+ if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ resp = (struct cmd_chain_resp *)data;
+ xdna = job->hwctx->client->xdna;
+ XDNA_DBG(xdna, "Status 0x%x", resp->status);
+ if (resp->status == AIE2_STATUS_SUCCESS) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+ goto out;
+ }
+
+ /* Slow path to handle error, read from ringbuf on BAR */
+ fail_cmd_idx = resp->fail_cmd_idx;
+ fail_cmd_status = resp->fail_cmd_status;
+ XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
+ fail_cmd_idx, fail_cmd_status);
+
+ if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+ ret = -EINVAL;
+ goto out;
+ }
+ amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);
+
+ if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
+ struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
+
+ cc->error_index = fail_cmd_idx;
+ if (cc->error_index >= cc->command_count)
+ cc->error_index = 0;
+ }
+out:
+ aie2_sched_notify(job);
+ return ret;
+}
+
+static struct dma_fence *
+aie2_sched_job_run(struct drm_sched_job *sched_job)
+{
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+ struct dma_fence *fence;
+ int ret;
+
+ if (!mmget_not_zero(job->mm))
+ return ERR_PTR(-ESRCH);
+
+ kref_get(&job->refcnt);
+ fence = dma_fence_get(job->fence);
+
+ if (unlikely(!cmd_abo)) {
+ ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
+ goto out;
+ }
+
+ amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);
+
+ if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
+ ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+ else if (force_cmdlist)
+ ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+ else
+ ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);
+
+out:
+ if (ret) {
+ dma_fence_put(job->fence);
+ aie2_job_put(job);
+ mmput(job->mm);
+ fence = ERR_PTR(ret);
+ }
+ trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
+
+ return fence;
+}
+
+static void aie2_sched_job_free(struct drm_sched_job *sched_job)
+{
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+
+ trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
+ if (!job->job_done)
+ up(&hwctx->priv->job_sem);
+
+ if (job->out_fence)
+ dma_fence_put(job->out_fence);
+ drm_sched_job_cleanup(sched_job);
+ aie2_job_put(job);
+}
+
+static enum drm_gpu_sched_stat
+aie2_sched_job_timedout(struct drm_sched_job *sched_job)
+{
+ struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+ struct amdxdna_dev *xdna;
+
+ xdna = hwctx->client->xdna;
+ trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
+ mutex_lock(&xdna->dev_lock);
+ aie2_hwctx_stop(xdna, hwctx, sched_job);
+
+ aie2_hwctx_restart(xdna, hwctx);
+ mutex_unlock(&xdna->dev_lock);
+
+ return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+const struct drm_sched_backend_ops sched_ops = {
+ .run_job = aie2_sched_job_run,
+ .free_job = aie2_sched_job_free,
+ .timedout_job = aie2_sched_job_timedout,
+};
+
static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
{
struct amdxdna_dev *xdna = hwctx->client->xdna;
@@ -126,13 +466,66 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
}
+static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct drm_file *filp = hwctx->client->filp;
+ struct drm_syncobj *syncobj;
+ u32 hdl;
+ int ret;
+
+ hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
+
+ ret = drm_syncobj_create(&syncobj, 0, NULL);
+ if (ret) {
+ XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
+ return ret;
+ }
+ ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
+ if (ret) {
+ drm_syncobj_put(syncobj);
+ XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
+ return ret;
+ }
+ hwctx->priv->syncobj = syncobj;
+ hwctx->syncobj_hdl = hdl;
+
+ return 0;
+}
+
+static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
+{
+ /*
+ * The syncobj_hdl is owned by user space and will be cleaned up
+ * separately.
+ */
+ drm_syncobj_put(hwctx->priv->syncobj);
+}
+
+static void aie2_ctx_syncobj_add_fence(struct amdxdna_hwctx *hwctx,
+ struct dma_fence *ofence, u64 seq)
+{
+ struct drm_syncobj *syncobj = hwctx->priv->syncobj;
+ struct dma_fence_chain *chain;
+
+ if (!syncobj)
+ return;
+
+ chain = dma_fence_chain_alloc();
+ if (!chain)
+ return;
+
+ drm_syncobj_add_point(syncobj, chain, ofence, seq);
+}
+
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
{
struct amdxdna_client *client = hwctx->client;
struct amdxdna_dev *xdna = client->xdna;
+ struct drm_gpu_scheduler *sched;
struct amdxdna_hwctx_priv *priv;
struct amdxdna_gem_obj *heap;
- int ret;
+ int i, ret;
priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
if (!priv)
@@ -150,6 +543,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
drm_gem_object_get(to_gobj(heap));
mutex_unlock(&client->mm_lock);
priv->heap = heap;
+ sema_init(&priv->job_sem, HWCTX_MAX_CMDS);
ret = amdxdna_gem_pin(heap);
if (ret) {
@@ -157,10 +551,47 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
goto put_heap;
}
+ for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
+ struct amdxdna_gem_obj *abo;
+ struct amdxdna_drm_create_bo args = {
+ .flags = 0,
+ .type = AMDXDNA_BO_DEV,
+ .vaddr = 0,
+ .size = MAX_CHAIN_CMDBUF_SIZE,
+ };
+
+ abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
+ if (IS_ERR(abo)) {
+ ret = PTR_ERR(abo);
+ goto free_cmd_bufs;
+ }
+
+ XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
+ i, abo->mem.dev_addr, abo->mem.size);
+ priv->cmd_buf[i] = abo;
+ }
+
+ sched = &priv->sched;
+ mutex_init(&priv->io_lock);
+ ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT,
+ HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
+ NULL, NULL, hwctx->name, xdna->ddev.dev);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
+ goto free_cmd_bufs;
+ }
+
+ ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (ret) {
+ XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
+ goto free_sched;
+ }
+
ret = aie2_hwctx_col_list(hwctx);
if (ret) {
XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
- goto unpin;
+ goto free_entity;
}
ret = aie2_alloc_resource(hwctx);
@@ -175,6 +606,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
goto release_resource;
}
+
+ ret = aie2_ctx_syncobj_create(hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
+ goto release_resource;
+ }
+
hwctx->status = HWCTX_STAT_INIT;
XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
@@ -185,7 +623,16 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
aie2_release_resource(hwctx);
free_col_list:
kfree(hwctx->col_list);
-unpin:
+free_entity:
+ drm_sched_entity_destroy(&priv->entity);
+free_sched:
+ drm_sched_fini(&priv->sched);
+free_cmd_bufs:
+ for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
+ if (!priv->cmd_buf[i])
+ continue;
+ drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
+ }
amdxdna_gem_unpin(heap);
put_heap:
drm_gem_object_put(to_gobj(heap));
@@ -196,11 +643,35 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
{
+ struct amdxdna_dev *xdna;
+ int idx;
+
+ xdna = hwctx->client->xdna;
+ drm_sched_wqueue_stop(&hwctx->priv->sched);
+
+ /* Now, scheduler will not send command to device. */
aie2_release_resource(hwctx);
+ /*
+ * All submitted commands are aborted.
+ * Restart scheduler queues to cleanup jobs. The amdxdna_sched_job_run()
+ * will return NODEV if it is called.
+ */
+ drm_sched_wqueue_start(&hwctx->priv->sched);
+
+ aie2_hwctx_wait_for_idle(hwctx);
+ drm_sched_entity_destroy(&hwctx->priv->entity);
+ drm_sched_fini(&hwctx->priv->sched);
+ aie2_ctx_syncobj_destroy(hwctx);
+
+ XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
+
+ for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
+ drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
amdxdna_gem_unpin(hwctx->priv->heap);
drm_gem_object_put(to_gobj(hwctx->priv->heap));
+ mutex_destroy(&hwctx->priv->io_lock);
kfree(hwctx->col_list);
kfree(hwctx->priv);
kfree(hwctx->cus);
@@ -267,3 +738,160 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu
return -EOPNOTSUPP;
}
}
+
+static int aie2_populate_range(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ struct mm_struct *mm = abo->mem.notifier.mm;
+ struct hmm_range range = { 0 };
+ unsigned long timeout;
+ int ret;
+
+ XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
+ abo->mem.userptr, abo->mem.size);
+ range.notifier = &abo->mem.notifier;
+ range.start = abo->mem.userptr;
+ range.end = abo->mem.userptr + abo->mem.size;
+ range.hmm_pfns = abo->mem.pfns;
+ range.default_flags = HMM_PFN_REQ_FAULT;
+
+ if (!mmget_not_zero(mm))
+ return -EFAULT;
+
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+again:
+ range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
+ mmap_read_lock(mm);
+ ret = hmm_range_fault(&range);
+ mmap_read_unlock(mm);
+ if (ret) {
+ if (time_after(jiffies, timeout)) {
+ ret = -ETIME;
+ goto put_mm;
+ }
+
+ if (ret == -EBUSY)
+ goto again;
+
+ goto put_mm;
+ }
+
+ read_lock(&xdna->notifier_lock);
+ if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
+ read_unlock(&xdna->notifier_lock);
+ goto again;
+ }
+ abo->mem.map_invalid = false;
+ read_unlock(&xdna->notifier_lock);
+
+put_mm:
+ mmput(mm);
+ return ret;
+}
+
+static void aie2_hwctx_push_job(struct amdxdna_sched_job *job, u64 *seq)
+{
+ struct amdxdna_hwctx *hwctx = job->hwctx;
+
+ mutex_lock(&hwctx->priv->io_lock);
+ drm_sched_job_arm(&job->base);
+ job->seq = hwctx->priv->seq++;
+ *seq = job->seq;
+
+ job->out_fence = dma_fence_get(&job->base.s_fence->finished);
+ drm_sched_entity_push_job(&job->base);
+ aie2_ctx_syncobj_add_fence(hwctx, job->out_fence, *seq);
+ mutex_unlock(&hwctx->priv->io_lock);
+}
+
+int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
+{
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct ww_acquire_ctx acquire_ctx;
+ struct amdxdna_gem_obj *abo;
+ unsigned long timeout = 0;
+ int ret, i;
+
+ ret = down_interruptible(&hwctx->priv->job_sem);
+ if (ret) {
+ XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
+ return ret;
+ }
+
+ ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
+ if (ret) {
+ XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
+ goto up_sem;
+ }
+
+retry:
+ ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ if (ret) {
+ XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
+ goto cleanup_job;
+ }
+
+ for (i = 0; i < job->bo_cnt; i++) {
+ ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
+ if (ret) {
+ XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ goto cleanup_job;
+ }
+ }
+
+ read_lock(&xdna->notifier_lock);
+ for (i = 0; i < job->bo_cnt; i++) {
+ abo = to_xdna_obj(job->bos[i]);
+ if (abo->mem.map_invalid) {
+ read_unlock(&xdna->notifier_lock);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+ if (!timeout) {
+ timeout = jiffies +
+ msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+ } else if (time_after(jiffies, timeout)) {
+ ret = -ETIME;
+ goto cleanup_job;
+ }
+
+ ret = aie2_populate_range(abo);
+ if (ret)
+ goto cleanup_job;
+ goto retry;
+ }
+ }
+
+ for (i = 0; i < job->bo_cnt; i++)
+ dma_resv_add_fence(job->bos[i]->resv, job->fence, DMA_RESV_USAGE_WRITE);
+
+ read_unlock(&xdna->notifier_lock);
+ drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
+
+ aie2_hwctx_push_job(job, seq);
+
+ return 0;
+
+cleanup_job:
+ drm_sched_job_cleanup(&job->base);
+up_sem:
+ up(&hwctx->priv->job_sem);
+ job->job_done = true;
+ return ret;
+}
+
+void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
+ unsigned long cur_seq)
+{
+ struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+ struct drm_gem_object *gobj = to_gobj(abo);
+ long ret;
+
+ write_lock(&xdna->notifier_lock);
+ abo->mem.map_invalid = true;
+ mmu_interval_set_seq(&abo->mem.notifier, cur_seq);
+ write_unlock(&xdna->notifier_lock);
+ ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
+ true, MAX_SCHEDULE_TIMEOUT);
+ if (!ret || ret == -ERESTARTSYS)
+ XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
+}
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 40d9e4261e8b..db62954eb378 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -4,10 +4,12 @@
*/
#include <drm/amdxdna_accel.h>
+#include <drm/drm_cache.h>
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
#include <linux/bitfield.h>
#include <linux/errno.h>
#include <linux/pci.h>
@@ -362,3 +364,344 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
msg.opcode, resp.status, ret);
return ret;
}
+
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ union {
+ struct execute_buffer_req ebuf;
+ struct exec_dpu_req dpu;
+ } req;
+ struct xdna_mailbox_msg msg;
+ u32 payload_len;
+ void *payload;
+ int cu_idx;
+ int ret;
+ u32 op;
+
+ if (!chann)
+ return -ENODEV;
+
+ payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
+ if (!payload) {
+ XDNA_ERR(xdna, "Invalid command, cannot get payload");
+ return -EINVAL;
+ }
+
+ cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
+ if (cu_idx < 0) {
+ XDNA_DBG(xdna, "Invalid cu idx");
+ return -EINVAL;
+ }
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ switch (op) {
+ case ERT_START_CU:
+ if (unlikely(payload_len > sizeof(req.ebuf.payload)))
+ XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
+ req.ebuf.cu_idx = cu_idx;
+ memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
+ msg.send_size = sizeof(req.ebuf);
+ msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
+ break;
+ case ERT_START_NPU: {
+ struct amdxdna_cmd_start_npu *sn = payload;
+
+ if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload)))
+ XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
+ req.dpu.inst_buf_addr = sn->buffer;
+ req.dpu.inst_size = sn->buffer_size;
+ req.dpu.inst_prop_cnt = sn->prop_count;
+ req.dpu.cu_idx = cu_idx;
+ memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload));
+ msg.send_size = sizeof(req.dpu);
+ msg.opcode = MSG_OP_EXEC_DPU;
+ break;
+ }
+ default:
+ XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
+ return -EINVAL;
+ }
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
+ 0x40, false);
+
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
+ struct amdxdna_gem_obj *abo, u32 *size)
+{
+ struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
+ int cu_idx = amdxdna_cmd_get_cu_idx(abo);
+ u32 payload_len;
+ void *payload;
+
+ if (cu_idx < 0)
+ return -EINVAL;
+
+ payload = amdxdna_cmd_get_payload(abo, &payload_len);
+ if (!payload)
+ return -EINVAL;
+
+ if (!slot_cf_has_space(offset, payload_len))
+ return -ENOSPC;
+
+ buf->cu_idx = cu_idx;
+ buf->arg_cnt = payload_len / sizeof(u32);
+ memcpy(buf->args, payload, payload_len);
+ /* Accurate buf size to hint firmware to do necessary copy */
+ *size = sizeof(*buf) + payload_len;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
+ struct amdxdna_gem_obj *abo, u32 *size)
+{
+ struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
+ int cu_idx = amdxdna_cmd_get_cu_idx(abo);
+ struct amdxdna_cmd_start_npu *sn;
+ u32 payload_len;
+ void *payload;
+ u32 arg_sz;
+
+ if (cu_idx < 0)
+ return -EINVAL;
+
+ payload = amdxdna_cmd_get_payload(abo, &payload_len);
+ if (!payload)
+ return -EINVAL;
+ sn = payload;
+ arg_sz = payload_len - sizeof(*sn);
+ if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
+ return -EINVAL;
+
+ if (!slot_dpu_has_space(offset, arg_sz))
+ return -ENOSPC;
+
+ buf->inst_buf_addr = sn->buffer;
+ buf->inst_size = sn->buffer_size;
+ buf->inst_prop_cnt = sn->prop_count;
+ buf->cu_idx = cu_idx;
+ buf->arg_cnt = arg_sz / sizeof(u32);
+ memcpy(buf->args, sn->prop_args, arg_sz);
+
+ /* Accurate buf size to hint firmware to do necessary copy */
+ *size += sizeof(*buf) + arg_sz;
+ return 0;
+}
+
+static int
+aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
+ struct amdxdna_gem_obj *abo, u32 *size)
+{
+ u32 this_op = amdxdna_cmd_get_op(abo);
+ void *cmd_buf = cmdbuf_abo->mem.kva;
+ int ret;
+
+ if (this_op != op) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ switch (op) {
+ case ERT_START_CU:
+ ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size);
+ break;
+ case ERT_START_NPU:
+ ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+done:
+ if (ret) {
+ XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
+ op, ret);
+ }
+ return ret;
+}
+
+static inline struct amdxdna_gem_obj *
+aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
+{
+ int idx = get_job_idx(job->seq);
+
+ return job->hwctx->priv->cmd_buf[idx];
+}
+
+static void
+aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
+ struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
+{
+ req->buf_addr = cmdbuf_abo->mem.dev_addr;
+ req->buf_size = size;
+ req->count = cnt;
+ drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
+ XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
+ req->buf_addr, size, cnt);
+}
+
+static inline u32
+aie2_cmd_op_to_msg_op(u32 op)
+{
+ switch (op) {
+ case ERT_START_CU:
+ return MSG_OP_CHAIN_EXEC_BUFFER_CF;
+ case ERT_START_NPU:
+ return MSG_OP_CHAIN_EXEC_DPU;
+ default:
+ return MSG_OP_MAX_OPCODE;
+ }
+}
+
+int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t))
+{
+ struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_client *client = hwctx->client;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct amdxdna_cmd_chain *payload;
+ struct xdna_mailbox_msg msg;
+ struct cmd_chain_req req;
+ u32 payload_len;
+ u32 offset = 0;
+ u32 size;
+ int ret;
+ u32 op;
+ u32 i;
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
+ if (op != ERT_CMD_CHAIN || !payload ||
+ payload_len < struct_size(payload, data, payload->command_count))
+ return -EINVAL;
+
+ for (i = 0; i < payload->command_count; i++) {
+ u32 boh = (u32)(payload->data[i]);
+ struct amdxdna_gem_obj *abo;
+
+ abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
+ if (!abo) {
+ XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
+ return -ENOENT;
+ }
+
+ /* All sub-cmd should have same op, use the first one. */
+ if (i == 0)
+ op = amdxdna_cmd_get_op(abo);
+
+ ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
+ amdxdna_gem_put_obj(abo);
+ if (ret)
+ return -EINVAL;
+
+ offset += size;
+ }
+
+ /* The offset is the accumulated total size of the cmd buffer */
+ aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);
+
+ msg.opcode = aie2_cmd_op_to_msg_op(op);
+ if (msg.opcode == MSG_OP_MAX_OPCODE)
+ return -EOPNOTSUPP;
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(hwctx->client->xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t))
+{
+ struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+ struct xdna_mailbox_msg msg;
+ struct cmd_chain_req req;
+ u32 size;
+ int ret;
+ u32 op;
+
+ op = amdxdna_cmd_get_op(cmd_abo);
+ ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
+ if (ret)
+ return ret;
+
+ aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
+
+ msg.opcode = aie2_cmd_op_to_msg_op(op);
+ if (msg.opcode == MSG_OP_MAX_OPCODE)
+ return -EOPNOTSUPP;
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(hwctx->client->xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t))
+{
+ struct mailbox_channel *chann = hwctx->priv->mbox_chann;
+ struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
+ struct amdxdna_dev *xdna = hwctx->client->xdna;
+ struct xdna_mailbox_msg msg;
+ struct sync_bo_req req;
+ int ret = 0;
+
+ req.src_addr = 0;
+ req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr;
+ req.size = abo->mem.size;
+
+ /* Device to Host */
+ req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) |
+ FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM);
+
+ XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed",
+ req.size, req.src_addr, req.dst_addr);
+
+ msg.handle = job;
+ msg.notify_cb = notify_cb;
+ msg.send_data = (u8 *)&req;
+ msg.send_size = sizeof(req);
+ msg.opcode = MSG_OP_SYNC_BO;
+
+ ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
+ if (ret) {
+ XDNA_ERR(xdna, "Send message failed");
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index caeb07d1dc6b..fb369d615969 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -5,8 +5,10 @@
#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
#include <linux/errno.h>
#include <linux/firmware.h>
#include <linux/iommu.h>
@@ -17,6 +19,7 @@
#include "aie2_pci.h"
#include "aie2_solver.h"
#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
@@ -495,4 +498,6 @@ const struct amdxdna_dev_ops aie2_ops = {
.hwctx_init = aie2_hwctx_init,
.hwctx_fini = aie2_hwctx_fini,
.hwctx_config = aie2_hwctx_config,
+ .cmd_submit = aie2_cmd_submit,
+ .hmm_invalidate = aie2_hmm_invalidate,
};
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 3ac936e2c9d1..bc6910875d9d 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -6,6 +6,8 @@
#ifndef _AIE2_PCI_H_
#define _AIE2_PCI_H_
+#include <linux/semaphore.h>
+
#include "amdxdna_mailbox.h"
#define AIE2_INTERVAL 20000 /* us */
@@ -76,8 +78,10 @@ enum psp_reg_idx {
PSP_MAX_REGS /* Keep this at the end */
};
+struct amdxdna_client;
struct amdxdna_fw_ver;
struct amdxdna_hwctx;
+struct amdxdna_sched_job;
struct psp_config {
const void *fw_buf;
@@ -118,9 +122,31 @@ struct rt_config {
u32 value;
};
+/*
+ * Define the maximum number of pending commands in a hardware context.
+ * Must be power of 2!
+ */
+#define HWCTX_MAX_CMDS 4
+#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1))
struct amdxdna_hwctx_priv {
struct amdxdna_gem_obj *heap;
void *mbox_chann;
+
+ struct drm_gpu_scheduler sched;
+ struct drm_sched_entity entity;
+
+ struct mutex io_lock; /* protect seq and cmd order */
+ struct wait_queue_head job_free_wq;
+ u32 num_pending;
+ u64 seq;
+ struct semaphore job_sem;
+ bool job_done;
+
+ /* Completed job counter */
+ u64 completed;
+
+ struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS];
+ struct drm_syncobj *syncobj;
};
struct amdxdna_dev_hdl {
@@ -199,10 +225,24 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
int aie2_config_cu(struct amdxdna_hwctx *hwctx);
+int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t));
+int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t));
+int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
+ struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t));
+int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
+ int (*notify_cb)(void *, const u32 *, size_t));
/* aie2_hwctx.c */
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
+int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
+void aie2_stop_ctx_by_col_map(struct amdxdna_client *client, u32 col_map);
+void aie2_restart_ctx(struct amdxdna_client *client);
#endif /* _AIE2_PCI_H_ */
diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c
index b03501e81065..dc3a072ce3b6 100644
--- a/drivers/accel/amdxdna/aie2_psp.c
+++ b/drivers/accel/amdxdna/aie2_psp.c
@@ -4,8 +4,10 @@
*/
#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
#include <linux/bitfield.h>
#include <linux/iopoll.h>
diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c
index 3fa7064649aa..91893d438da7 100644
--- a/drivers/accel/amdxdna/aie2_smu.c
+++ b/drivers/accel/amdxdna/aie2_smu.c
@@ -4,7 +4,9 @@
*/
#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
#include <linux/iopoll.h>
#include "aie2_pci.h"
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
index 9489399adea1..13cfbab9caa0 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.c
+++ b/drivers/accel/amdxdna/amdxdna_ctx.c
@@ -7,17 +7,65 @@
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <trace/events/amdxdna.h>
#include "amdxdna_ctx.h"
+#include "amdxdna_gem.h"
#include "amdxdna_pci_drv.h"
#define MAX_HWCTX_ID 255
+#define MAX_ARG_COUNT 4095
-static void amdxdna_hwctx_destroy(struct amdxdna_hwctx *hwctx)
+struct amdxdna_fence {
+ struct dma_fence base;
+ spinlock_t lock; /* for base */
+ struct amdxdna_hwctx *hwctx;
+};
+
+static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence)
+{
+ return KBUILD_MODNAME;
+}
+
+static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence)
+{
+ struct amdxdna_fence *xdna_fence;
+
+ xdna_fence = container_of(fence, struct amdxdna_fence, base);
+
+ return xdna_fence->hwctx->name;
+}
+
+static const struct dma_fence_ops fence_ops = {
+ .get_driver_name = amdxdna_fence_get_driver_name,
+ .get_timeline_name = amdxdna_fence_get_timeline_name,
+};
+
+static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx)
+{
+ struct amdxdna_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence)
+ return NULL;
+
+ fence->hwctx = hwctx;
+ spin_lock_init(&fence->lock);
+ dma_fence_init(&fence->base, &fence_ops, &fence->lock, hwctx->id, 0);
+ return &fence->base;
+}
+
+static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
+ struct srcu_struct *ss)
{
struct amdxdna_dev *xdna = hwctx->client->xdna;
+ synchronize_srcu(ss);
+
/* At this point, user is not able to submit new commands */
mutex_lock(&xdna->dev_lock);
xdna->dev_info->ops->hwctx_fini(hwctx);
@@ -27,6 +75,46 @@ static void amdxdna_hwctx_destroy(struct amdxdna_hwctx *hwctx)
kfree(hwctx);
}
+void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+ u32 num_masks, count;
+
+ if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
+ num_masks = 0;
+ else
+ num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
+
+ if (size) {
+ count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header);
+ if (unlikely(count <= num_masks)) {
+ *size = 0;
+ return NULL;
+ }
+ *size = (count - num_masks) * sizeof(u32);
+ }
+ return &cmd->data[num_masks];
+}
+
+int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+ u32 num_masks, i;
+ u32 *cu_mask;
+
+ if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
+ return -1;
+
+ num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
+ cu_mask = cmd->data;
+ for (i = 0; i < num_masks; i++) {
+ if (cu_mask[i])
+ return ffs(cu_mask[i]) - 1;
+ }
+
+ return -1;
+}
+
/*
* This should be called in close() and remove(). DO NOT call in other syscalls.
* This guarantee that when hwctx and resources will be released, if user
@@ -43,7 +131,7 @@ void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
client->pid, hwctx->id);
idr_remove(&client->hwctx_idr, hwctx->id);
mutex_unlock(&client->hwctx_lock);
- amdxdna_hwctx_destroy(hwctx);
+ amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
mutex_lock(&client->hwctx_lock);
}
mutex_unlock(&client->hwctx_lock);
@@ -135,6 +223,12 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d
if (!drm_dev_enter(dev, &idx))
return -ENODEV;
+ /*
+ * Use hwctx_lock to achieve exclusion with other hwctx writers,
+ * SRCU to synchronize with exec/wait command ioctls.
+ *
+ * The pushed jobs are handled by DRM scheduler during destroy.
+ */
mutex_lock(&client->hwctx_lock);
hwctx = idr_find(&client->hwctx_idr, args->handle);
if (!hwctx) {
@@ -147,7 +241,7 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d
idr_remove(&client->hwctx_idr, hwctx->id);
mutex_unlock(&client->hwctx_lock);
- amdxdna_hwctx_destroy(hwctx);
+ amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle);
out:
@@ -161,10 +255,10 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
struct amdxdna_drm_config_hwctx *args = data;
struct amdxdna_dev *xdna = to_xdna_dev(dev);
struct amdxdna_hwctx *hwctx;
+ int ret, idx;
u32 buf_size;
void *buf;
u64 val;
- int ret;
if (!xdna->dev_info->ops->hwctx_config)
return -EOPNOTSUPP;
@@ -203,17 +297,231 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
}
mutex_lock(&xdna->dev_lock);
+ idx = srcu_read_lock(&client->hwctx_srcu);
hwctx = idr_find(&client->hwctx_idr, args->handle);
if (!hwctx) {
XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
ret = -EINVAL;
- goto unlock;
+ goto unlock_srcu;
}
ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size);
-unlock:
+unlock_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
mutex_unlock(&xdna->dev_lock);
kfree(buf);
return ret;
}
+
+static void
+amdxdna_arg_bos_put(struct amdxdna_sched_job *job)
+{
+ int i;
+
+ for (i = 0; i < job->bo_cnt; i++) {
+ if (!job->bos[i])
+ break;
+ drm_gem_object_put(job->bos[i]);
+ }
+}
+
+static int
+amdxdna_arg_bos_lookup(struct amdxdna_client *client,
+ struct amdxdna_sched_job *job,
+ u32 *bo_hdls, u32 bo_cnt)
+{
+ struct drm_gem_object *gobj;
+ int i, ret;
+
+ job->bo_cnt = bo_cnt;
+ for (i = 0; i < job->bo_cnt; i++) {
+ struct amdxdna_gem_obj *abo;
+
+ gobj = drm_gem_object_lookup(client->filp, bo_hdls[i]);
+ if (!gobj) {
+ ret = -ENOENT;
+ goto put_shmem_bo;
+ }
+ abo = to_xdna_obj(gobj);
+
+ mutex_lock(&abo->lock);
+ if (abo->pinned) {
+ mutex_unlock(&abo->lock);
+ job->bos[i] = gobj;
+ continue;
+ }
+
+ ret = amdxdna_gem_pin_nolock(abo);
+ if (ret) {
+ mutex_unlock(&abo->lock);
+ drm_gem_object_put(gobj);
+ goto put_shmem_bo;
+ }
+ abo->pinned = true;
+ mutex_unlock(&abo->lock);
+
+ job->bos[i] = gobj;
+ }
+
+ return 0;
+
+put_shmem_bo:
+ amdxdna_arg_bos_put(job);
+ return ret;
+}
+
+void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
+{
+ trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release");
+ amdxdna_arg_bos_put(job);
+ amdxdna_gem_put_obj(job->cmd_bo);
+}
+
+int amdxdna_cmd_submit(struct amdxdna_client *client,
+ u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt,
+ u32 hwctx_hdl, u64 *seq)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ struct amdxdna_sched_job *job;
+ struct amdxdna_hwctx *hwctx;
+ int ret, idx;
+
+ XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt);
+ job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL);
+ if (!job)
+ return -ENOMEM;
+
+ if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) {
+ job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD);
+ if (!job->cmd_bo) {
+ XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl);
+ ret = -EINVAL;
+ goto free_job;
+ }
+ } else {
+ job->cmd_bo = NULL;
+ }
+
+ ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt);
+ if (ret) {
+ XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret);
+ goto cmd_put;
+ }
+
+ idx = srcu_read_lock(&client->hwctx_srcu);
+ hwctx = idr_find(&client->hwctx_idr, hwctx_hdl);
+ if (!hwctx) {
+ XDNA_DBG(xdna, "PID %d failed to get hwctx %d",
+ client->pid, hwctx_hdl);
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+ if (hwctx->status != HWCTX_STAT_READY) {
+ XDNA_ERR(xdna, "HW Context is not ready");
+ ret = -EINVAL;
+ goto unlock_srcu;
+ }
+
+ job->hwctx = hwctx;
+ job->mm = current->mm;
+
+ job->fence = amdxdna_fence_create(hwctx);
+ if (!job->fence) {
+ XDNA_ERR(xdna, "Failed to create fence");
+ ret = -ENOMEM;
+ goto unlock_srcu;
+ }
+ kref_init(&job->refcnt);
+
+ ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq);
+ if (ret)
+ goto put_fence;
+
+ /*
+ * The amdxdna_hwctx_destroy_rcu() will release hwctx and associated
+ * resource after synchronize_srcu(). The submitted jobs should be
+ * handled by the queue, for example DRM scheduler, in device layer.
+ * For here we can unlock SRCU.
+ */
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed");
+
+ return 0;
+
+put_fence:
+ dma_fence_put(job->fence);
+unlock_srcu:
+ srcu_read_unlock(&client->hwctx_srcu, idx);
+ amdxdna_arg_bos_put(job);
+cmd_put:
+ amdxdna_gem_put_obj(job->cmd_bo);
+free_job:
+ kfree(job);
+ return ret;
+}
+
+/*
+ * The submit command ioctl submits a command to firmware. One firmware command
+ * may contain multiple command BOs for processing as a whole.
+ * The command sequence number is returned which can be used for wait command ioctl.
+ */
+static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
+ struct amdxdna_drm_exec_cmd *args)
+{
+ struct amdxdna_dev *xdna = client->xdna;
+ u32 *arg_bo_hdls;
+ u32 cmd_bo_hdl;
+ int ret;
+
+ if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) {
+ XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count);
+ return -EINVAL;
+ }
+
+ /* Only support single command for now. */
+ if (args->cmd_count != 1) {
+ XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count);
+ return -EINVAL;
+ }
+
+ cmd_bo_hdl = (u32)args->cmd_handles;
+ arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
+ if (!arg_bo_hdls)
+ return -ENOMEM;
+ ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
+ args->arg_count * sizeof(u32));
+ if (ret) {
+ ret = -EFAULT;
+ goto free_cmd_bo_hdls;
+ }
+
+ ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
+ args->arg_count, args->hwctx, &args->seq);
+ if (ret)
+ XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret);
+
+free_cmd_bo_hdls:
+ kfree(arg_bo_hdls);
+ if (!ret)
+ XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq);
+ return ret;
+}
+
+int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdxdna_client *client = filp->driver_priv;
+ struct amdxdna_drm_exec_cmd *args = data;
+
+ if (args->ext || args->ext_flags)
+ return -EINVAL;
+
+ switch (args->type) {
+ case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
+ return amdxdna_drm_submit_execbuf(client, args);
+ }
+
+ XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
+ return -EINVAL;
+}
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index b409d0731ab8..18ed9bdbd6d8 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -6,6 +6,54 @@
#ifndef _AMDXDNA_CTX_H_
#define _AMDXDNA_CTX_H_
+#include <linux/bitfield.h>
+
+#include "amdxdna_gem.h"
+
+struct amdxdna_hwctx_priv;
+
+enum ert_cmd_opcode {
+ ERT_START_CU = 0,
+ ERT_CMD_CHAIN = 19,
+ ERT_START_NPU = 20,
+};
+
+enum ert_cmd_state {
+ ERT_CMD_STATE_INVALID,
+ ERT_CMD_STATE_NEW,
+ ERT_CMD_STATE_QUEUED,
+ ERT_CMD_STATE_RUNNING,
+ ERT_CMD_STATE_COMPLETED,
+ ERT_CMD_STATE_ERROR,
+ ERT_CMD_STATE_ABORT,
+ ERT_CMD_STATE_SUBMITTED,
+ ERT_CMD_STATE_TIMEOUT,
+ ERT_CMD_STATE_NORESPONSE,
+};
+
+/*
+ * Interpretation of the beginning of data payload for ERT_START_NPU in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args.
+ */
+struct amdxdna_cmd_start_npu {
+ u64 buffer; /* instruction buffer address */
+ u32 buffer_size; /* size of buffer in bytes */
+ u32 prop_count; /* properties count */
+ u32 prop_args[]; /* properties and regular kernel arguments */
+};
+
+/*
+ * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in
+ * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles.
+ */
+struct amdxdna_cmd_chain {
+ u32 command_count;
+ u32 submit_index;
+ u32 error_index;
+ u32 reserved[3];
+ u64 data[] __counted_by(command_count);
+};
+
/* Exec buffer command header format */
#define AMDXDNA_CMD_STATE GENMASK(3, 0)
#define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10)
@@ -41,9 +89,72 @@ struct amdxdna_hwctx {
u32 syncobj_hdl;
};
+#define drm_job_to_xdna_job(j) \
+ container_of(j, struct amdxdna_sched_job, base)
+
+struct amdxdna_sched_job {
+ struct drm_sched_job base;
+ struct kref refcnt;
+ struct amdxdna_hwctx *hwctx;
+ struct mm_struct *mm;
+ /* The fence to notice DRM scheduler that job is done by hardware */
+ struct dma_fence *fence;
+ /* user can wait on this fence */
+ struct dma_fence *out_fence;
+ bool job_done;
+ u64 seq;
+ struct amdxdna_gem_obj *cmd_bo;
+ size_t bo_cnt;
+ struct drm_gem_object *bos[] __counted_by(bo_cnt);
+};
+
+static inline u32
+amdxdna_cmd_get_op(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+
+ return FIELD_GET(AMDXDNA_CMD_OPCODE, cmd->header);
+}
+
+static inline void
+amdxdna_cmd_set_state(struct amdxdna_gem_obj *abo, enum ert_cmd_state s)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+
+ cmd->header &= ~AMDXDNA_CMD_STATE;
+ cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, s);
+}
+
+static inline enum ert_cmd_state
+amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
+{
+ struct amdxdna_cmd *cmd = abo->mem.kva;
+
+ return FIELD_GET(AMDXDNA_CMD_STATE, cmd->header);
+}
+
+void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
+int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
+
+static inline u32 amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx)
+{
+ return GENMASK(hwctx->start_col + hwctx->num_col - 1,
+ hwctx->start_col);
+}
+
+void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
+
+int amdxdna_cmd_submit(struct amdxdna_client *client,
+ u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt,
+ u32 hwctx_hdl, u64 *seq);
+
+int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
+ u64 seq, u32 timeout);
+
int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
#endif /* _AMDXDNA_CTX_H_ */
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index f2ba86ae9e1a..4dfeca306d98 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -8,6 +8,7 @@
#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
+#include <drm/gpu_scheduler.h>
#include <linux/iosys-map.h>
#include <linux/vmalloc.h>
diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
index 42b615394605..5139a9c96a91 100644
--- a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
+++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c
@@ -3,10 +3,15 @@
* Copyright (C) 2024, Advanced Micro Devices, Inc.
*/
+#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_print.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/gpu_scheduler.h>
#include <linux/completion.h>
+#include "amdxdna_gem.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_mailbox_helper.h"
#include "amdxdna_pci_drv.h"
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 172109cc9617..32a58bb6e6b1 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -10,6 +10,7 @@
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
+#include <drm/gpu_scheduler.h>
#include <linux/iommu.h>
#include <linux/pci.h>
@@ -64,6 +65,7 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
goto unbind_sva;
}
mutex_init(&client->hwctx_lock);
+ init_srcu_struct(&client->hwctx_srcu);
idr_init_base(&client->hwctx_idr, AMDXDNA_INVALID_CTX_HANDLE + 1);
mutex_init(&client->mm_lock);
@@ -93,6 +95,7 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
XDNA_DBG(xdna, "closing pid %d", client->pid);
idr_destroy(&client->hwctx_idr);
+ cleanup_srcu_struct(&client->hwctx_srcu);
mutex_destroy(&client->hwctx_lock);
mutex_destroy(&client->mm_lock);
if (client->dev_heap)
@@ -133,6 +136,8 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_BO, amdxdna_drm_create_bo_ioctl, 0),
DRM_IOCTL_DEF_DRV(AMDXDNA_GET_BO_INFO, amdxdna_drm_get_bo_info_ioctl, 0),
DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
+ /* Execution */
+ DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
};
static const struct file_operations amdxdna_fops = {
@@ -190,9 +195,16 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return -ENODEV;
drmm_mutex_init(&xdna->ddev, &xdna->dev_lock);
+ rwlock_init(&xdna->notifier_lock);
INIT_LIST_HEAD(&xdna->client_list);
pci_set_drvdata(pdev, xdna);
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&xdna->notifier_lock);
+ fs_reclaim_release(GFP_KERNEL);
+ }
+
mutex_lock(&xdna->dev_lock);
ret = xdna->dev_info->ops->init(xdna);
mutex_unlock(&xdna->dev_lock);
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index 3dddde4ac12a..ec22a074aac6 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -20,6 +20,7 @@ extern const struct drm_driver amdxdna_drm_drv;
struct amdxdna_dev;
struct amdxdna_gem_obj;
struct amdxdna_hwctx;
+struct amdxdna_sched_job;
/*
* struct amdxdna_dev_ops - Device hardware operation callbacks
@@ -31,6 +32,7 @@ struct amdxdna_dev_ops {
void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
+ int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
};
/*
@@ -69,6 +71,7 @@ struct amdxdna_dev {
struct mutex dev_lock; /* per device lock */
struct list_head client_list;
struct amdxdna_fw_ver fw_ver;
+ rwlock_t notifier_lock; /* for mmu notifier*/
};
/*
@@ -88,6 +91,8 @@ struct amdxdna_client {
struct list_head node;
pid_t pid;
struct mutex hwctx_lock; /* protect hwctx */
+ /* do NOT wait this srcu when hwctx_lock is hold */
+ struct srcu_struct hwctx_srcu;
struct idr hwctx_idr;
struct amdxdna_dev *xdna;
struct drm_file *filp;
diff --git a/drivers/accel/amdxdna/amdxdna_sysfs.c b/drivers/accel/amdxdna/amdxdna_sysfs.c
index 668b94b92714..f27e4ee960a0 100644
--- a/drivers/accel/amdxdna/amdxdna_sysfs.c
+++ b/drivers/accel/amdxdna/amdxdna_sysfs.c
@@ -3,9 +3,14 @@
* Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
*/
+#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
+#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/types.h>
+#include "amdxdna_gem.h"
#include "amdxdna_pci_drv.h"
static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf)
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index 720aab0ed7c4..f00c50461b09 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -5,6 +5,7 @@
#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
#include <linux/sizes.h>
#include "aie2_pci.h"
diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
index f3ea18bcf294..00cb381031d2 100644
--- a/drivers/accel/amdxdna/npu2_regs.c
+++ b/drivers/accel/amdxdna/npu2_regs.c
@@ -5,6 +5,7 @@
#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
#include <linux/sizes.h>
#include "aie2_pci.h"
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index db61142f0d4e..b6dae9667cca 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -5,6 +5,7 @@
#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
#include <linux/sizes.h>
#include "aie2_pci.h"
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index debf4e95b9bb..bed1baf8e160 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -5,6 +5,7 @@
#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
+#include <drm/gpu_scheduler.h>
#include <linux/sizes.h>
#include "aie2_pci.h"
diff --git a/include/trace/events/amdxdna.h b/include/trace/events/amdxdna.h
index 33343d8f0622..c6cb2da7b706 100644
--- a/include/trace/events/amdxdna.h
+++ b/include/trace/events/amdxdna.h
@@ -9,8 +9,49 @@
#if !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_AMDXDNA_H
+#include <drm/gpu_scheduler.h>
#include <linux/tracepoint.h>
+TRACE_EVENT(amdxdna_debug_point,
+ TP_PROTO(const char *name, u64 number, const char *str),
+
+ TP_ARGS(name, number, str),
+
+ TP_STRUCT__entry(__string(name, name)
+ __field(u64, number)
+ __string(str, str)),
+
+ TP_fast_assign(__assign_str(name);
+ __entry->number = number;
+ __assign_str(str);),
+
+ TP_printk("%s:%llu %s", __get_str(name), __entry->number,
+ __get_str(str))
+);
+
+TRACE_EVENT(xdna_job,
+ TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq),
+
+ TP_ARGS(sched_job, name, str, seq),
+
+ TP_STRUCT__entry(__string(name, name)
+ __string(str, str)
+ __field(u64, fence_context)
+ __field(u64, fence_seqno)
+ __field(u64, seq)),
+
+ TP_fast_assign(__assign_str(name);
+ __assign_str(str);
+ __entry->fence_context = sched_job->s_fence->finished.context;
+ __entry->fence_seqno = sched_job->s_fence->finished.seqno;
+ __entry->seq = seq;),
+
+ TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s",
+ __entry->fence_context, __entry->fence_seqno,
+ __get_str(name), __entry->seq,
+ __get_str(str))
+);
+
DECLARE_EVENT_CLASS(xdna_mbox_msg,
TP_PROTO(char *name, u8 chann_id, u32 opcode, u32 msg_id),
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index e3e78b79a8e7..3e88ed386fac 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -13,9 +13,11 @@
extern "C" {
#endif
+#define AMDXDNA_INVALID_CMD_HANDLE (~0UL)
#define AMDXDNA_INVALID_ADDR (~0UL)
#define AMDXDNA_INVALID_CTX_HANDLE 0
#define AMDXDNA_INVALID_BO_HANDLE 0
+#define AMDXDNA_INVALID_FENCE_HANDLE 0
enum amdxdna_device_type {
AMDXDNA_DEV_TYPE_UNKNOWN = -1,
@@ -29,6 +31,7 @@ enum amdxdna_drm_ioctl_id {
DRM_AMDXDNA_CREATE_BO,
DRM_AMDXDNA_GET_BO_INFO,
DRM_AMDXDNA_SYNC_BO,
+ DRM_AMDXDNA_EXEC_CMD,
};
/**
@@ -201,6 +204,37 @@ struct amdxdna_drm_sync_bo {
__u64 size;
};
+enum amdxdna_cmd_type {
+ AMDXDNA_CMD_SUBMIT_EXEC_BUF = 0,
+ AMDXDNA_CMD_SUBMIT_DEPENDENCY,
+ AMDXDNA_CMD_SUBMIT_SIGNAL,
+};
+
+/**
+ * struct amdxdna_drm_exec_cmd - Execute command.
+ * @ext: MBZ.
+ * @ext_flags: MBZ.
+ * @hwctx: Hardware context handle.
+ * @type: One of command type in enum amdxdna_cmd_type.
+ * @cmd_handles: Array of command handles or the command handle itself
+ * in case of just one.
+ * @args: Array of arguments for all command handles.
+ * @cmd_count: Number of command handles in the cmd_handles array.
+ * @arg_count: Number of arguments in the args array.
+ * @seq: Returned sequence number for this command.
+ */
+struct amdxdna_drm_exec_cmd {
+ __u64 ext;
+ __u64 ext_flags;
+ __u32 hwctx;
+ __u32 type;
+ __u64 cmd_handles;
+ __u64 args;
+ __u32 cmd_count;
+ __u32 arg_count;
+ __u64 seq;
+};
+
#define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \
struct amdxdna_drm_create_hwctx)
@@ -225,6 +259,10 @@ struct amdxdna_drm_sync_bo {
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SYNC_BO, \
struct amdxdna_drm_sync_bo)
+#define DRM_IOCTL_AMDXDNA_EXEC_CMD \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_EXEC_CMD, \
+ struct amdxdna_drm_exec_cmd)
+
#if defined(__cplusplus)
} /* extern c end */
#endif
--
2.34.1
On Thu, Nov 07, 2024 at 08:34:45PM -0800, Lizhi Hou wrote: > Add interfaces for user application to submit command and wait for its > completion. > > Co-developed-by: Min Ma <min.ma@amd.com> > Signed-off-by: Min Ma <min.ma@amd.com> > Signed-off-by: Lizhi Hou <lizhi.hou@amd.com> > --- > drivers/accel/amdxdna/aie2_ctx.c | 634 +++++++++++++++++- > drivers/accel/amdxdna/aie2_message.c | 343 ++++++++++ > drivers/accel/amdxdna/aie2_pci.c | 5 + > drivers/accel/amdxdna/aie2_pci.h | 40 ++ > drivers/accel/amdxdna/aie2_psp.c | 2 + > drivers/accel/amdxdna/aie2_smu.c | 2 + > drivers/accel/amdxdna/amdxdna_ctx.c | 320 ++++++++- > drivers/accel/amdxdna/amdxdna_ctx.h | 111 +++ > drivers/accel/amdxdna/amdxdna_gem.c | 1 + > .../accel/amdxdna/amdxdna_mailbox_helper.c | 5 + > drivers/accel/amdxdna/amdxdna_pci_drv.c | 12 + > drivers/accel/amdxdna/amdxdna_pci_drv.h | 5 + > drivers/accel/amdxdna/amdxdna_sysfs.c | 5 + > drivers/accel/amdxdna/npu1_regs.c | 1 + > drivers/accel/amdxdna/npu2_regs.c | 1 + > drivers/accel/amdxdna/npu4_regs.c | 1 + > drivers/accel/amdxdna/npu5_regs.c | 1 + > include/trace/events/amdxdna.h | 41 ++ > include/uapi/drm/amdxdna_accel.h | 38 ++ > 19 files changed, 1559 insertions(+), 9 deletions(-) > > diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c > index ae8a91dad042..4641e52b59e2 100644 > --- a/drivers/accel/amdxdna/aie2_ctx.c > +++ b/drivers/accel/amdxdna/aie2_ctx.c > @@ -8,8 +8,12 @@ > #include <drm/drm_gem.h> > #include <drm/drm_gem_shmem_helper.h> > #include <drm/drm_print.h> > +#include <drm/drm_syncobj.h> > +#include <linux/hmm.h> > #include <linux/types.h> > +#include <trace/events/amdxdna.h> > > +#include "aie2_msg_priv.h" > #include "aie2_pci.h" > #include "aie2_solver.h" > #include "amdxdna_ctx.h" > @@ -17,6 +21,342 @@ > #include "amdxdna_mailbox.h" > #include "amdxdna_pci_drv.h" > > +bool force_cmdlist; > +module_param(force_cmdlist, bool, 0600); > +MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)"); > + > +#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */ > + > +static void aie2_job_release(struct kref *ref) > +{ > + struct amdxdna_sched_job *job; > + > + job = container_of(ref, struct amdxdna_sched_job, refcnt); > + amdxdna_sched_job_cleanup(job); > + kfree(job); > +} > + > +static void aie2_job_put(struct amdxdna_sched_job *job) > +{ > + kref_put(&job->refcnt, aie2_job_release); > +} > + > +/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */ > +static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx, > + struct drm_sched_job *bad_job) > +{ > + drm_sched_stop(&hwctx->priv->sched, bad_job); > + aie2_destroy_context(xdna->dev_handle, hwctx); > +} > + > +static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) > +{ > + struct amdxdna_gem_obj *heap = hwctx->priv->heap; > + int ret; > + > + ret = aie2_create_context(xdna->dev_handle, hwctx); > + if (ret) { > + XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret); > + goto out; > + } > + > + ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, > + heap->mem.userptr, heap->mem.size); > + if (ret) { > + XDNA_ERR(xdna, "Map host buf failed, ret %d", ret); > + goto out; > + } > + > + if (hwctx->status != HWCTX_STAT_READY) { > + XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status); > + goto out; > + } > + > + ret = aie2_config_cu(hwctx); > + if (ret) { > + XDNA_ERR(xdna, "Config cu failed, ret %d", ret); > + goto out; > + } > + > +out: > + drm_sched_start(&hwctx->priv->sched); > + XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); > + return ret; > +} > + > +void aie2_stop_ctx_by_col_map(struct amdxdna_client *client, u32 col_map) > +{ > + struct amdxdna_dev *xdna = client->xdna; > + struct amdxdna_hwctx *hwctx; > + int next = 0; > + > + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); > + mutex_lock(&client->hwctx_lock); > + idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) { > + /* check if the HW context uses the error column */ > + if (!(col_map & amdxdna_hwctx_col_map(hwctx))) > + continue; > + > + aie2_hwctx_stop(xdna, hwctx, NULL); > + hwctx->old_status = hwctx->status; > + hwctx->status = HWCTX_STAT_STOP; > + XDNA_DBG(xdna, "Stop %s", hwctx->name); > + } > + mutex_unlock(&client->hwctx_lock); > +} > + > +void aie2_restart_ctx(struct amdxdna_client *client) > +{ > + struct amdxdna_dev *xdna = client->xdna; > + struct amdxdna_hwctx *hwctx; > + int next = 0; > + > + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); > + mutex_lock(&client->hwctx_lock); > + idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) { > + if (hwctx->status != HWCTX_STAT_STOP) > + continue; > + > + hwctx->status = hwctx->old_status; > + XDNA_DBG(xdna, "Resetting %s", hwctx->name); > + aie2_hwctx_restart(xdna, hwctx); > + } > + mutex_unlock(&client->hwctx_lock); > +} > + > +static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq) > +{ > + struct dma_fence *fence, *out_fence = NULL; > + int ret; > + > + fence = drm_syncobj_fence_get(hwctx->priv->syncobj); > + if (!fence) > + return NULL; > + > + ret = dma_fence_chain_find_seqno(&fence, seq); > + if (ret) > + goto out; > + > + out_fence = dma_fence_get(dma_fence_chain_contained(fence)); > + > +out: > + dma_fence_put(fence); > + return out_fence; > +} > + > +static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx) > +{ > + struct dma_fence *fence; > + > + fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1); > + if (!fence) > + return; > + > + dma_fence_wait(fence, false); > + dma_fence_put(fence); > +} > + > +static void > +aie2_sched_notify(struct amdxdna_sched_job *job) > +{ > + struct dma_fence *fence = job->fence; > + > + trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq); > + job->hwctx->priv->completed++; > + dma_fence_signal(fence); > + > + up(&job->hwctx->priv->job_sem); > + job->job_done = true; > + dma_fence_put(fence); > + mmput(job->mm); > + aie2_job_put(job); > +} > + > +static int > +aie2_sched_resp_handler(void *handle, const u32 *data, size_t size) > +{ > + struct amdxdna_sched_job *job = handle; > + struct amdxdna_gem_obj *cmd_abo; > + u32 ret = 0; > + u32 status; > + > + cmd_abo = job->cmd_bo; > + > + if (unlikely(!data)) > + goto out; > + > + if (unlikely(size != sizeof(u32))) { > + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); > + ret = -EINVAL; > + goto out; > + } > + > + status = *data; > + XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); > + if (status == AIE2_STATUS_SUCCESS) > + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); > + else > + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR); > + > +out: > + aie2_sched_notify(job); > + return ret; > +} > + > +static int > +aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size) > +{ > + struct amdxdna_sched_job *job = handle; > + u32 ret = 0; > + u32 status; > + > + if (unlikely(!data)) > + goto out; > + > + if (unlikely(size != sizeof(u32))) { > + ret = -EINVAL; > + goto out; > + } > + > + status = *data; > + XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); > + > +out: > + aie2_sched_notify(job); > + return ret; > +} > + > +static int > +aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size) > +{ > + struct amdxdna_sched_job *job = handle; > + struct amdxdna_gem_obj *cmd_abo; > + struct cmd_chain_resp *resp; > + struct amdxdna_dev *xdna; > + u32 fail_cmd_status; > + u32 fail_cmd_idx; > + u32 ret = 0; > + > + cmd_abo = job->cmd_bo; > + if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { > + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); > + ret = -EINVAL; > + goto out; > + } > + > + resp = (struct cmd_chain_resp *)data; > + xdna = job->hwctx->client->xdna; > + XDNA_DBG(xdna, "Status 0x%x", resp->status); > + if (resp->status == AIE2_STATUS_SUCCESS) { > + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); > + goto out; > + } > + > + /* Slow path to handle error, read from ringbuf on BAR */ > + fail_cmd_idx = resp->fail_cmd_idx; > + fail_cmd_status = resp->fail_cmd_status; > + XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x", > + fail_cmd_idx, fail_cmd_status); > + > + if (fail_cmd_status == AIE2_STATUS_SUCCESS) { > + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); > + ret = -EINVAL; > + goto out; > + } > + amdxdna_cmd_set_state(cmd_abo, fail_cmd_status); > + > + if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) { > + struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL); > + > + cc->error_index = fail_cmd_idx; > + if (cc->error_index >= cc->command_count) > + cc->error_index = 0; > + } > +out: > + aie2_sched_notify(job); > + return ret; > +} > + > +static struct dma_fence * > +aie2_sched_job_run(struct drm_sched_job *sched_job) > +{ > + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); > + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; > + struct amdxdna_hwctx *hwctx = job->hwctx; > + struct dma_fence *fence; > + int ret; > + > + if (!mmget_not_zero(job->mm)) > + return ERR_PTR(-ESRCH); > + > + kref_get(&job->refcnt); > + fence = dma_fence_get(job->fence); > + > + if (unlikely(!cmd_abo)) { > + ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler); > + goto out; > + } > + > + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW); > + > + if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) > + ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); > + else if (force_cmdlist) > + ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); > + else > + ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler); > + > +out: > + if (ret) { > + dma_fence_put(job->fence); > + aie2_job_put(job); > + mmput(job->mm); > + fence = ERR_PTR(ret); > + } > + trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq); > + > + return fence; > +} > + > +static void aie2_sched_job_free(struct drm_sched_job *sched_job) > +{ > + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); > + struct amdxdna_hwctx *hwctx = job->hwctx; > + > + trace_xdna_job(sched_job, hwctx->name, "job free", job->seq); > + if (!job->job_done) > + up(&hwctx->priv->job_sem); > + > + if (job->out_fence) > + dma_fence_put(job->out_fence); > + drm_sched_job_cleanup(sched_job); > + aie2_job_put(job); > +} > + > +static enum drm_gpu_sched_stat > +aie2_sched_job_timedout(struct drm_sched_job *sched_job) > +{ > + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); > + struct amdxdna_hwctx *hwctx = job->hwctx; > + struct amdxdna_dev *xdna; > + > + xdna = hwctx->client->xdna; > + trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq); > + mutex_lock(&xdna->dev_lock); > + aie2_hwctx_stop(xdna, hwctx, sched_job); > + > + aie2_hwctx_restart(xdna, hwctx); > + mutex_unlock(&xdna->dev_lock); > + > + return DRM_GPU_SCHED_STAT_NOMINAL; > +} > + > +const struct drm_sched_backend_ops sched_ops = { > + .run_job = aie2_sched_job_run, > + .free_job = aie2_sched_job_free, > + .timedout_job = aie2_sched_job_timedout, > +}; > + > static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx) > { > struct amdxdna_dev *xdna = hwctx->client->xdna; > @@ -126,13 +466,66 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx) > XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); > } > > +static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx) > +{ > + struct amdxdna_dev *xdna = hwctx->client->xdna; > + struct drm_file *filp = hwctx->client->filp; > + struct drm_syncobj *syncobj; > + u32 hdl; > + int ret; > + > + hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE; > + > + ret = drm_syncobj_create(&syncobj, 0, NULL); > + if (ret) { > + XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret); > + return ret; > + } > + ret = drm_syncobj_get_handle(filp, syncobj, &hdl); > + if (ret) { > + drm_syncobj_put(syncobj); > + XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret); > + return ret; > + } > + hwctx->priv->syncobj = syncobj; > + hwctx->syncobj_hdl = hdl; > + > + return 0; > +} > + > +static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx) > +{ > + /* > + * The syncobj_hdl is owned by user space and will be cleaned up > + * separately. > + */ > + drm_syncobj_put(hwctx->priv->syncobj); > +} > + > +static void aie2_ctx_syncobj_add_fence(struct amdxdna_hwctx *hwctx, > + struct dma_fence *ofence, u64 seq) > +{ > + struct drm_syncobj *syncobj = hwctx->priv->syncobj; > + struct dma_fence_chain *chain; > + > + if (!syncobj) > + return; > + > + chain = dma_fence_chain_alloc(); > + if (!chain) > + return; You have very subtlety broken dma-fencing rules. This is going to create a lock chain of: mutex_lock(&hwctx->priv->io_lock) recliam(); mutex_unlock(&hwctx->priv->io_lock) But you published the dma-fence for the job which is in the path of reclaim. Lockdep should complain if you have all the correct annotations and in theory you can deadlock. So I think you prealloc the chain() before publishing the dma-fence and then I'd also prime 'hwctx->priv->io_lock' which a reclaim annotation. e.g. fs_reclaim_acquire(); might_lock(&hwctx->priv->io_lock); fs_reclaim_release(); > + > + drm_syncobj_add_point(syncobj, chain, ofence, seq); > +} > + > int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) > { > struct amdxdna_client *client = hwctx->client; > struct amdxdna_dev *xdna = client->xdna; > + struct drm_gpu_scheduler *sched; > struct amdxdna_hwctx_priv *priv; > struct amdxdna_gem_obj *heap; > - int ret; > + int i, ret; > > priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL); > if (!priv) > @@ -150,6 +543,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) > drm_gem_object_get(to_gobj(heap)); > mutex_unlock(&client->mm_lock); > priv->heap = heap; > + sema_init(&priv->job_sem, HWCTX_MAX_CMDS); > > ret = amdxdna_gem_pin(heap); > if (ret) { > @@ -157,10 +551,47 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) > goto put_heap; > } > > + for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { > + struct amdxdna_gem_obj *abo; > + struct amdxdna_drm_create_bo args = { > + .flags = 0, > + .type = AMDXDNA_BO_DEV, > + .vaddr = 0, > + .size = MAX_CHAIN_CMDBUF_SIZE, > + }; > + > + abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true); > + if (IS_ERR(abo)) { > + ret = PTR_ERR(abo); > + goto free_cmd_bufs; > + } > + > + XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx", > + i, abo->mem.dev_addr, abo->mem.size); > + priv->cmd_buf[i] = abo; > + } > + > + sched = &priv->sched; > + mutex_init(&priv->io_lock); > + ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, > + HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT), > + NULL, NULL, hwctx->name, xdna->ddev.dev); > + if (ret) { > + XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret); > + goto free_cmd_bufs; > + } > + > + ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL, > + &sched, 1, NULL); > + if (ret) { > + XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret); > + goto free_sched; > + } > + > ret = aie2_hwctx_col_list(hwctx); > if (ret) { > XDNA_ERR(xdna, "Create col list failed, ret %d", ret); > - goto unpin; > + goto free_entity; > } > > ret = aie2_alloc_resource(hwctx); > @@ -175,6 +606,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) > XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret); > goto release_resource; > } > + > + ret = aie2_ctx_syncobj_create(hwctx); > + if (ret) { > + XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret); > + goto release_resource; > + } > + > hwctx->status = HWCTX_STAT_INIT; > > XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); > @@ -185,7 +623,16 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) > aie2_release_resource(hwctx); > free_col_list: > kfree(hwctx->col_list); > -unpin: > +free_entity: > + drm_sched_entity_destroy(&priv->entity); > +free_sched: > + drm_sched_fini(&priv->sched); > +free_cmd_bufs: > + for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { > + if (!priv->cmd_buf[i]) > + continue; > + drm_gem_object_put(to_gobj(priv->cmd_buf[i])); > + } > amdxdna_gem_unpin(heap); > put_heap: > drm_gem_object_put(to_gobj(heap)); > @@ -196,11 +643,35 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) > > void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) > { > + struct amdxdna_dev *xdna; > + int idx; > + > + xdna = hwctx->client->xdna; > + drm_sched_wqueue_stop(&hwctx->priv->sched); > + > + /* Now, scheduler will not send command to device. */ > aie2_release_resource(hwctx); > > + /* > + * All submitted commands are aborted. > + * Restart scheduler queues to cleanup jobs. The amdxdna_sched_job_run() > + * will return NODEV if it is called. > + */ > + drm_sched_wqueue_start(&hwctx->priv->sched); > + > + aie2_hwctx_wait_for_idle(hwctx); > + drm_sched_entity_destroy(&hwctx->priv->entity); > + drm_sched_fini(&hwctx->priv->sched); > + aie2_ctx_syncobj_destroy(hwctx); > + > + XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq); > + > + for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++) > + drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx])); > amdxdna_gem_unpin(hwctx->priv->heap); > drm_gem_object_put(to_gobj(hwctx->priv->heap)); > > + mutex_destroy(&hwctx->priv->io_lock); > kfree(hwctx->col_list); > kfree(hwctx->priv); > kfree(hwctx->cus); > @@ -267,3 +738,160 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu > return -EOPNOTSUPP; > } > } > + > +static int aie2_populate_range(struct amdxdna_gem_obj *abo) > +{ > + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); > + struct mm_struct *mm = abo->mem.notifier.mm; > + struct hmm_range range = { 0 }; > + unsigned long timeout; > + int ret; > + > + XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx", > + abo->mem.userptr, abo->mem.size); > + range.notifier = &abo->mem.notifier; > + range.start = abo->mem.userptr; > + range.end = abo->mem.userptr + abo->mem.size; > + range.hmm_pfns = abo->mem.pfns; > + range.default_flags = HMM_PFN_REQ_FAULT; > + > + if (!mmget_not_zero(mm)) > + return -EFAULT; > + > + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); > +again: > + range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier); > + mmap_read_lock(mm); > + ret = hmm_range_fault(&range); > + mmap_read_unlock(mm); > + if (ret) { > + if (time_after(jiffies, timeout)) { > + ret = -ETIME; > + goto put_mm; > + } > + > + if (ret == -EBUSY) > + goto again; > + > + goto put_mm; > + } > + > + read_lock(&xdna->notifier_lock); > + if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) { > + read_unlock(&xdna->notifier_lock); > + goto again; > + } > + abo->mem.map_invalid = false; > + read_unlock(&xdna->notifier_lock); > + > +put_mm: > + mmput(mm); > + return ret; > +} > + > +static void aie2_hwctx_push_job(struct amdxdna_sched_job *job, u64 *seq) > +{ > + struct amdxdna_hwctx *hwctx = job->hwctx; > + > + mutex_lock(&hwctx->priv->io_lock); > + drm_sched_job_arm(&job->base); > + job->seq = hwctx->priv->seq++; > + *seq = job->seq; > + > + job->out_fence = dma_fence_get(&job->base.s_fence->finished); > + drm_sched_entity_push_job(&job->base); > + aie2_ctx_syncobj_add_fence(hwctx, job->out_fence, *seq); > + mutex_unlock(&hwctx->priv->io_lock); > +} > + > +int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq) > +{ > + struct amdxdna_dev *xdna = hwctx->client->xdna; > + struct ww_acquire_ctx acquire_ctx; > + struct amdxdna_gem_obj *abo; > + unsigned long timeout = 0; > + int ret, i; > + > + ret = down_interruptible(&hwctx->priv->job_sem); > + if (ret) { > + XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret); > + return ret; > + } > + > + ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx); > + if (ret) { > + XDNA_ERR(xdna, "DRM job init failed, ret %d", ret); > + goto up_sem; > + } > + > +retry: > + ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); > + if (ret) { > + XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); > + goto cleanup_job; > + } > + > + for (i = 0; i < job->bo_cnt; i++) { > + ret = dma_resv_reserve_fences(job->bos[i]->resv, 1); > + if (ret) { > + XDNA_WARN(xdna, "Failed to reserve fences %d", ret); > + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); > + goto cleanup_job; > + } > + } > + > + read_lock(&xdna->notifier_lock); > + for (i = 0; i < job->bo_cnt; i++) { > + abo = to_xdna_obj(job->bos[i]); > + if (abo->mem.map_invalid) { > + read_unlock(&xdna->notifier_lock); > + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); > + if (!timeout) { > + timeout = jiffies + > + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); > + } else if (time_after(jiffies, timeout)) { > + ret = -ETIME; > + goto cleanup_job; > + } > + > + ret = aie2_populate_range(abo); > + if (ret) > + goto cleanup_job; > + goto retry; > + } > + } > + Any reason you can't arm the job here and install job's finished fence in dma-resv slot? IIRC at one point in time in Xe we used the 'job->fence', this is a hardware fence in Xe, assuming this the same here and it ended up causing some issues - exactly what I cannot recall. The scheduler again is really designed to install the job's finished fence in dma-resv / syncobjs rather than driver internal fences. That of course would mean taking hwctx->priv->io_lock underneath all other locks in this loop at the final step. Matt > + for (i = 0; i < job->bo_cnt; i++) > + dma_resv_add_fence(job->bos[i]->resv, job->fence, DMA_RESV_USAGE_WRITE); > + > + read_unlock(&xdna->notifier_lock); > + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); > + > + aie2_hwctx_push_job(job, seq); > + > + return 0; > + > +cleanup_job: > + drm_sched_job_cleanup(&job->base); > +up_sem: > + up(&hwctx->priv->job_sem); > + job->job_done = true; > + return ret; > +} > + > +void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, > + unsigned long cur_seq) > +{ > + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); > + struct drm_gem_object *gobj = to_gobj(abo); > + long ret; > + > + write_lock(&xdna->notifier_lock); > + abo->mem.map_invalid = true; > + mmu_interval_set_seq(&abo->mem.notifier, cur_seq); > + write_unlock(&xdna->notifier_lock); > + ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, > + true, MAX_SCHEDULE_TIMEOUT); > + if (!ret || ret == -ERESTARTSYS) > + XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); > +} > diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c > index 40d9e4261e8b..db62954eb378 100644 > --- a/drivers/accel/amdxdna/aie2_message.c > +++ b/drivers/accel/amdxdna/aie2_message.c > @@ -4,10 +4,12 @@ > */ > > #include <drm/amdxdna_accel.h> > +#include <drm/drm_cache.h> > #include <drm/drm_device.h> > #include <drm/drm_gem.h> > #include <drm/drm_gem_shmem_helper.h> > #include <drm/drm_print.h> > +#include <drm/gpu_scheduler.h> > #include <linux/bitfield.h> > #include <linux/errno.h> > #include <linux/pci.h> > @@ -362,3 +364,344 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx) > msg.opcode, resp.status, ret); > return ret; > } > + > +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, > + int (*notify_cb)(void *, const u32 *, size_t)) > +{ > + struct mailbox_channel *chann = hwctx->priv->mbox_chann; > + struct amdxdna_dev *xdna = hwctx->client->xdna; > + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; > + union { > + struct execute_buffer_req ebuf; > + struct exec_dpu_req dpu; > + } req; > + struct xdna_mailbox_msg msg; > + u32 payload_len; > + void *payload; > + int cu_idx; > + int ret; > + u32 op; > + > + if (!chann) > + return -ENODEV; > + > + payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); > + if (!payload) { > + XDNA_ERR(xdna, "Invalid command, cannot get payload"); > + return -EINVAL; > + } > + > + cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo); > + if (cu_idx < 0) { > + XDNA_DBG(xdna, "Invalid cu idx"); > + return -EINVAL; > + } > + > + op = amdxdna_cmd_get_op(cmd_abo); > + switch (op) { > + case ERT_START_CU: > + if (unlikely(payload_len > sizeof(req.ebuf.payload))) > + XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len); > + req.ebuf.cu_idx = cu_idx; > + memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload)); > + msg.send_size = sizeof(req.ebuf); > + msg.opcode = MSG_OP_EXECUTE_BUFFER_CF; > + break; > + case ERT_START_NPU: { > + struct amdxdna_cmd_start_npu *sn = payload; > + > + if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload))) > + XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len); > + req.dpu.inst_buf_addr = sn->buffer; > + req.dpu.inst_size = sn->buffer_size; > + req.dpu.inst_prop_cnt = sn->prop_count; > + req.dpu.cu_idx = cu_idx; > + memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload)); > + msg.send_size = sizeof(req.dpu); > + msg.opcode = MSG_OP_EXEC_DPU; > + break; > + } > + default: > + XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op); > + return -EINVAL; > + } > + msg.handle = job; > + msg.notify_cb = notify_cb; > + msg.send_data = (u8 *)&req; > + print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req, > + 0x40, false); > + > + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); > + if (ret) { > + XDNA_ERR(xdna, "Send message failed"); > + return ret; > + } > + > + return 0; > +} > + > +static int > +aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset, > + struct amdxdna_gem_obj *abo, u32 *size) > +{ > + struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset; > + int cu_idx = amdxdna_cmd_get_cu_idx(abo); > + u32 payload_len; > + void *payload; > + > + if (cu_idx < 0) > + return -EINVAL; > + > + payload = amdxdna_cmd_get_payload(abo, &payload_len); > + if (!payload) > + return -EINVAL; > + > + if (!slot_cf_has_space(offset, payload_len)) > + return -ENOSPC; > + > + buf->cu_idx = cu_idx; > + buf->arg_cnt = payload_len / sizeof(u32); > + memcpy(buf->args, payload, payload_len); > + /* Accurate buf size to hint firmware to do necessary copy */ > + *size = sizeof(*buf) + payload_len; > + return 0; > +} > + > +static int > +aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, > + struct amdxdna_gem_obj *abo, u32 *size) > +{ > + struct cmd_chain_slot_dpu *buf = cmd_buf + offset; > + int cu_idx = amdxdna_cmd_get_cu_idx(abo); > + struct amdxdna_cmd_start_npu *sn; > + u32 payload_len; > + void *payload; > + u32 arg_sz; > + > + if (cu_idx < 0) > + return -EINVAL; > + > + payload = amdxdna_cmd_get_payload(abo, &payload_len); > + if (!payload) > + return -EINVAL; > + sn = payload; > + arg_sz = payload_len - sizeof(*sn); > + if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) > + return -EINVAL; > + > + if (!slot_dpu_has_space(offset, arg_sz)) > + return -ENOSPC; > + > + buf->inst_buf_addr = sn->buffer; > + buf->inst_size = sn->buffer_size; > + buf->inst_prop_cnt = sn->prop_count; > + buf->cu_idx = cu_idx; > + buf->arg_cnt = arg_sz / sizeof(u32); > + memcpy(buf->args, sn->prop_args, arg_sz); > + > + /* Accurate buf size to hint firmware to do necessary copy */ > + *size += sizeof(*buf) + arg_sz; > + return 0; > +} > + > +static int > +aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset, > + struct amdxdna_gem_obj *abo, u32 *size) > +{ > + u32 this_op = amdxdna_cmd_get_op(abo); > + void *cmd_buf = cmdbuf_abo->mem.kva; > + int ret; > + > + if (this_op != op) { > + ret = -EINVAL; > + goto done; > + } > + > + switch (op) { > + case ERT_START_CU: > + ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size); > + break; > + case ERT_START_NPU: > + ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size); > + break; > + default: > + ret = -EOPNOTSUPP; > + } > + > +done: > + if (ret) { > + XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d", > + op, ret); > + } > + return ret; > +} > + > +static inline struct amdxdna_gem_obj * > +aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) > +{ > + int idx = get_job_idx(job->seq); > + > + return job->hwctx->priv->cmd_buf[idx]; > +} > + > +static void > +aie2_cmdlist_prepare_request(struct cmd_chain_req *req, > + struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt) > +{ > + req->buf_addr = cmdbuf_abo->mem.dev_addr; > + req->buf_size = size; > + req->count = cnt; > + drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); > + XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d", > + req->buf_addr, size, cnt); > +} > + > +static inline u32 > +aie2_cmd_op_to_msg_op(u32 op) > +{ > + switch (op) { > + case ERT_START_CU: > + return MSG_OP_CHAIN_EXEC_BUFFER_CF; > + case ERT_START_NPU: > + return MSG_OP_CHAIN_EXEC_DPU; > + default: > + return MSG_OP_MAX_OPCODE; > + } > +} > + > +int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, > + struct amdxdna_sched_job *job, > + int (*notify_cb)(void *, const u32 *, size_t)) > +{ > + struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); > + struct mailbox_channel *chann = hwctx->priv->mbox_chann; > + struct amdxdna_client *client = hwctx->client; > + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; > + struct amdxdna_cmd_chain *payload; > + struct xdna_mailbox_msg msg; > + struct cmd_chain_req req; > + u32 payload_len; > + u32 offset = 0; > + u32 size; > + int ret; > + u32 op; > + u32 i; > + > + op = amdxdna_cmd_get_op(cmd_abo); > + payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); > + if (op != ERT_CMD_CHAIN || !payload || > + payload_len < struct_size(payload, data, payload->command_count)) > + return -EINVAL; > + > + for (i = 0; i < payload->command_count; i++) { > + u32 boh = (u32)(payload->data[i]); > + struct amdxdna_gem_obj *abo; > + > + abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD); > + if (!abo) { > + XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh); > + return -ENOENT; > + } > + > + /* All sub-cmd should have same op, use the first one. */ > + if (i == 0) > + op = amdxdna_cmd_get_op(abo); > + > + ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size); > + amdxdna_gem_put_obj(abo); > + if (ret) > + return -EINVAL; > + > + offset += size; > + } > + > + /* The offset is the accumulated total size of the cmd buffer */ > + aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count); > + > + msg.opcode = aie2_cmd_op_to_msg_op(op); > + if (msg.opcode == MSG_OP_MAX_OPCODE) > + return -EOPNOTSUPP; > + msg.handle = job; > + msg.notify_cb = notify_cb; > + msg.send_data = (u8 *)&req; > + msg.send_size = sizeof(req); > + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); > + if (ret) { > + XDNA_ERR(hwctx->client->xdna, "Send message failed"); > + return ret; > + } > + > + return 0; > +} > + > +int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, > + struct amdxdna_sched_job *job, > + int (*notify_cb)(void *, const u32 *, size_t)) > +{ > + struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); > + struct mailbox_channel *chann = hwctx->priv->mbox_chann; > + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; > + struct xdna_mailbox_msg msg; > + struct cmd_chain_req req; > + u32 size; > + int ret; > + u32 op; > + > + op = amdxdna_cmd_get_op(cmd_abo); > + ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size); > + if (ret) > + return ret; > + > + aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1); > + > + msg.opcode = aie2_cmd_op_to_msg_op(op); > + if (msg.opcode == MSG_OP_MAX_OPCODE) > + return -EOPNOTSUPP; > + msg.handle = job; > + msg.notify_cb = notify_cb; > + msg.send_data = (u8 *)&req; > + msg.send_size = sizeof(req); > + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); > + if (ret) { > + XDNA_ERR(hwctx->client->xdna, "Send message failed"); > + return ret; > + } > + > + return 0; > +} > + > +int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, > + int (*notify_cb)(void *, const u32 *, size_t)) > +{ > + struct mailbox_channel *chann = hwctx->priv->mbox_chann; > + struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]); > + struct amdxdna_dev *xdna = hwctx->client->xdna; > + struct xdna_mailbox_msg msg; > + struct sync_bo_req req; > + int ret = 0; > + > + req.src_addr = 0; > + req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr; > + req.size = abo->mem.size; > + > + /* Device to Host */ > + req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) | > + FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM); > + > + XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed", > + req.size, req.src_addr, req.dst_addr); > + > + msg.handle = job; > + msg.notify_cb = notify_cb; > + msg.send_data = (u8 *)&req; > + msg.send_size = sizeof(req); > + msg.opcode = MSG_OP_SYNC_BO; > + > + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); > + if (ret) { > + XDNA_ERR(xdna, "Send message failed"); > + return ret; > + } > + > + return 0; > +} > diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c > index caeb07d1dc6b..fb369d615969 100644 > --- a/drivers/accel/amdxdna/aie2_pci.c > +++ b/drivers/accel/amdxdna/aie2_pci.c > @@ -5,8 +5,10 @@ > > #include <drm/amdxdna_accel.h> > #include <drm/drm_device.h> > +#include <drm/drm_gem_shmem_helper.h> > #include <drm/drm_managed.h> > #include <drm/drm_print.h> > +#include <drm/gpu_scheduler.h> > #include <linux/errno.h> > #include <linux/firmware.h> > #include <linux/iommu.h> > @@ -17,6 +19,7 @@ > #include "aie2_pci.h" > #include "aie2_solver.h" > #include "amdxdna_ctx.h" > +#include "amdxdna_gem.h" > #include "amdxdna_mailbox.h" > #include "amdxdna_pci_drv.h" > > @@ -495,4 +498,6 @@ const struct amdxdna_dev_ops aie2_ops = { > .hwctx_init = aie2_hwctx_init, > .hwctx_fini = aie2_hwctx_fini, > .hwctx_config = aie2_hwctx_config, > + .cmd_submit = aie2_cmd_submit, > + .hmm_invalidate = aie2_hmm_invalidate, > }; > diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h > index 3ac936e2c9d1..bc6910875d9d 100644 > --- a/drivers/accel/amdxdna/aie2_pci.h > +++ b/drivers/accel/amdxdna/aie2_pci.h > @@ -6,6 +6,8 @@ > #ifndef _AIE2_PCI_H_ > #define _AIE2_PCI_H_ > > +#include <linux/semaphore.h> > + > #include "amdxdna_mailbox.h" > > #define AIE2_INTERVAL 20000 /* us */ > @@ -76,8 +78,10 @@ enum psp_reg_idx { > PSP_MAX_REGS /* Keep this at the end */ > }; > > +struct amdxdna_client; > struct amdxdna_fw_ver; > struct amdxdna_hwctx; > +struct amdxdna_sched_job; > > struct psp_config { > const void *fw_buf; > @@ -118,9 +122,31 @@ struct rt_config { > u32 value; > }; > > +/* > + * Define the maximum number of pending commands in a hardware context. > + * Must be power of 2! > + */ > +#define HWCTX_MAX_CMDS 4 > +#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1)) > struct amdxdna_hwctx_priv { > struct amdxdna_gem_obj *heap; > void *mbox_chann; > + > + struct drm_gpu_scheduler sched; > + struct drm_sched_entity entity; > + > + struct mutex io_lock; /* protect seq and cmd order */ > + struct wait_queue_head job_free_wq; > + u32 num_pending; > + u64 seq; > + struct semaphore job_sem; > + bool job_done; > + > + /* Completed job counter */ > + u64 completed; > + > + struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS]; > + struct drm_syncobj *syncobj; > }; > > struct amdxdna_dev_hdl { > @@ -199,10 +225,24 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct > int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); > int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size); > int aie2_config_cu(struct amdxdna_hwctx *hwctx); > +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, > + int (*notify_cb)(void *, const u32 *, size_t)); > +int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, > + struct amdxdna_sched_job *job, > + int (*notify_cb)(void *, const u32 *, size_t)); > +int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, > + struct amdxdna_sched_job *job, > + int (*notify_cb)(void *, const u32 *, size_t)); > +int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, > + int (*notify_cb)(void *, const u32 *, size_t)); > > /* aie2_hwctx.c */ > int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); > void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx); > int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); > +int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); > +void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq); > +void aie2_stop_ctx_by_col_map(struct amdxdna_client *client, u32 col_map); > +void aie2_restart_ctx(struct amdxdna_client *client); > > #endif /* _AIE2_PCI_H_ */ > diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c > index b03501e81065..dc3a072ce3b6 100644 > --- a/drivers/accel/amdxdna/aie2_psp.c > +++ b/drivers/accel/amdxdna/aie2_psp.c > @@ -4,8 +4,10 @@ > */ > > #include <drm/drm_device.h> > +#include <drm/drm_gem_shmem_helper.h> > #include <drm/drm_managed.h> > #include <drm/drm_print.h> > +#include <drm/gpu_scheduler.h> > #include <linux/bitfield.h> > #include <linux/iopoll.h> > > diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c > index 3fa7064649aa..91893d438da7 100644 > --- a/drivers/accel/amdxdna/aie2_smu.c > +++ b/drivers/accel/amdxdna/aie2_smu.c > @@ -4,7 +4,9 @@ > */ > > #include <drm/drm_device.h> > +#include <drm/drm_gem_shmem_helper.h> > #include <drm/drm_print.h> > +#include <drm/gpu_scheduler.h> > #include <linux/iopoll.h> > > #include "aie2_pci.h" > diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c > index 9489399adea1..13cfbab9caa0 100644 > --- a/drivers/accel/amdxdna/amdxdna_ctx.c > +++ b/drivers/accel/amdxdna/amdxdna_ctx.c > @@ -7,17 +7,65 @@ > #include <drm/drm_device.h> > #include <drm/drm_drv.h> > #include <drm/drm_file.h> > +#include <drm/drm_gem.h> > +#include <drm/drm_gem_shmem_helper.h> > #include <drm/drm_print.h> > +#include <drm/gpu_scheduler.h> > +#include <trace/events/amdxdna.h> > > #include "amdxdna_ctx.h" > +#include "amdxdna_gem.h" > #include "amdxdna_pci_drv.h" > > #define MAX_HWCTX_ID 255 > +#define MAX_ARG_COUNT 4095 > > -static void amdxdna_hwctx_destroy(struct amdxdna_hwctx *hwctx) > +struct amdxdna_fence { > + struct dma_fence base; > + spinlock_t lock; /* for base */ > + struct amdxdna_hwctx *hwctx; > +}; > + > +static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence) > +{ > + return KBUILD_MODNAME; > +} > + > +static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence) > +{ > + struct amdxdna_fence *xdna_fence; > + > + xdna_fence = container_of(fence, struct amdxdna_fence, base); > + > + return xdna_fence->hwctx->name; > +} > + > +static const struct dma_fence_ops fence_ops = { > + .get_driver_name = amdxdna_fence_get_driver_name, > + .get_timeline_name = amdxdna_fence_get_timeline_name, > +}; > + > +static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx) > +{ > + struct amdxdna_fence *fence; > + > + fence = kzalloc(sizeof(*fence), GFP_KERNEL); > + if (!fence) > + return NULL; > + > + fence->hwctx = hwctx; > + spin_lock_init(&fence->lock); > + dma_fence_init(&fence->base, &fence_ops, &fence->lock, hwctx->id, 0); > + return &fence->base; > +} > + > +static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx, > + struct srcu_struct *ss) > { > struct amdxdna_dev *xdna = hwctx->client->xdna; > > + synchronize_srcu(ss); > + > /* At this point, user is not able to submit new commands */ > mutex_lock(&xdna->dev_lock); > xdna->dev_info->ops->hwctx_fini(hwctx); > @@ -27,6 +75,46 @@ static void amdxdna_hwctx_destroy(struct amdxdna_hwctx *hwctx) > kfree(hwctx); > } > > +void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size) > +{ > + struct amdxdna_cmd *cmd = abo->mem.kva; > + u32 num_masks, count; > + > + if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) > + num_masks = 0; > + else > + num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header); > + > + if (size) { > + count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header); > + if (unlikely(count <= num_masks)) { > + *size = 0; > + return NULL; > + } > + *size = (count - num_masks) * sizeof(u32); > + } > + return &cmd->data[num_masks]; > +} > + > +int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) > +{ > + struct amdxdna_cmd *cmd = abo->mem.kva; > + u32 num_masks, i; > + u32 *cu_mask; > + > + if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) > + return -1; > + > + num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header); > + cu_mask = cmd->data; > + for (i = 0; i < num_masks; i++) { > + if (cu_mask[i]) > + return ffs(cu_mask[i]) - 1; > + } > + > + return -1; > +} > + > /* > * This should be called in close() and remove(). DO NOT call in other syscalls. > * This guarantee that when hwctx and resources will be released, if user > @@ -43,7 +131,7 @@ void amdxdna_hwctx_remove_all(struct amdxdna_client *client) > client->pid, hwctx->id); > idr_remove(&client->hwctx_idr, hwctx->id); > mutex_unlock(&client->hwctx_lock); > - amdxdna_hwctx_destroy(hwctx); > + amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); > mutex_lock(&client->hwctx_lock); > } > mutex_unlock(&client->hwctx_lock); > @@ -135,6 +223,12 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d > if (!drm_dev_enter(dev, &idx)) > return -ENODEV; > > + /* > + * Use hwctx_lock to achieve exclusion with other hwctx writers, > + * SRCU to synchronize with exec/wait command ioctls. > + * > + * The pushed jobs are handled by DRM scheduler during destroy. > + */ > mutex_lock(&client->hwctx_lock); > hwctx = idr_find(&client->hwctx_idr, args->handle); > if (!hwctx) { > @@ -147,7 +241,7 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d > idr_remove(&client->hwctx_idr, hwctx->id); > mutex_unlock(&client->hwctx_lock); > > - amdxdna_hwctx_destroy(hwctx); > + amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); > > XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle); > out: > @@ -161,10 +255,10 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr > struct amdxdna_drm_config_hwctx *args = data; > struct amdxdna_dev *xdna = to_xdna_dev(dev); > struct amdxdna_hwctx *hwctx; > + int ret, idx; > u32 buf_size; > void *buf; > u64 val; > - int ret; > > if (!xdna->dev_info->ops->hwctx_config) > return -EOPNOTSUPP; > @@ -203,17 +297,231 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr > } > > mutex_lock(&xdna->dev_lock); > + idx = srcu_read_lock(&client->hwctx_srcu); > hwctx = idr_find(&client->hwctx_idr, args->handle); > if (!hwctx) { > XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle); > ret = -EINVAL; > - goto unlock; > + goto unlock_srcu; > } > > ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size); > > -unlock: > +unlock_srcu: > + srcu_read_unlock(&client->hwctx_srcu, idx); > mutex_unlock(&xdna->dev_lock); > kfree(buf); > return ret; > } > + > +static void > +amdxdna_arg_bos_put(struct amdxdna_sched_job *job) > +{ > + int i; > + > + for (i = 0; i < job->bo_cnt; i++) { > + if (!job->bos[i]) > + break; > + drm_gem_object_put(job->bos[i]); > + } > +} > + > +static int > +amdxdna_arg_bos_lookup(struct amdxdna_client *client, > + struct amdxdna_sched_job *job, > + u32 *bo_hdls, u32 bo_cnt) > +{ > + struct drm_gem_object *gobj; > + int i, ret; > + > + job->bo_cnt = bo_cnt; > + for (i = 0; i < job->bo_cnt; i++) { > + struct amdxdna_gem_obj *abo; > + > + gobj = drm_gem_object_lookup(client->filp, bo_hdls[i]); > + if (!gobj) { > + ret = -ENOENT; > + goto put_shmem_bo; > + } > + abo = to_xdna_obj(gobj); > + > + mutex_lock(&abo->lock); > + if (abo->pinned) { > + mutex_unlock(&abo->lock); > + job->bos[i] = gobj; > + continue; > + } > + > + ret = amdxdna_gem_pin_nolock(abo); > + if (ret) { > + mutex_unlock(&abo->lock); > + drm_gem_object_put(gobj); > + goto put_shmem_bo; > + } > + abo->pinned = true; > + mutex_unlock(&abo->lock); > + > + job->bos[i] = gobj; > + } > + > + return 0; > + > +put_shmem_bo: > + amdxdna_arg_bos_put(job); > + return ret; > +} > + > +void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job) > +{ > + trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release"); > + amdxdna_arg_bos_put(job); > + amdxdna_gem_put_obj(job->cmd_bo); > +} > + > +int amdxdna_cmd_submit(struct amdxdna_client *client, > + u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt, > + u32 hwctx_hdl, u64 *seq) > +{ > + struct amdxdna_dev *xdna = client->xdna; > + struct amdxdna_sched_job *job; > + struct amdxdna_hwctx *hwctx; > + int ret, idx; > + > + XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt); > + job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL); > + if (!job) > + return -ENOMEM; > + > + if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) { > + job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD); > + if (!job->cmd_bo) { > + XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl); > + ret = -EINVAL; > + goto free_job; > + } > + } else { > + job->cmd_bo = NULL; > + } > + > + ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt); > + if (ret) { > + XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret); > + goto cmd_put; > + } > + > + idx = srcu_read_lock(&client->hwctx_srcu); > + hwctx = idr_find(&client->hwctx_idr, hwctx_hdl); > + if (!hwctx) { > + XDNA_DBG(xdna, "PID %d failed to get hwctx %d", > + client->pid, hwctx_hdl); > + ret = -EINVAL; > + goto unlock_srcu; > + } > + > + if (hwctx->status != HWCTX_STAT_READY) { > + XDNA_ERR(xdna, "HW Context is not ready"); > + ret = -EINVAL; > + goto unlock_srcu; > + } > + > + job->hwctx = hwctx; > + job->mm = current->mm; > + > + job->fence = amdxdna_fence_create(hwctx); > + if (!job->fence) { > + XDNA_ERR(xdna, "Failed to create fence"); > + ret = -ENOMEM; > + goto unlock_srcu; > + } > + kref_init(&job->refcnt); > + > + ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq); > + if (ret) > + goto put_fence; > + > + /* > + * The amdxdna_hwctx_destroy_rcu() will release hwctx and associated > + * resource after synchronize_srcu(). The submitted jobs should be > + * handled by the queue, for example DRM scheduler, in device layer. > + * For here we can unlock SRCU. > + */ > + srcu_read_unlock(&client->hwctx_srcu, idx); > + trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed"); > + > + return 0; > + > +put_fence: > + dma_fence_put(job->fence); > +unlock_srcu: > + srcu_read_unlock(&client->hwctx_srcu, idx); > + amdxdna_arg_bos_put(job); > +cmd_put: > + amdxdna_gem_put_obj(job->cmd_bo); > +free_job: > + kfree(job); > + return ret; > +} > + > +/* > + * The submit command ioctl submits a command to firmware. One firmware command > + * may contain multiple command BOs for processing as a whole. > + * The command sequence number is returned which can be used for wait command ioctl. > + */ > +static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client, > + struct amdxdna_drm_exec_cmd *args) > +{ > + struct amdxdna_dev *xdna = client->xdna; > + u32 *arg_bo_hdls; > + u32 cmd_bo_hdl; > + int ret; > + > + if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) { > + XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count); > + return -EINVAL; > + } > + > + /* Only support single command for now. */ > + if (args->cmd_count != 1) { > + XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count); > + return -EINVAL; > + } > + > + cmd_bo_hdl = (u32)args->cmd_handles; > + arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL); > + if (!arg_bo_hdls) > + return -ENOMEM; > + ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args), > + args->arg_count * sizeof(u32)); > + if (ret) { > + ret = -EFAULT; > + goto free_cmd_bo_hdls; > + } > + > + ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls, > + args->arg_count, args->hwctx, &args->seq); > + if (ret) > + XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret); > + > +free_cmd_bo_hdls: > + kfree(arg_bo_hdls); > + if (!ret) > + XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq); > + return ret; > +} > + > +int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > +{ > + struct amdxdna_client *client = filp->driver_priv; > + struct amdxdna_drm_exec_cmd *args = data; > + > + if (args->ext || args->ext_flags) > + return -EINVAL; > + > + switch (args->type) { > + case AMDXDNA_CMD_SUBMIT_EXEC_BUF: > + return amdxdna_drm_submit_execbuf(client, args); > + } > + > + XDNA_ERR(client->xdna, "Invalid command type %d", args->type); > + return -EINVAL; > +} > diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h > index b409d0731ab8..18ed9bdbd6d8 100644 > --- a/drivers/accel/amdxdna/amdxdna_ctx.h > +++ b/drivers/accel/amdxdna/amdxdna_ctx.h > @@ -6,6 +6,54 @@ > #ifndef _AMDXDNA_CTX_H_ > #define _AMDXDNA_CTX_H_ > > +#include <linux/bitfield.h> > + > +#include "amdxdna_gem.h" > + > +struct amdxdna_hwctx_priv; > + > +enum ert_cmd_opcode { > + ERT_START_CU = 0, > + ERT_CMD_CHAIN = 19, > + ERT_START_NPU = 20, > +}; > + > +enum ert_cmd_state { > + ERT_CMD_STATE_INVALID, > + ERT_CMD_STATE_NEW, > + ERT_CMD_STATE_QUEUED, > + ERT_CMD_STATE_RUNNING, > + ERT_CMD_STATE_COMPLETED, > + ERT_CMD_STATE_ERROR, > + ERT_CMD_STATE_ABORT, > + ERT_CMD_STATE_SUBMITTED, > + ERT_CMD_STATE_TIMEOUT, > + ERT_CMD_STATE_NORESPONSE, > +}; > + > +/* > + * Interpretation of the beginning of data payload for ERT_START_NPU in > + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args. > + */ > +struct amdxdna_cmd_start_npu { > + u64 buffer; /* instruction buffer address */ > + u32 buffer_size; /* size of buffer in bytes */ > + u32 prop_count; /* properties count */ > + u32 prop_args[]; /* properties and regular kernel arguments */ > +}; > + > +/* > + * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in > + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles. > + */ > +struct amdxdna_cmd_chain { > + u32 command_count; > + u32 submit_index; > + u32 error_index; > + u32 reserved[3]; > + u64 data[] __counted_by(command_count); > +}; > + > /* Exec buffer command header format */ > #define AMDXDNA_CMD_STATE GENMASK(3, 0) > #define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10) > @@ -41,9 +89,72 @@ struct amdxdna_hwctx { > u32 syncobj_hdl; > }; > > +#define drm_job_to_xdna_job(j) \ > + container_of(j, struct amdxdna_sched_job, base) > + > +struct amdxdna_sched_job { > + struct drm_sched_job base; > + struct kref refcnt; > + struct amdxdna_hwctx *hwctx; > + struct mm_struct *mm; > + /* The fence to notice DRM scheduler that job is done by hardware */ > + struct dma_fence *fence; > + /* user can wait on this fence */ > + struct dma_fence *out_fence; > + bool job_done; > + u64 seq; > + struct amdxdna_gem_obj *cmd_bo; > + size_t bo_cnt; > + struct drm_gem_object *bos[] __counted_by(bo_cnt); > +}; > + > +static inline u32 > +amdxdna_cmd_get_op(struct amdxdna_gem_obj *abo) > +{ > + struct amdxdna_cmd *cmd = abo->mem.kva; > + > + return FIELD_GET(AMDXDNA_CMD_OPCODE, cmd->header); > +} > + > +static inline void > +amdxdna_cmd_set_state(struct amdxdna_gem_obj *abo, enum ert_cmd_state s) > +{ > + struct amdxdna_cmd *cmd = abo->mem.kva; > + > + cmd->header &= ~AMDXDNA_CMD_STATE; > + cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, s); > +} > + > +static inline enum ert_cmd_state > +amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo) > +{ > + struct amdxdna_cmd *cmd = abo->mem.kva; > + > + return FIELD_GET(AMDXDNA_CMD_STATE, cmd->header); > +} > + > +void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size); > +int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); > + > +static inline u32 amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx) > +{ > + return GENMASK(hwctx->start_col + hwctx->num_col - 1, > + hwctx->start_col); > +} > + > +void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job); > void amdxdna_hwctx_remove_all(struct amdxdna_client *client); > + > +int amdxdna_cmd_submit(struct amdxdna_client *client, > + u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt, > + u32 hwctx_hdl, u64 *seq); > + > +int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl, > + u64 seq, u32 timeout); > + > int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); > int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); > int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); > +int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); > > #endif /* _AMDXDNA_CTX_H_ */ > diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c > index f2ba86ae9e1a..4dfeca306d98 100644 > --- a/drivers/accel/amdxdna/amdxdna_gem.c > +++ b/drivers/accel/amdxdna/amdxdna_gem.c > @@ -8,6 +8,7 @@ > #include <drm/drm_device.h> > #include <drm/drm_gem.h> > #include <drm/drm_gem_shmem_helper.h> > +#include <drm/gpu_scheduler.h> > #include <linux/iosys-map.h> > #include <linux/vmalloc.h> > > diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c > index 42b615394605..5139a9c96a91 100644 > --- a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c > +++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c > @@ -3,10 +3,15 @@ > * Copyright (C) 2024, Advanced Micro Devices, Inc. > */ > > +#include <drm/amdxdna_accel.h> > #include <drm/drm_device.h> > #include <drm/drm_print.h> > +#include <drm/drm_gem.h> > +#include <drm/drm_gem_shmem_helper.h> > +#include <drm/gpu_scheduler.h> > #include <linux/completion.h> > > +#include "amdxdna_gem.h" > #include "amdxdna_mailbox.h" > #include "amdxdna_mailbox_helper.h" > #include "amdxdna_pci_drv.h" > diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c > index 172109cc9617..32a58bb6e6b1 100644 > --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c > +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c > @@ -10,6 +10,7 @@ > #include <drm/drm_gem_shmem_helper.h> > #include <drm/drm_ioctl.h> > #include <drm/drm_managed.h> > +#include <drm/gpu_scheduler.h> > #include <linux/iommu.h> > #include <linux/pci.h> > > @@ -64,6 +65,7 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp) > goto unbind_sva; > } > mutex_init(&client->hwctx_lock); > + init_srcu_struct(&client->hwctx_srcu); > idr_init_base(&client->hwctx_idr, AMDXDNA_INVALID_CTX_HANDLE + 1); > mutex_init(&client->mm_lock); > > @@ -93,6 +95,7 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp) > XDNA_DBG(xdna, "closing pid %d", client->pid); > > idr_destroy(&client->hwctx_idr); > + cleanup_srcu_struct(&client->hwctx_srcu); > mutex_destroy(&client->hwctx_lock); > mutex_destroy(&client->mm_lock); > if (client->dev_heap) > @@ -133,6 +136,8 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = { > DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_BO, amdxdna_drm_create_bo_ioctl, 0), > DRM_IOCTL_DEF_DRV(AMDXDNA_GET_BO_INFO, amdxdna_drm_get_bo_info_ioctl, 0), > DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0), > + /* Execution */ > + DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0), > }; > > static const struct file_operations amdxdna_fops = { > @@ -190,9 +195,16 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id) > return -ENODEV; > > drmm_mutex_init(&xdna->ddev, &xdna->dev_lock); > + rwlock_init(&xdna->notifier_lock); > INIT_LIST_HEAD(&xdna->client_list); > pci_set_drvdata(pdev, xdna); > > + if (IS_ENABLED(CONFIG_LOCKDEP)) { > + fs_reclaim_acquire(GFP_KERNEL); > + might_lock(&xdna->notifier_lock); > + fs_reclaim_release(GFP_KERNEL); > + } > + > mutex_lock(&xdna->dev_lock); > ret = xdna->dev_info->ops->init(xdna); > mutex_unlock(&xdna->dev_lock); > diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h > index 3dddde4ac12a..ec22a074aac6 100644 > --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h > +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h > @@ -20,6 +20,7 @@ extern const struct drm_driver amdxdna_drm_drv; > struct amdxdna_dev; > struct amdxdna_gem_obj; > struct amdxdna_hwctx; > +struct amdxdna_sched_job; > > /* > * struct amdxdna_dev_ops - Device hardware operation callbacks > @@ -31,6 +32,7 @@ struct amdxdna_dev_ops { > void (*hwctx_fini)(struct amdxdna_hwctx *hwctx); > int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); > void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq); > + int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); > }; > > /* > @@ -69,6 +71,7 @@ struct amdxdna_dev { > struct mutex dev_lock; /* per device lock */ > struct list_head client_list; > struct amdxdna_fw_ver fw_ver; > + rwlock_t notifier_lock; /* for mmu notifier*/ > }; > > /* > @@ -88,6 +91,8 @@ struct amdxdna_client { > struct list_head node; > pid_t pid; > struct mutex hwctx_lock; /* protect hwctx */ > + /* do NOT wait this srcu when hwctx_lock is hold */ > + struct srcu_struct hwctx_srcu; > struct idr hwctx_idr; > struct amdxdna_dev *xdna; > struct drm_file *filp; > diff --git a/drivers/accel/amdxdna/amdxdna_sysfs.c b/drivers/accel/amdxdna/amdxdna_sysfs.c > index 668b94b92714..f27e4ee960a0 100644 > --- a/drivers/accel/amdxdna/amdxdna_sysfs.c > +++ b/drivers/accel/amdxdna/amdxdna_sysfs.c > @@ -3,9 +3,14 @@ > * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. > */ > > +#include <drm/amdxdna_accel.h> > #include <drm/drm_device.h> > +#include <drm/drm_gem_shmem_helper.h> > #include <drm/drm_print.h> > +#include <drm/gpu_scheduler.h> > +#include <linux/types.h> > > +#include "amdxdna_gem.h" > #include "amdxdna_pci_drv.h" > > static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf) > diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c > index 720aab0ed7c4..f00c50461b09 100644 > --- a/drivers/accel/amdxdna/npu1_regs.c > +++ b/drivers/accel/amdxdna/npu1_regs.c > @@ -5,6 +5,7 @@ > > #include <drm/amdxdna_accel.h> > #include <drm/drm_device.h> > +#include <drm/gpu_scheduler.h> > #include <linux/sizes.h> > > #include "aie2_pci.h" > diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c > index f3ea18bcf294..00cb381031d2 100644 > --- a/drivers/accel/amdxdna/npu2_regs.c > +++ b/drivers/accel/amdxdna/npu2_regs.c > @@ -5,6 +5,7 @@ > > #include <drm/amdxdna_accel.h> > #include <drm/drm_device.h> > +#include <drm/gpu_scheduler.h> > #include <linux/sizes.h> > > #include "aie2_pci.h" > diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c > index db61142f0d4e..b6dae9667cca 100644 > --- a/drivers/accel/amdxdna/npu4_regs.c > +++ b/drivers/accel/amdxdna/npu4_regs.c > @@ -5,6 +5,7 @@ > > #include <drm/amdxdna_accel.h> > #include <drm/drm_device.h> > +#include <drm/gpu_scheduler.h> > #include <linux/sizes.h> > > #include "aie2_pci.h" > diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c > index debf4e95b9bb..bed1baf8e160 100644 > --- a/drivers/accel/amdxdna/npu5_regs.c > +++ b/drivers/accel/amdxdna/npu5_regs.c > @@ -5,6 +5,7 @@ > > #include <drm/amdxdna_accel.h> > #include <drm/drm_device.h> > +#include <drm/gpu_scheduler.h> > #include <linux/sizes.h> > > #include "aie2_pci.h" > diff --git a/include/trace/events/amdxdna.h b/include/trace/events/amdxdna.h > index 33343d8f0622..c6cb2da7b706 100644 > --- a/include/trace/events/amdxdna.h > +++ b/include/trace/events/amdxdna.h > @@ -9,8 +9,49 @@ > #if !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) > #define _TRACE_AMDXDNA_H > > +#include <drm/gpu_scheduler.h> > #include <linux/tracepoint.h> > > +TRACE_EVENT(amdxdna_debug_point, > + TP_PROTO(const char *name, u64 number, const char *str), > + > + TP_ARGS(name, number, str), > + > + TP_STRUCT__entry(__string(name, name) > + __field(u64, number) > + __string(str, str)), > + > + TP_fast_assign(__assign_str(name); > + __entry->number = number; > + __assign_str(str);), > + > + TP_printk("%s:%llu %s", __get_str(name), __entry->number, > + __get_str(str)) > +); > + > +TRACE_EVENT(xdna_job, > + TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq), > + > + TP_ARGS(sched_job, name, str, seq), > + > + TP_STRUCT__entry(__string(name, name) > + __string(str, str) > + __field(u64, fence_context) > + __field(u64, fence_seqno) > + __field(u64, seq)), > + > + TP_fast_assign(__assign_str(name); > + __assign_str(str); > + __entry->fence_context = sched_job->s_fence->finished.context; > + __entry->fence_seqno = sched_job->s_fence->finished.seqno; > + __entry->seq = seq;), > + > + TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s", > + __entry->fence_context, __entry->fence_seqno, > + __get_str(name), __entry->seq, > + __get_str(str)) > +); > + > DECLARE_EVENT_CLASS(xdna_mbox_msg, > TP_PROTO(char *name, u8 chann_id, u32 opcode, u32 msg_id), > > diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h > index e3e78b79a8e7..3e88ed386fac 100644 > --- a/include/uapi/drm/amdxdna_accel.h > +++ b/include/uapi/drm/amdxdna_accel.h > @@ -13,9 +13,11 @@ > extern "C" { > #endif > > +#define AMDXDNA_INVALID_CMD_HANDLE (~0UL) > #define AMDXDNA_INVALID_ADDR (~0UL) > #define AMDXDNA_INVALID_CTX_HANDLE 0 > #define AMDXDNA_INVALID_BO_HANDLE 0 > +#define AMDXDNA_INVALID_FENCE_HANDLE 0 > > enum amdxdna_device_type { > AMDXDNA_DEV_TYPE_UNKNOWN = -1, > @@ -29,6 +31,7 @@ enum amdxdna_drm_ioctl_id { > DRM_AMDXDNA_CREATE_BO, > DRM_AMDXDNA_GET_BO_INFO, > DRM_AMDXDNA_SYNC_BO, > + DRM_AMDXDNA_EXEC_CMD, > }; > > /** > @@ -201,6 +204,37 @@ struct amdxdna_drm_sync_bo { > __u64 size; > }; > > +enum amdxdna_cmd_type { > + AMDXDNA_CMD_SUBMIT_EXEC_BUF = 0, > + AMDXDNA_CMD_SUBMIT_DEPENDENCY, > + AMDXDNA_CMD_SUBMIT_SIGNAL, > +}; > + > +/** > + * struct amdxdna_drm_exec_cmd - Execute command. > + * @ext: MBZ. > + * @ext_flags: MBZ. > + * @hwctx: Hardware context handle. > + * @type: One of command type in enum amdxdna_cmd_type. > + * @cmd_handles: Array of command handles or the command handle itself > + * in case of just one. > + * @args: Array of arguments for all command handles. > + * @cmd_count: Number of command handles in the cmd_handles array. > + * @arg_count: Number of arguments in the args array. > + * @seq: Returned sequence number for this command. > + */ > +struct amdxdna_drm_exec_cmd { > + __u64 ext; > + __u64 ext_flags; > + __u32 hwctx; > + __u32 type; > + __u64 cmd_handles; > + __u64 args; > + __u32 cmd_count; > + __u32 arg_count; > + __u64 seq; > +}; > + > #define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \ > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \ > struct amdxdna_drm_create_hwctx) > @@ -225,6 +259,10 @@ struct amdxdna_drm_sync_bo { > DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SYNC_BO, \ > struct amdxdna_drm_sync_bo) > > +#define DRM_IOCTL_AMDXDNA_EXEC_CMD \ > + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_EXEC_CMD, \ > + struct amdxdna_drm_exec_cmd) > + > #if defined(__cplusplus) > } /* extern c end */ > #endif > -- > 2.34.1 >
On 11/8/24 09:21, Matthew Brost wrote: > On Thu, Nov 07, 2024 at 08:34:45PM -0800, Lizhi Hou wrote: >> Add interfaces for user application to submit command and wait for its >> completion. >> >> Co-developed-by: Min Ma <min.ma@amd.com> >> Signed-off-by: Min Ma <min.ma@amd.com> >> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com> >> --- >> drivers/accel/amdxdna/aie2_ctx.c | 634 +++++++++++++++++- >> drivers/accel/amdxdna/aie2_message.c | 343 ++++++++++ >> drivers/accel/amdxdna/aie2_pci.c | 5 + >> drivers/accel/amdxdna/aie2_pci.h | 40 ++ >> drivers/accel/amdxdna/aie2_psp.c | 2 + >> drivers/accel/amdxdna/aie2_smu.c | 2 + >> drivers/accel/amdxdna/amdxdna_ctx.c | 320 ++++++++- >> drivers/accel/amdxdna/amdxdna_ctx.h | 111 +++ >> drivers/accel/amdxdna/amdxdna_gem.c | 1 + >> .../accel/amdxdna/amdxdna_mailbox_helper.c | 5 + >> drivers/accel/amdxdna/amdxdna_pci_drv.c | 12 + >> drivers/accel/amdxdna/amdxdna_pci_drv.h | 5 + >> drivers/accel/amdxdna/amdxdna_sysfs.c | 5 + >> drivers/accel/amdxdna/npu1_regs.c | 1 + >> drivers/accel/amdxdna/npu2_regs.c | 1 + >> drivers/accel/amdxdna/npu4_regs.c | 1 + >> drivers/accel/amdxdna/npu5_regs.c | 1 + >> include/trace/events/amdxdna.h | 41 ++ >> include/uapi/drm/amdxdna_accel.h | 38 ++ >> 19 files changed, 1559 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c >> index ae8a91dad042..4641e52b59e2 100644 >> --- a/drivers/accel/amdxdna/aie2_ctx.c >> +++ b/drivers/accel/amdxdna/aie2_ctx.c >> @@ -8,8 +8,12 @@ >> #include <drm/drm_gem.h> >> #include <drm/drm_gem_shmem_helper.h> >> #include <drm/drm_print.h> >> +#include <drm/drm_syncobj.h> >> +#include <linux/hmm.h> >> #include <linux/types.h> >> +#include <trace/events/amdxdna.h> >> >> +#include "aie2_msg_priv.h" >> #include "aie2_pci.h" >> #include "aie2_solver.h" >> #include "amdxdna_ctx.h" >> @@ -17,6 +21,342 @@ >> #include "amdxdna_mailbox.h" >> #include "amdxdna_pci_drv.h" >> >> +bool force_cmdlist; >> +module_param(force_cmdlist, bool, 0600); >> +MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)"); >> + >> +#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */ >> + >> +static void aie2_job_release(struct kref *ref) >> +{ >> + struct amdxdna_sched_job *job; >> + >> + job = container_of(ref, struct amdxdna_sched_job, refcnt); >> + amdxdna_sched_job_cleanup(job); >> + kfree(job); >> +} >> + >> +static void aie2_job_put(struct amdxdna_sched_job *job) >> +{ >> + kref_put(&job->refcnt, aie2_job_release); >> +} >> + >> +/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */ >> +static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx, >> + struct drm_sched_job *bad_job) >> +{ >> + drm_sched_stop(&hwctx->priv->sched, bad_job); >> + aie2_destroy_context(xdna->dev_handle, hwctx); >> +} >> + >> +static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx) >> +{ >> + struct amdxdna_gem_obj *heap = hwctx->priv->heap; >> + int ret; >> + >> + ret = aie2_create_context(xdna->dev_handle, hwctx); >> + if (ret) { >> + XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret); >> + goto out; >> + } >> + >> + ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id, >> + heap->mem.userptr, heap->mem.size); >> + if (ret) { >> + XDNA_ERR(xdna, "Map host buf failed, ret %d", ret); >> + goto out; >> + } >> + >> + if (hwctx->status != HWCTX_STAT_READY) { >> + XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status); >> + goto out; >> + } >> + >> + ret = aie2_config_cu(hwctx); >> + if (ret) { >> + XDNA_ERR(xdna, "Config cu failed, ret %d", ret); >> + goto out; >> + } >> + >> +out: >> + drm_sched_start(&hwctx->priv->sched); >> + XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret); >> + return ret; >> +} >> + >> +void aie2_stop_ctx_by_col_map(struct amdxdna_client *client, u32 col_map) >> +{ >> + struct amdxdna_dev *xdna = client->xdna; >> + struct amdxdna_hwctx *hwctx; >> + int next = 0; >> + >> + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); >> + mutex_lock(&client->hwctx_lock); >> + idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) { >> + /* check if the HW context uses the error column */ >> + if (!(col_map & amdxdna_hwctx_col_map(hwctx))) >> + continue; >> + >> + aie2_hwctx_stop(xdna, hwctx, NULL); >> + hwctx->old_status = hwctx->status; >> + hwctx->status = HWCTX_STAT_STOP; >> + XDNA_DBG(xdna, "Stop %s", hwctx->name); >> + } >> + mutex_unlock(&client->hwctx_lock); >> +} >> + >> +void aie2_restart_ctx(struct amdxdna_client *client) >> +{ >> + struct amdxdna_dev *xdna = client->xdna; >> + struct amdxdna_hwctx *hwctx; >> + int next = 0; >> + >> + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock)); >> + mutex_lock(&client->hwctx_lock); >> + idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) { >> + if (hwctx->status != HWCTX_STAT_STOP) >> + continue; >> + >> + hwctx->status = hwctx->old_status; >> + XDNA_DBG(xdna, "Resetting %s", hwctx->name); >> + aie2_hwctx_restart(xdna, hwctx); >> + } >> + mutex_unlock(&client->hwctx_lock); >> +} >> + >> +static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq) >> +{ >> + struct dma_fence *fence, *out_fence = NULL; >> + int ret; >> + >> + fence = drm_syncobj_fence_get(hwctx->priv->syncobj); >> + if (!fence) >> + return NULL; >> + >> + ret = dma_fence_chain_find_seqno(&fence, seq); >> + if (ret) >> + goto out; >> + >> + out_fence = dma_fence_get(dma_fence_chain_contained(fence)); >> + >> +out: >> + dma_fence_put(fence); >> + return out_fence; >> +} >> + >> +static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx) >> +{ >> + struct dma_fence *fence; >> + >> + fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1); >> + if (!fence) >> + return; >> + >> + dma_fence_wait(fence, false); >> + dma_fence_put(fence); >> +} >> + >> +static void >> +aie2_sched_notify(struct amdxdna_sched_job *job) >> +{ >> + struct dma_fence *fence = job->fence; >> + >> + trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq); >> + job->hwctx->priv->completed++; >> + dma_fence_signal(fence); >> + >> + up(&job->hwctx->priv->job_sem); >> + job->job_done = true; >> + dma_fence_put(fence); >> + mmput(job->mm); >> + aie2_job_put(job); >> +} >> + >> +static int >> +aie2_sched_resp_handler(void *handle, const u32 *data, size_t size) >> +{ >> + struct amdxdna_sched_job *job = handle; >> + struct amdxdna_gem_obj *cmd_abo; >> + u32 ret = 0; >> + u32 status; >> + >> + cmd_abo = job->cmd_bo; >> + >> + if (unlikely(!data)) >> + goto out; >> + >> + if (unlikely(size != sizeof(u32))) { >> + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); >> + ret = -EINVAL; >> + goto out; >> + } >> + >> + status = *data; >> + XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); >> + if (status == AIE2_STATUS_SUCCESS) >> + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); >> + else >> + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR); >> + >> +out: >> + aie2_sched_notify(job); >> + return ret; >> +} >> + >> +static int >> +aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size) >> +{ >> + struct amdxdna_sched_job *job = handle; >> + u32 ret = 0; >> + u32 status; >> + >> + if (unlikely(!data)) >> + goto out; >> + >> + if (unlikely(size != sizeof(u32))) { >> + ret = -EINVAL; >> + goto out; >> + } >> + >> + status = *data; >> + XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status); >> + >> +out: >> + aie2_sched_notify(job); >> + return ret; >> +} >> + >> +static int >> +aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size) >> +{ >> + struct amdxdna_sched_job *job = handle; >> + struct amdxdna_gem_obj *cmd_abo; >> + struct cmd_chain_resp *resp; >> + struct amdxdna_dev *xdna; >> + u32 fail_cmd_status; >> + u32 fail_cmd_idx; >> + u32 ret = 0; >> + >> + cmd_abo = job->cmd_bo; >> + if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) { >> + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); >> + ret = -EINVAL; >> + goto out; >> + } >> + >> + resp = (struct cmd_chain_resp *)data; >> + xdna = job->hwctx->client->xdna; >> + XDNA_DBG(xdna, "Status 0x%x", resp->status); >> + if (resp->status == AIE2_STATUS_SUCCESS) { >> + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED); >> + goto out; >> + } >> + >> + /* Slow path to handle error, read from ringbuf on BAR */ >> + fail_cmd_idx = resp->fail_cmd_idx; >> + fail_cmd_status = resp->fail_cmd_status; >> + XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x", >> + fail_cmd_idx, fail_cmd_status); >> + >> + if (fail_cmd_status == AIE2_STATUS_SUCCESS) { >> + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT); >> + ret = -EINVAL; >> + goto out; >> + } >> + amdxdna_cmd_set_state(cmd_abo, fail_cmd_status); >> + >> + if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) { >> + struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL); >> + >> + cc->error_index = fail_cmd_idx; >> + if (cc->error_index >= cc->command_count) >> + cc->error_index = 0; >> + } >> +out: >> + aie2_sched_notify(job); >> + return ret; >> +} >> + >> +static struct dma_fence * >> +aie2_sched_job_run(struct drm_sched_job *sched_job) >> +{ >> + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); >> + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; >> + struct amdxdna_hwctx *hwctx = job->hwctx; >> + struct dma_fence *fence; >> + int ret; >> + >> + if (!mmget_not_zero(job->mm)) >> + return ERR_PTR(-ESRCH); >> + >> + kref_get(&job->refcnt); >> + fence = dma_fence_get(job->fence); >> + >> + if (unlikely(!cmd_abo)) { >> + ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler); >> + goto out; >> + } >> + >> + amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW); >> + >> + if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) >> + ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); >> + else if (force_cmdlist) >> + ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler); >> + else >> + ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler); >> + >> +out: >> + if (ret) { >> + dma_fence_put(job->fence); >> + aie2_job_put(job); >> + mmput(job->mm); >> + fence = ERR_PTR(ret); >> + } >> + trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq); >> + >> + return fence; >> +} >> + >> +static void aie2_sched_job_free(struct drm_sched_job *sched_job) >> +{ >> + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); >> + struct amdxdna_hwctx *hwctx = job->hwctx; >> + >> + trace_xdna_job(sched_job, hwctx->name, "job free", job->seq); >> + if (!job->job_done) >> + up(&hwctx->priv->job_sem); >> + >> + if (job->out_fence) >> + dma_fence_put(job->out_fence); >> + drm_sched_job_cleanup(sched_job); >> + aie2_job_put(job); >> +} >> + >> +static enum drm_gpu_sched_stat >> +aie2_sched_job_timedout(struct drm_sched_job *sched_job) >> +{ >> + struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job); >> + struct amdxdna_hwctx *hwctx = job->hwctx; >> + struct amdxdna_dev *xdna; >> + >> + xdna = hwctx->client->xdna; >> + trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq); >> + mutex_lock(&xdna->dev_lock); >> + aie2_hwctx_stop(xdna, hwctx, sched_job); >> + >> + aie2_hwctx_restart(xdna, hwctx); >> + mutex_unlock(&xdna->dev_lock); >> + >> + return DRM_GPU_SCHED_STAT_NOMINAL; >> +} >> + >> +const struct drm_sched_backend_ops sched_ops = { >> + .run_job = aie2_sched_job_run, >> + .free_job = aie2_sched_job_free, >> + .timedout_job = aie2_sched_job_timedout, >> +}; >> + >> static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx) >> { >> struct amdxdna_dev *xdna = hwctx->client->xdna; >> @@ -126,13 +466,66 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx) >> XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret); >> } >> >> +static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx) >> +{ >> + struct amdxdna_dev *xdna = hwctx->client->xdna; >> + struct drm_file *filp = hwctx->client->filp; >> + struct drm_syncobj *syncobj; >> + u32 hdl; >> + int ret; >> + >> + hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE; >> + >> + ret = drm_syncobj_create(&syncobj, 0, NULL); >> + if (ret) { >> + XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret); >> + return ret; >> + } >> + ret = drm_syncobj_get_handle(filp, syncobj, &hdl); >> + if (ret) { >> + drm_syncobj_put(syncobj); >> + XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret); >> + return ret; >> + } >> + hwctx->priv->syncobj = syncobj; >> + hwctx->syncobj_hdl = hdl; >> + >> + return 0; >> +} >> + >> +static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx) >> +{ >> + /* >> + * The syncobj_hdl is owned by user space and will be cleaned up >> + * separately. >> + */ >> + drm_syncobj_put(hwctx->priv->syncobj); >> +} >> + >> +static void aie2_ctx_syncobj_add_fence(struct amdxdna_hwctx *hwctx, >> + struct dma_fence *ofence, u64 seq) >> +{ >> + struct drm_syncobj *syncobj = hwctx->priv->syncobj; >> + struct dma_fence_chain *chain; >> + >> + if (!syncobj) >> + return; >> + >> + chain = dma_fence_chain_alloc(); >> + if (!chain) >> + return; > You have very subtlety broken dma-fencing rules. This is going to create > a lock chain of: > > mutex_lock(&hwctx->priv->io_lock) > recliam(); > mutex_unlock(&hwctx->priv->io_lock) > > But you published the dma-fence for the job which is in the path of > reclaim. Lockdep should complain if you have all the correct annotations > and in theory you can deadlock. > > So I think you prealloc the chain() before publishing the dma-fence and > then I'd also prime 'hwctx->priv->io_lock' which a reclaim annotation. > > e.g. > > fs_reclaim_acquire(); > might_lock(&hwctx->priv->io_lock); > fs_reclaim_release(); Ok, it makes sense to prealloc the chain. > >> + >> + drm_syncobj_add_point(syncobj, chain, ofence, seq); >> +} >> + >> int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) >> { >> struct amdxdna_client *client = hwctx->client; >> struct amdxdna_dev *xdna = client->xdna; >> + struct drm_gpu_scheduler *sched; >> struct amdxdna_hwctx_priv *priv; >> struct amdxdna_gem_obj *heap; >> - int ret; >> + int i, ret; >> >> priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL); >> if (!priv) >> @@ -150,6 +543,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) >> drm_gem_object_get(to_gobj(heap)); >> mutex_unlock(&client->mm_lock); >> priv->heap = heap; >> + sema_init(&priv->job_sem, HWCTX_MAX_CMDS); >> >> ret = amdxdna_gem_pin(heap); >> if (ret) { >> @@ -157,10 +551,47 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) >> goto put_heap; >> } >> >> + for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { >> + struct amdxdna_gem_obj *abo; >> + struct amdxdna_drm_create_bo args = { >> + .flags = 0, >> + .type = AMDXDNA_BO_DEV, >> + .vaddr = 0, >> + .size = MAX_CHAIN_CMDBUF_SIZE, >> + }; >> + >> + abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true); >> + if (IS_ERR(abo)) { >> + ret = PTR_ERR(abo); >> + goto free_cmd_bufs; >> + } >> + >> + XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx", >> + i, abo->mem.dev_addr, abo->mem.size); >> + priv->cmd_buf[i] = abo; >> + } >> + >> + sched = &priv->sched; >> + mutex_init(&priv->io_lock); >> + ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, >> + HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT), >> + NULL, NULL, hwctx->name, xdna->ddev.dev); >> + if (ret) { >> + XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret); >> + goto free_cmd_bufs; >> + } >> + >> + ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL, >> + &sched, 1, NULL); >> + if (ret) { >> + XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret); >> + goto free_sched; >> + } >> + >> ret = aie2_hwctx_col_list(hwctx); >> if (ret) { >> XDNA_ERR(xdna, "Create col list failed, ret %d", ret); >> - goto unpin; >> + goto free_entity; >> } >> >> ret = aie2_alloc_resource(hwctx); >> @@ -175,6 +606,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) >> XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret); >> goto release_resource; >> } >> + >> + ret = aie2_ctx_syncobj_create(hwctx); >> + if (ret) { >> + XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret); >> + goto release_resource; >> + } >> + >> hwctx->status = HWCTX_STAT_INIT; >> >> XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name); >> @@ -185,7 +623,16 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) >> aie2_release_resource(hwctx); >> free_col_list: >> kfree(hwctx->col_list); >> -unpin: >> +free_entity: >> + drm_sched_entity_destroy(&priv->entity); >> +free_sched: >> + drm_sched_fini(&priv->sched); >> +free_cmd_bufs: >> + for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) { >> + if (!priv->cmd_buf[i]) >> + continue; >> + drm_gem_object_put(to_gobj(priv->cmd_buf[i])); >> + } >> amdxdna_gem_unpin(heap); >> put_heap: >> drm_gem_object_put(to_gobj(heap)); >> @@ -196,11 +643,35 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx) >> >> void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx) >> { >> + struct amdxdna_dev *xdna; >> + int idx; >> + >> + xdna = hwctx->client->xdna; >> + drm_sched_wqueue_stop(&hwctx->priv->sched); >> + >> + /* Now, scheduler will not send command to device. */ >> aie2_release_resource(hwctx); >> >> + /* >> + * All submitted commands are aborted. >> + * Restart scheduler queues to cleanup jobs. The amdxdna_sched_job_run() >> + * will return NODEV if it is called. >> + */ >> + drm_sched_wqueue_start(&hwctx->priv->sched); >> + >> + aie2_hwctx_wait_for_idle(hwctx); >> + drm_sched_entity_destroy(&hwctx->priv->entity); >> + drm_sched_fini(&hwctx->priv->sched); >> + aie2_ctx_syncobj_destroy(hwctx); >> + >> + XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq); >> + >> + for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++) >> + drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx])); >> amdxdna_gem_unpin(hwctx->priv->heap); >> drm_gem_object_put(to_gobj(hwctx->priv->heap)); >> >> + mutex_destroy(&hwctx->priv->io_lock); >> kfree(hwctx->col_list); >> kfree(hwctx->priv); >> kfree(hwctx->cus); >> @@ -267,3 +738,160 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu >> return -EOPNOTSUPP; >> } >> } >> + >> +static int aie2_populate_range(struct amdxdna_gem_obj *abo) >> +{ >> + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); >> + struct mm_struct *mm = abo->mem.notifier.mm; >> + struct hmm_range range = { 0 }; >> + unsigned long timeout; >> + int ret; >> + >> + XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx", >> + abo->mem.userptr, abo->mem.size); >> + range.notifier = &abo->mem.notifier; >> + range.start = abo->mem.userptr; >> + range.end = abo->mem.userptr + abo->mem.size; >> + range.hmm_pfns = abo->mem.pfns; >> + range.default_flags = HMM_PFN_REQ_FAULT; >> + >> + if (!mmget_not_zero(mm)) >> + return -EFAULT; >> + >> + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); >> +again: >> + range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier); >> + mmap_read_lock(mm); >> + ret = hmm_range_fault(&range); >> + mmap_read_unlock(mm); >> + if (ret) { >> + if (time_after(jiffies, timeout)) { >> + ret = -ETIME; >> + goto put_mm; >> + } >> + >> + if (ret == -EBUSY) >> + goto again; >> + >> + goto put_mm; >> + } >> + >> + read_lock(&xdna->notifier_lock); >> + if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) { >> + read_unlock(&xdna->notifier_lock); >> + goto again; >> + } >> + abo->mem.map_invalid = false; >> + read_unlock(&xdna->notifier_lock); >> + >> +put_mm: >> + mmput(mm); >> + return ret; >> +} >> + >> +static void aie2_hwctx_push_job(struct amdxdna_sched_job *job, u64 *seq) >> +{ >> + struct amdxdna_hwctx *hwctx = job->hwctx; >> + >> + mutex_lock(&hwctx->priv->io_lock); >> + drm_sched_job_arm(&job->base); >> + job->seq = hwctx->priv->seq++; >> + *seq = job->seq; >> + >> + job->out_fence = dma_fence_get(&job->base.s_fence->finished); >> + drm_sched_entity_push_job(&job->base); >> + aie2_ctx_syncobj_add_fence(hwctx, job->out_fence, *seq); >> + mutex_unlock(&hwctx->priv->io_lock); >> +} >> + >> +int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq) >> +{ >> + struct amdxdna_dev *xdna = hwctx->client->xdna; >> + struct ww_acquire_ctx acquire_ctx; >> + struct amdxdna_gem_obj *abo; >> + unsigned long timeout = 0; >> + int ret, i; >> + >> + ret = down_interruptible(&hwctx->priv->job_sem); >> + if (ret) { >> + XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret); >> + return ret; >> + } >> + >> + ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx); >> + if (ret) { >> + XDNA_ERR(xdna, "DRM job init failed, ret %d", ret); >> + goto up_sem; >> + } >> + >> +retry: >> + ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); >> + if (ret) { >> + XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); >> + goto cleanup_job; >> + } >> + >> + for (i = 0; i < job->bo_cnt; i++) { >> + ret = dma_resv_reserve_fences(job->bos[i]->resv, 1); >> + if (ret) { >> + XDNA_WARN(xdna, "Failed to reserve fences %d", ret); >> + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); >> + goto cleanup_job; >> + } >> + } >> + >> + read_lock(&xdna->notifier_lock); >> + for (i = 0; i < job->bo_cnt; i++) { >> + abo = to_xdna_obj(job->bos[i]); >> + if (abo->mem.map_invalid) { >> + read_unlock(&xdna->notifier_lock); >> + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); >> + if (!timeout) { >> + timeout = jiffies + >> + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); >> + } else if (time_after(jiffies, timeout)) { >> + ret = -ETIME; >> + goto cleanup_job; >> + } >> + >> + ret = aie2_populate_range(abo); >> + if (ret) >> + goto cleanup_job; >> + goto retry; >> + } >> + } >> + > Any reason you can't arm the job here and install job's finished fence > in dma-resv slot? IIRC at one point in time in Xe we used the > 'job->fence', this is a hardware fence in Xe, assuming this the same > here and it ended up causing some issues - exactly what I cannot recall. > The scheduler again is really designed to install the job's finished > fence in dma-resv / syncobjs rather than driver internal fences. > > That of course would mean taking hwctx->priv->io_lock underneath all > other locks in this loop at the final step. Ok, I will change this. Thanks, Lizhi > > Matt > >> + for (i = 0; i < job->bo_cnt; i++) >> + dma_resv_add_fence(job->bos[i]->resv, job->fence, DMA_RESV_USAGE_WRITE); >> + >> + read_unlock(&xdna->notifier_lock); >> + drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); >> + >> + aie2_hwctx_push_job(job, seq); >> + >> + return 0; >> + >> +cleanup_job: >> + drm_sched_job_cleanup(&job->base); >> +up_sem: >> + up(&hwctx->priv->job_sem); >> + job->job_done = true; >> + return ret; >> +} >> + >> +void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, >> + unsigned long cur_seq) >> +{ >> + struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev); >> + struct drm_gem_object *gobj = to_gobj(abo); >> + long ret; >> + >> + write_lock(&xdna->notifier_lock); >> + abo->mem.map_invalid = true; >> + mmu_interval_set_seq(&abo->mem.notifier, cur_seq); >> + write_unlock(&xdna->notifier_lock); >> + ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP, >> + true, MAX_SCHEDULE_TIMEOUT); >> + if (!ret || ret == -ERESTARTSYS) >> + XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret); >> +} >> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c >> index 40d9e4261e8b..db62954eb378 100644 >> --- a/drivers/accel/amdxdna/aie2_message.c >> +++ b/drivers/accel/amdxdna/aie2_message.c >> @@ -4,10 +4,12 @@ >> */ >> >> #include <drm/amdxdna_accel.h> >> +#include <drm/drm_cache.h> >> #include <drm/drm_device.h> >> #include <drm/drm_gem.h> >> #include <drm/drm_gem_shmem_helper.h> >> #include <drm/drm_print.h> >> +#include <drm/gpu_scheduler.h> >> #include <linux/bitfield.h> >> #include <linux/errno.h> >> #include <linux/pci.h> >> @@ -362,3 +364,344 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx) >> msg.opcode, resp.status, ret); >> return ret; >> } >> + >> +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, >> + int (*notify_cb)(void *, const u32 *, size_t)) >> +{ >> + struct mailbox_channel *chann = hwctx->priv->mbox_chann; >> + struct amdxdna_dev *xdna = hwctx->client->xdna; >> + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; >> + union { >> + struct execute_buffer_req ebuf; >> + struct exec_dpu_req dpu; >> + } req; >> + struct xdna_mailbox_msg msg; >> + u32 payload_len; >> + void *payload; >> + int cu_idx; >> + int ret; >> + u32 op; >> + >> + if (!chann) >> + return -ENODEV; >> + >> + payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); >> + if (!payload) { >> + XDNA_ERR(xdna, "Invalid command, cannot get payload"); >> + return -EINVAL; >> + } >> + >> + cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo); >> + if (cu_idx < 0) { >> + XDNA_DBG(xdna, "Invalid cu idx"); >> + return -EINVAL; >> + } >> + >> + op = amdxdna_cmd_get_op(cmd_abo); >> + switch (op) { >> + case ERT_START_CU: >> + if (unlikely(payload_len > sizeof(req.ebuf.payload))) >> + XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len); >> + req.ebuf.cu_idx = cu_idx; >> + memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload)); >> + msg.send_size = sizeof(req.ebuf); >> + msg.opcode = MSG_OP_EXECUTE_BUFFER_CF; >> + break; >> + case ERT_START_NPU: { >> + struct amdxdna_cmd_start_npu *sn = payload; >> + >> + if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload))) >> + XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len); >> + req.dpu.inst_buf_addr = sn->buffer; >> + req.dpu.inst_size = sn->buffer_size; >> + req.dpu.inst_prop_cnt = sn->prop_count; >> + req.dpu.cu_idx = cu_idx; >> + memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload)); >> + msg.send_size = sizeof(req.dpu); >> + msg.opcode = MSG_OP_EXEC_DPU; >> + break; >> + } >> + default: >> + XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op); >> + return -EINVAL; >> + } >> + msg.handle = job; >> + msg.notify_cb = notify_cb; >> + msg.send_data = (u8 *)&req; >> + print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req, >> + 0x40, false); >> + >> + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); >> + if (ret) { >> + XDNA_ERR(xdna, "Send message failed"); >> + return ret; >> + } >> + >> + return 0; >> +} >> + >> +static int >> +aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset, >> + struct amdxdna_gem_obj *abo, u32 *size) >> +{ >> + struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset; >> + int cu_idx = amdxdna_cmd_get_cu_idx(abo); >> + u32 payload_len; >> + void *payload; >> + >> + if (cu_idx < 0) >> + return -EINVAL; >> + >> + payload = amdxdna_cmd_get_payload(abo, &payload_len); >> + if (!payload) >> + return -EINVAL; >> + >> + if (!slot_cf_has_space(offset, payload_len)) >> + return -ENOSPC; >> + >> + buf->cu_idx = cu_idx; >> + buf->arg_cnt = payload_len / sizeof(u32); >> + memcpy(buf->args, payload, payload_len); >> + /* Accurate buf size to hint firmware to do necessary copy */ >> + *size = sizeof(*buf) + payload_len; >> + return 0; >> +} >> + >> +static int >> +aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, >> + struct amdxdna_gem_obj *abo, u32 *size) >> +{ >> + struct cmd_chain_slot_dpu *buf = cmd_buf + offset; >> + int cu_idx = amdxdna_cmd_get_cu_idx(abo); >> + struct amdxdna_cmd_start_npu *sn; >> + u32 payload_len; >> + void *payload; >> + u32 arg_sz; >> + >> + if (cu_idx < 0) >> + return -EINVAL; >> + >> + payload = amdxdna_cmd_get_payload(abo, &payload_len); >> + if (!payload) >> + return -EINVAL; >> + sn = payload; >> + arg_sz = payload_len - sizeof(*sn); >> + if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) >> + return -EINVAL; >> + >> + if (!slot_dpu_has_space(offset, arg_sz)) >> + return -ENOSPC; >> + >> + buf->inst_buf_addr = sn->buffer; >> + buf->inst_size = sn->buffer_size; >> + buf->inst_prop_cnt = sn->prop_count; >> + buf->cu_idx = cu_idx; >> + buf->arg_cnt = arg_sz / sizeof(u32); >> + memcpy(buf->args, sn->prop_args, arg_sz); >> + >> + /* Accurate buf size to hint firmware to do necessary copy */ >> + *size += sizeof(*buf) + arg_sz; >> + return 0; >> +} >> + >> +static int >> +aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset, >> + struct amdxdna_gem_obj *abo, u32 *size) >> +{ >> + u32 this_op = amdxdna_cmd_get_op(abo); >> + void *cmd_buf = cmdbuf_abo->mem.kva; >> + int ret; >> + >> + if (this_op != op) { >> + ret = -EINVAL; >> + goto done; >> + } >> + >> + switch (op) { >> + case ERT_START_CU: >> + ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size); >> + break; >> + case ERT_START_NPU: >> + ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size); >> + break; >> + default: >> + ret = -EOPNOTSUPP; >> + } >> + >> +done: >> + if (ret) { >> + XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d", >> + op, ret); >> + } >> + return ret; >> +} >> + >> +static inline struct amdxdna_gem_obj * >> +aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job) >> +{ >> + int idx = get_job_idx(job->seq); >> + >> + return job->hwctx->priv->cmd_buf[idx]; >> +} >> + >> +static void >> +aie2_cmdlist_prepare_request(struct cmd_chain_req *req, >> + struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt) >> +{ >> + req->buf_addr = cmdbuf_abo->mem.dev_addr; >> + req->buf_size = size; >> + req->count = cnt; >> + drm_clflush_virt_range(cmdbuf_abo->mem.kva, size); >> + XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d", >> + req->buf_addr, size, cnt); >> +} >> + >> +static inline u32 >> +aie2_cmd_op_to_msg_op(u32 op) >> +{ >> + switch (op) { >> + case ERT_START_CU: >> + return MSG_OP_CHAIN_EXEC_BUFFER_CF; >> + case ERT_START_NPU: >> + return MSG_OP_CHAIN_EXEC_DPU; >> + default: >> + return MSG_OP_MAX_OPCODE; >> + } >> +} >> + >> +int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, >> + struct amdxdna_sched_job *job, >> + int (*notify_cb)(void *, const u32 *, size_t)) >> +{ >> + struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); >> + struct mailbox_channel *chann = hwctx->priv->mbox_chann; >> + struct amdxdna_client *client = hwctx->client; >> + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; >> + struct amdxdna_cmd_chain *payload; >> + struct xdna_mailbox_msg msg; >> + struct cmd_chain_req req; >> + u32 payload_len; >> + u32 offset = 0; >> + u32 size; >> + int ret; >> + u32 op; >> + u32 i; >> + >> + op = amdxdna_cmd_get_op(cmd_abo); >> + payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len); >> + if (op != ERT_CMD_CHAIN || !payload || >> + payload_len < struct_size(payload, data, payload->command_count)) >> + return -EINVAL; >> + >> + for (i = 0; i < payload->command_count; i++) { >> + u32 boh = (u32)(payload->data[i]); >> + struct amdxdna_gem_obj *abo; >> + >> + abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD); >> + if (!abo) { >> + XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh); >> + return -ENOENT; >> + } >> + >> + /* All sub-cmd should have same op, use the first one. */ >> + if (i == 0) >> + op = amdxdna_cmd_get_op(abo); >> + >> + ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size); >> + amdxdna_gem_put_obj(abo); >> + if (ret) >> + return -EINVAL; >> + >> + offset += size; >> + } >> + >> + /* The offset is the accumulated total size of the cmd buffer */ >> + aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count); >> + >> + msg.opcode = aie2_cmd_op_to_msg_op(op); >> + if (msg.opcode == MSG_OP_MAX_OPCODE) >> + return -EOPNOTSUPP; >> + msg.handle = job; >> + msg.notify_cb = notify_cb; >> + msg.send_data = (u8 *)&req; >> + msg.send_size = sizeof(req); >> + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); >> + if (ret) { >> + XDNA_ERR(hwctx->client->xdna, "Send message failed"); >> + return ret; >> + } >> + >> + return 0; >> +} >> + >> +int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, >> + struct amdxdna_sched_job *job, >> + int (*notify_cb)(void *, const u32 *, size_t)) >> +{ >> + struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job); >> + struct mailbox_channel *chann = hwctx->priv->mbox_chann; >> + struct amdxdna_gem_obj *cmd_abo = job->cmd_bo; >> + struct xdna_mailbox_msg msg; >> + struct cmd_chain_req req; >> + u32 size; >> + int ret; >> + u32 op; >> + >> + op = amdxdna_cmd_get_op(cmd_abo); >> + ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size); >> + if (ret) >> + return ret; >> + >> + aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1); >> + >> + msg.opcode = aie2_cmd_op_to_msg_op(op); >> + if (msg.opcode == MSG_OP_MAX_OPCODE) >> + return -EOPNOTSUPP; >> + msg.handle = job; >> + msg.notify_cb = notify_cb; >> + msg.send_data = (u8 *)&req; >> + msg.send_size = sizeof(req); >> + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); >> + if (ret) { >> + XDNA_ERR(hwctx->client->xdna, "Send message failed"); >> + return ret; >> + } >> + >> + return 0; >> +} >> + >> +int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, >> + int (*notify_cb)(void *, const u32 *, size_t)) >> +{ >> + struct mailbox_channel *chann = hwctx->priv->mbox_chann; >> + struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]); >> + struct amdxdna_dev *xdna = hwctx->client->xdna; >> + struct xdna_mailbox_msg msg; >> + struct sync_bo_req req; >> + int ret = 0; >> + >> + req.src_addr = 0; >> + req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr; >> + req.size = abo->mem.size; >> + >> + /* Device to Host */ >> + req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) | >> + FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM); >> + >> + XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed", >> + req.size, req.src_addr, req.dst_addr); >> + >> + msg.handle = job; >> + msg.notify_cb = notify_cb; >> + msg.send_data = (u8 *)&req; >> + msg.send_size = sizeof(req); >> + msg.opcode = MSG_OP_SYNC_BO; >> + >> + ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT); >> + if (ret) { >> + XDNA_ERR(xdna, "Send message failed"); >> + return ret; >> + } >> + >> + return 0; >> +} >> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c >> index caeb07d1dc6b..fb369d615969 100644 >> --- a/drivers/accel/amdxdna/aie2_pci.c >> +++ b/drivers/accel/amdxdna/aie2_pci.c >> @@ -5,8 +5,10 @@ >> >> #include <drm/amdxdna_accel.h> >> #include <drm/drm_device.h> >> +#include <drm/drm_gem_shmem_helper.h> >> #include <drm/drm_managed.h> >> #include <drm/drm_print.h> >> +#include <drm/gpu_scheduler.h> >> #include <linux/errno.h> >> #include <linux/firmware.h> >> #include <linux/iommu.h> >> @@ -17,6 +19,7 @@ >> #include "aie2_pci.h" >> #include "aie2_solver.h" >> #include "amdxdna_ctx.h" >> +#include "amdxdna_gem.h" >> #include "amdxdna_mailbox.h" >> #include "amdxdna_pci_drv.h" >> >> @@ -495,4 +498,6 @@ const struct amdxdna_dev_ops aie2_ops = { >> .hwctx_init = aie2_hwctx_init, >> .hwctx_fini = aie2_hwctx_fini, >> .hwctx_config = aie2_hwctx_config, >> + .cmd_submit = aie2_cmd_submit, >> + .hmm_invalidate = aie2_hmm_invalidate, >> }; >> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h >> index 3ac936e2c9d1..bc6910875d9d 100644 >> --- a/drivers/accel/amdxdna/aie2_pci.h >> +++ b/drivers/accel/amdxdna/aie2_pci.h >> @@ -6,6 +6,8 @@ >> #ifndef _AIE2_PCI_H_ >> #define _AIE2_PCI_H_ >> >> +#include <linux/semaphore.h> >> + >> #include "amdxdna_mailbox.h" >> >> #define AIE2_INTERVAL 20000 /* us */ >> @@ -76,8 +78,10 @@ enum psp_reg_idx { >> PSP_MAX_REGS /* Keep this at the end */ >> }; >> >> +struct amdxdna_client; >> struct amdxdna_fw_ver; >> struct amdxdna_hwctx; >> +struct amdxdna_sched_job; >> >> struct psp_config { >> const void *fw_buf; >> @@ -118,9 +122,31 @@ struct rt_config { >> u32 value; >> }; >> >> +/* >> + * Define the maximum number of pending commands in a hardware context. >> + * Must be power of 2! >> + */ >> +#define HWCTX_MAX_CMDS 4 >> +#define get_job_idx(seq) ((seq) & (HWCTX_MAX_CMDS - 1)) >> struct amdxdna_hwctx_priv { >> struct amdxdna_gem_obj *heap; >> void *mbox_chann; >> + >> + struct drm_gpu_scheduler sched; >> + struct drm_sched_entity entity; >> + >> + struct mutex io_lock; /* protect seq and cmd order */ >> + struct wait_queue_head job_free_wq; >> + u32 num_pending; >> + u64 seq; >> + struct semaphore job_sem; >> + bool job_done; >> + >> + /* Completed job counter */ >> + u64 completed; >> + >> + struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS]; >> + struct drm_syncobj *syncobj; >> }; >> >> struct amdxdna_dev_hdl { >> @@ -199,10 +225,24 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct >> int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx); >> int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size); >> int aie2_config_cu(struct amdxdna_hwctx *hwctx); >> +int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, >> + int (*notify_cb)(void *, const u32 *, size_t)); >> +int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx, >> + struct amdxdna_sched_job *job, >> + int (*notify_cb)(void *, const u32 *, size_t)); >> +int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx, >> + struct amdxdna_sched_job *job, >> + int (*notify_cb)(void *, const u32 *, size_t)); >> +int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, >> + int (*notify_cb)(void *, const u32 *, size_t)); >> >> /* aie2_hwctx.c */ >> int aie2_hwctx_init(struct amdxdna_hwctx *hwctx); >> void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx); >> int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); >> +int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); >> +void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq); >> +void aie2_stop_ctx_by_col_map(struct amdxdna_client *client, u32 col_map); >> +void aie2_restart_ctx(struct amdxdna_client *client); >> >> #endif /* _AIE2_PCI_H_ */ >> diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c >> index b03501e81065..dc3a072ce3b6 100644 >> --- a/drivers/accel/amdxdna/aie2_psp.c >> +++ b/drivers/accel/amdxdna/aie2_psp.c >> @@ -4,8 +4,10 @@ >> */ >> >> #include <drm/drm_device.h> >> +#include <drm/drm_gem_shmem_helper.h> >> #include <drm/drm_managed.h> >> #include <drm/drm_print.h> >> +#include <drm/gpu_scheduler.h> >> #include <linux/bitfield.h> >> #include <linux/iopoll.h> >> >> diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/amdxdna/aie2_smu.c >> index 3fa7064649aa..91893d438da7 100644 >> --- a/drivers/accel/amdxdna/aie2_smu.c >> +++ b/drivers/accel/amdxdna/aie2_smu.c >> @@ -4,7 +4,9 @@ >> */ >> >> #include <drm/drm_device.h> >> +#include <drm/drm_gem_shmem_helper.h> >> #include <drm/drm_print.h> >> +#include <drm/gpu_scheduler.h> >> #include <linux/iopoll.h> >> >> #include "aie2_pci.h" >> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c >> index 9489399adea1..13cfbab9caa0 100644 >> --- a/drivers/accel/amdxdna/amdxdna_ctx.c >> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c >> @@ -7,17 +7,65 @@ >> #include <drm/drm_device.h> >> #include <drm/drm_drv.h> >> #include <drm/drm_file.h> >> +#include <drm/drm_gem.h> >> +#include <drm/drm_gem_shmem_helper.h> >> #include <drm/drm_print.h> >> +#include <drm/gpu_scheduler.h> >> +#include <trace/events/amdxdna.h> >> >> #include "amdxdna_ctx.h" >> +#include "amdxdna_gem.h" >> #include "amdxdna_pci_drv.h" >> >> #define MAX_HWCTX_ID 255 >> +#define MAX_ARG_COUNT 4095 >> >> -static void amdxdna_hwctx_destroy(struct amdxdna_hwctx *hwctx) >> +struct amdxdna_fence { >> + struct dma_fence base; >> + spinlock_t lock; /* for base */ >> + struct amdxdna_hwctx *hwctx; >> +}; >> + >> +static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence) >> +{ >> + return KBUILD_MODNAME; >> +} >> + >> +static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence) >> +{ >> + struct amdxdna_fence *xdna_fence; >> + >> + xdna_fence = container_of(fence, struct amdxdna_fence, base); >> + >> + return xdna_fence->hwctx->name; >> +} >> + >> +static const struct dma_fence_ops fence_ops = { >> + .get_driver_name = amdxdna_fence_get_driver_name, >> + .get_timeline_name = amdxdna_fence_get_timeline_name, >> +}; >> + >> +static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx) >> +{ >> + struct amdxdna_fence *fence; >> + >> + fence = kzalloc(sizeof(*fence), GFP_KERNEL); >> + if (!fence) >> + return NULL; >> + >> + fence->hwctx = hwctx; >> + spin_lock_init(&fence->lock); >> + dma_fence_init(&fence->base, &fence_ops, &fence->lock, hwctx->id, 0); >> + return &fence->base; >> +} >> + >> +static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx, >> + struct srcu_struct *ss) >> { >> struct amdxdna_dev *xdna = hwctx->client->xdna; >> >> + synchronize_srcu(ss); >> + >> /* At this point, user is not able to submit new commands */ >> mutex_lock(&xdna->dev_lock); >> xdna->dev_info->ops->hwctx_fini(hwctx); >> @@ -27,6 +75,46 @@ static void amdxdna_hwctx_destroy(struct amdxdna_hwctx *hwctx) >> kfree(hwctx); >> } >> >> +void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size) >> +{ >> + struct amdxdna_cmd *cmd = abo->mem.kva; >> + u32 num_masks, count; >> + >> + if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) >> + num_masks = 0; >> + else >> + num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header); >> + >> + if (size) { >> + count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header); >> + if (unlikely(count <= num_masks)) { >> + *size = 0; >> + return NULL; >> + } >> + *size = (count - num_masks) * sizeof(u32); >> + } >> + return &cmd->data[num_masks]; >> +} >> + >> +int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo) >> +{ >> + struct amdxdna_cmd *cmd = abo->mem.kva; >> + u32 num_masks, i; >> + u32 *cu_mask; >> + >> + if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) >> + return -1; >> + >> + num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header); >> + cu_mask = cmd->data; >> + for (i = 0; i < num_masks; i++) { >> + if (cu_mask[i]) >> + return ffs(cu_mask[i]) - 1; >> + } >> + >> + return -1; >> +} >> + >> /* >> * This should be called in close() and remove(). DO NOT call in other syscalls. >> * This guarantee that when hwctx and resources will be released, if user >> @@ -43,7 +131,7 @@ void amdxdna_hwctx_remove_all(struct amdxdna_client *client) >> client->pid, hwctx->id); >> idr_remove(&client->hwctx_idr, hwctx->id); >> mutex_unlock(&client->hwctx_lock); >> - amdxdna_hwctx_destroy(hwctx); >> + amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); >> mutex_lock(&client->hwctx_lock); >> } >> mutex_unlock(&client->hwctx_lock); >> @@ -135,6 +223,12 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d >> if (!drm_dev_enter(dev, &idx)) >> return -ENODEV; >> >> + /* >> + * Use hwctx_lock to achieve exclusion with other hwctx writers, >> + * SRCU to synchronize with exec/wait command ioctls. >> + * >> + * The pushed jobs are handled by DRM scheduler during destroy. >> + */ >> mutex_lock(&client->hwctx_lock); >> hwctx = idr_find(&client->hwctx_idr, args->handle); >> if (!hwctx) { >> @@ -147,7 +241,7 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d >> idr_remove(&client->hwctx_idr, hwctx->id); >> mutex_unlock(&client->hwctx_lock); >> >> - amdxdna_hwctx_destroy(hwctx); >> + amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu); >> >> XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle); >> out: >> @@ -161,10 +255,10 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr >> struct amdxdna_drm_config_hwctx *args = data; >> struct amdxdna_dev *xdna = to_xdna_dev(dev); >> struct amdxdna_hwctx *hwctx; >> + int ret, idx; >> u32 buf_size; >> void *buf; >> u64 val; >> - int ret; >> >> if (!xdna->dev_info->ops->hwctx_config) >> return -EOPNOTSUPP; >> @@ -203,17 +297,231 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr >> } >> >> mutex_lock(&xdna->dev_lock); >> + idx = srcu_read_lock(&client->hwctx_srcu); >> hwctx = idr_find(&client->hwctx_idr, args->handle); >> if (!hwctx) { >> XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle); >> ret = -EINVAL; >> - goto unlock; >> + goto unlock_srcu; >> } >> >> ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size); >> >> -unlock: >> +unlock_srcu: >> + srcu_read_unlock(&client->hwctx_srcu, idx); >> mutex_unlock(&xdna->dev_lock); >> kfree(buf); >> return ret; >> } >> + >> +static void >> +amdxdna_arg_bos_put(struct amdxdna_sched_job *job) >> +{ >> + int i; >> + >> + for (i = 0; i < job->bo_cnt; i++) { >> + if (!job->bos[i]) >> + break; >> + drm_gem_object_put(job->bos[i]); >> + } >> +} >> + >> +static int >> +amdxdna_arg_bos_lookup(struct amdxdna_client *client, >> + struct amdxdna_sched_job *job, >> + u32 *bo_hdls, u32 bo_cnt) >> +{ >> + struct drm_gem_object *gobj; >> + int i, ret; >> + >> + job->bo_cnt = bo_cnt; >> + for (i = 0; i < job->bo_cnt; i++) { >> + struct amdxdna_gem_obj *abo; >> + >> + gobj = drm_gem_object_lookup(client->filp, bo_hdls[i]); >> + if (!gobj) { >> + ret = -ENOENT; >> + goto put_shmem_bo; >> + } >> + abo = to_xdna_obj(gobj); >> + >> + mutex_lock(&abo->lock); >> + if (abo->pinned) { >> + mutex_unlock(&abo->lock); >> + job->bos[i] = gobj; >> + continue; >> + } >> + >> + ret = amdxdna_gem_pin_nolock(abo); >> + if (ret) { >> + mutex_unlock(&abo->lock); >> + drm_gem_object_put(gobj); >> + goto put_shmem_bo; >> + } >> + abo->pinned = true; >> + mutex_unlock(&abo->lock); >> + >> + job->bos[i] = gobj; >> + } >> + >> + return 0; >> + >> +put_shmem_bo: >> + amdxdna_arg_bos_put(job); >> + return ret; >> +} >> + >> +void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job) >> +{ >> + trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release"); >> + amdxdna_arg_bos_put(job); >> + amdxdna_gem_put_obj(job->cmd_bo); >> +} >> + >> +int amdxdna_cmd_submit(struct amdxdna_client *client, >> + u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt, >> + u32 hwctx_hdl, u64 *seq) >> +{ >> + struct amdxdna_dev *xdna = client->xdna; >> + struct amdxdna_sched_job *job; >> + struct amdxdna_hwctx *hwctx; >> + int ret, idx; >> + >> + XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt); >> + job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL); >> + if (!job) >> + return -ENOMEM; >> + >> + if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) { >> + job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD); >> + if (!job->cmd_bo) { >> + XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl); >> + ret = -EINVAL; >> + goto free_job; >> + } >> + } else { >> + job->cmd_bo = NULL; >> + } >> + >> + ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt); >> + if (ret) { >> + XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret); >> + goto cmd_put; >> + } >> + >> + idx = srcu_read_lock(&client->hwctx_srcu); >> + hwctx = idr_find(&client->hwctx_idr, hwctx_hdl); >> + if (!hwctx) { >> + XDNA_DBG(xdna, "PID %d failed to get hwctx %d", >> + client->pid, hwctx_hdl); >> + ret = -EINVAL; >> + goto unlock_srcu; >> + } >> + >> + if (hwctx->status != HWCTX_STAT_READY) { >> + XDNA_ERR(xdna, "HW Context is not ready"); >> + ret = -EINVAL; >> + goto unlock_srcu; >> + } >> + >> + job->hwctx = hwctx; >> + job->mm = current->mm; >> + >> + job->fence = amdxdna_fence_create(hwctx); >> + if (!job->fence) { >> + XDNA_ERR(xdna, "Failed to create fence"); >> + ret = -ENOMEM; >> + goto unlock_srcu; >> + } >> + kref_init(&job->refcnt); >> + >> + ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq); >> + if (ret) >> + goto put_fence; >> + >> + /* >> + * The amdxdna_hwctx_destroy_rcu() will release hwctx and associated >> + * resource after synchronize_srcu(). The submitted jobs should be >> + * handled by the queue, for example DRM scheduler, in device layer. >> + * For here we can unlock SRCU. >> + */ >> + srcu_read_unlock(&client->hwctx_srcu, idx); >> + trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed"); >> + >> + return 0; >> + >> +put_fence: >> + dma_fence_put(job->fence); >> +unlock_srcu: >> + srcu_read_unlock(&client->hwctx_srcu, idx); >> + amdxdna_arg_bos_put(job); >> +cmd_put: >> + amdxdna_gem_put_obj(job->cmd_bo); >> +free_job: >> + kfree(job); >> + return ret; >> +} >> + >> +/* >> + * The submit command ioctl submits a command to firmware. One firmware command >> + * may contain multiple command BOs for processing as a whole. >> + * The command sequence number is returned which can be used for wait command ioctl. >> + */ >> +static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client, >> + struct amdxdna_drm_exec_cmd *args) >> +{ >> + struct amdxdna_dev *xdna = client->xdna; >> + u32 *arg_bo_hdls; >> + u32 cmd_bo_hdl; >> + int ret; >> + >> + if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) { >> + XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count); >> + return -EINVAL; >> + } >> + >> + /* Only support single command for now. */ >> + if (args->cmd_count != 1) { >> + XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count); >> + return -EINVAL; >> + } >> + >> + cmd_bo_hdl = (u32)args->cmd_handles; >> + arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL); >> + if (!arg_bo_hdls) >> + return -ENOMEM; >> + ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args), >> + args->arg_count * sizeof(u32)); >> + if (ret) { >> + ret = -EFAULT; >> + goto free_cmd_bo_hdls; >> + } >> + >> + ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls, >> + args->arg_count, args->hwctx, &args->seq); >> + if (ret) >> + XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret); >> + >> +free_cmd_bo_hdls: >> + kfree(arg_bo_hdls); >> + if (!ret) >> + XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq); >> + return ret; >> +} >> + >> +int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) >> +{ >> + struct amdxdna_client *client = filp->driver_priv; >> + struct amdxdna_drm_exec_cmd *args = data; >> + >> + if (args->ext || args->ext_flags) >> + return -EINVAL; >> + >> + switch (args->type) { >> + case AMDXDNA_CMD_SUBMIT_EXEC_BUF: >> + return amdxdna_drm_submit_execbuf(client, args); >> + } >> + >> + XDNA_ERR(client->xdna, "Invalid command type %d", args->type); >> + return -EINVAL; >> +} >> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h >> index b409d0731ab8..18ed9bdbd6d8 100644 >> --- a/drivers/accel/amdxdna/amdxdna_ctx.h >> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h >> @@ -6,6 +6,54 @@ >> #ifndef _AMDXDNA_CTX_H_ >> #define _AMDXDNA_CTX_H_ >> >> +#include <linux/bitfield.h> >> + >> +#include "amdxdna_gem.h" >> + >> +struct amdxdna_hwctx_priv; >> + >> +enum ert_cmd_opcode { >> + ERT_START_CU = 0, >> + ERT_CMD_CHAIN = 19, >> + ERT_START_NPU = 20, >> +}; >> + >> +enum ert_cmd_state { >> + ERT_CMD_STATE_INVALID, >> + ERT_CMD_STATE_NEW, >> + ERT_CMD_STATE_QUEUED, >> + ERT_CMD_STATE_RUNNING, >> + ERT_CMD_STATE_COMPLETED, >> + ERT_CMD_STATE_ERROR, >> + ERT_CMD_STATE_ABORT, >> + ERT_CMD_STATE_SUBMITTED, >> + ERT_CMD_STATE_TIMEOUT, >> + ERT_CMD_STATE_NORESPONSE, >> +}; >> + >> +/* >> + * Interpretation of the beginning of data payload for ERT_START_NPU in >> + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args. >> + */ >> +struct amdxdna_cmd_start_npu { >> + u64 buffer; /* instruction buffer address */ >> + u32 buffer_size; /* size of buffer in bytes */ >> + u32 prop_count; /* properties count */ >> + u32 prop_args[]; /* properties and regular kernel arguments */ >> +}; >> + >> +/* >> + * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in >> + * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles. >> + */ >> +struct amdxdna_cmd_chain { >> + u32 command_count; >> + u32 submit_index; >> + u32 error_index; >> + u32 reserved[3]; >> + u64 data[] __counted_by(command_count); >> +}; >> + >> /* Exec buffer command header format */ >> #define AMDXDNA_CMD_STATE GENMASK(3, 0) >> #define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10) >> @@ -41,9 +89,72 @@ struct amdxdna_hwctx { >> u32 syncobj_hdl; >> }; >> >> +#define drm_job_to_xdna_job(j) \ >> + container_of(j, struct amdxdna_sched_job, base) >> + >> +struct amdxdna_sched_job { >> + struct drm_sched_job base; >> + struct kref refcnt; >> + struct amdxdna_hwctx *hwctx; >> + struct mm_struct *mm; >> + /* The fence to notice DRM scheduler that job is done by hardware */ >> + struct dma_fence *fence; >> + /* user can wait on this fence */ >> + struct dma_fence *out_fence; >> + bool job_done; >> + u64 seq; >> + struct amdxdna_gem_obj *cmd_bo; >> + size_t bo_cnt; >> + struct drm_gem_object *bos[] __counted_by(bo_cnt); >> +}; >> + >> +static inline u32 >> +amdxdna_cmd_get_op(struct amdxdna_gem_obj *abo) >> +{ >> + struct amdxdna_cmd *cmd = abo->mem.kva; >> + >> + return FIELD_GET(AMDXDNA_CMD_OPCODE, cmd->header); >> +} >> + >> +static inline void >> +amdxdna_cmd_set_state(struct amdxdna_gem_obj *abo, enum ert_cmd_state s) >> +{ >> + struct amdxdna_cmd *cmd = abo->mem.kva; >> + >> + cmd->header &= ~AMDXDNA_CMD_STATE; >> + cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, s); >> +} >> + >> +static inline enum ert_cmd_state >> +amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo) >> +{ >> + struct amdxdna_cmd *cmd = abo->mem.kva; >> + >> + return FIELD_GET(AMDXDNA_CMD_STATE, cmd->header); >> +} >> + >> +void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size); >> +int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo); >> + >> +static inline u32 amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx) >> +{ >> + return GENMASK(hwctx->start_col + hwctx->num_col - 1, >> + hwctx->start_col); >> +} >> + >> +void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job); >> void amdxdna_hwctx_remove_all(struct amdxdna_client *client); >> + >> +int amdxdna_cmd_submit(struct amdxdna_client *client, >> + u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt, >> + u32 hwctx_hdl, u64 *seq); >> + >> +int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl, >> + u64 seq, u32 timeout); >> + >> int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); >> int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); >> int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); >> +int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); >> >> #endif /* _AMDXDNA_CTX_H_ */ >> diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c >> index f2ba86ae9e1a..4dfeca306d98 100644 >> --- a/drivers/accel/amdxdna/amdxdna_gem.c >> +++ b/drivers/accel/amdxdna/amdxdna_gem.c >> @@ -8,6 +8,7 @@ >> #include <drm/drm_device.h> >> #include <drm/drm_gem.h> >> #include <drm/drm_gem_shmem_helper.h> >> +#include <drm/gpu_scheduler.h> >> #include <linux/iosys-map.h> >> #include <linux/vmalloc.h> >> >> diff --git a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c >> index 42b615394605..5139a9c96a91 100644 >> --- a/drivers/accel/amdxdna/amdxdna_mailbox_helper.c >> +++ b/drivers/accel/amdxdna/amdxdna_mailbox_helper.c >> @@ -3,10 +3,15 @@ >> * Copyright (C) 2024, Advanced Micro Devices, Inc. >> */ >> >> +#include <drm/amdxdna_accel.h> >> #include <drm/drm_device.h> >> #include <drm/drm_print.h> >> +#include <drm/drm_gem.h> >> +#include <drm/drm_gem_shmem_helper.h> >> +#include <drm/gpu_scheduler.h> >> #include <linux/completion.h> >> >> +#include "amdxdna_gem.h" >> #include "amdxdna_mailbox.h" >> #include "amdxdna_mailbox_helper.h" >> #include "amdxdna_pci_drv.h" >> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c >> index 172109cc9617..32a58bb6e6b1 100644 >> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c >> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c >> @@ -10,6 +10,7 @@ >> #include <drm/drm_gem_shmem_helper.h> >> #include <drm/drm_ioctl.h> >> #include <drm/drm_managed.h> >> +#include <drm/gpu_scheduler.h> >> #include <linux/iommu.h> >> #include <linux/pci.h> >> >> @@ -64,6 +65,7 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp) >> goto unbind_sva; >> } >> mutex_init(&client->hwctx_lock); >> + init_srcu_struct(&client->hwctx_srcu); >> idr_init_base(&client->hwctx_idr, AMDXDNA_INVALID_CTX_HANDLE + 1); >> mutex_init(&client->mm_lock); >> >> @@ -93,6 +95,7 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp) >> XDNA_DBG(xdna, "closing pid %d", client->pid); >> >> idr_destroy(&client->hwctx_idr); >> + cleanup_srcu_struct(&client->hwctx_srcu); >> mutex_destroy(&client->hwctx_lock); >> mutex_destroy(&client->mm_lock); >> if (client->dev_heap) >> @@ -133,6 +136,8 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = { >> DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_BO, amdxdna_drm_create_bo_ioctl, 0), >> DRM_IOCTL_DEF_DRV(AMDXDNA_GET_BO_INFO, amdxdna_drm_get_bo_info_ioctl, 0), >> DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0), >> + /* Execution */ >> + DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0), >> }; >> >> static const struct file_operations amdxdna_fops = { >> @@ -190,9 +195,16 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id) >> return -ENODEV; >> >> drmm_mutex_init(&xdna->ddev, &xdna->dev_lock); >> + rwlock_init(&xdna->notifier_lock); >> INIT_LIST_HEAD(&xdna->client_list); >> pci_set_drvdata(pdev, xdna); >> >> + if (IS_ENABLED(CONFIG_LOCKDEP)) { >> + fs_reclaim_acquire(GFP_KERNEL); >> + might_lock(&xdna->notifier_lock); >> + fs_reclaim_release(GFP_KERNEL); >> + } >> + >> mutex_lock(&xdna->dev_lock); >> ret = xdna->dev_info->ops->init(xdna); >> mutex_unlock(&xdna->dev_lock); >> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h >> index 3dddde4ac12a..ec22a074aac6 100644 >> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h >> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h >> @@ -20,6 +20,7 @@ extern const struct drm_driver amdxdna_drm_drv; >> struct amdxdna_dev; >> struct amdxdna_gem_obj; >> struct amdxdna_hwctx; >> +struct amdxdna_sched_job; >> >> /* >> * struct amdxdna_dev_ops - Device hardware operation callbacks >> @@ -31,6 +32,7 @@ struct amdxdna_dev_ops { >> void (*hwctx_fini)(struct amdxdna_hwctx *hwctx); >> int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size); >> void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq); >> + int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq); >> }; >> >> /* >> @@ -69,6 +71,7 @@ struct amdxdna_dev { >> struct mutex dev_lock; /* per device lock */ >> struct list_head client_list; >> struct amdxdna_fw_ver fw_ver; >> + rwlock_t notifier_lock; /* for mmu notifier*/ >> }; >> >> /* >> @@ -88,6 +91,8 @@ struct amdxdna_client { >> struct list_head node; >> pid_t pid; >> struct mutex hwctx_lock; /* protect hwctx */ >> + /* do NOT wait this srcu when hwctx_lock is hold */ >> + struct srcu_struct hwctx_srcu; >> struct idr hwctx_idr; >> struct amdxdna_dev *xdna; >> struct drm_file *filp; >> diff --git a/drivers/accel/amdxdna/amdxdna_sysfs.c b/drivers/accel/amdxdna/amdxdna_sysfs.c >> index 668b94b92714..f27e4ee960a0 100644 >> --- a/drivers/accel/amdxdna/amdxdna_sysfs.c >> +++ b/drivers/accel/amdxdna/amdxdna_sysfs.c >> @@ -3,9 +3,14 @@ >> * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. >> */ >> >> +#include <drm/amdxdna_accel.h> >> #include <drm/drm_device.h> >> +#include <drm/drm_gem_shmem_helper.h> >> #include <drm/drm_print.h> >> +#include <drm/gpu_scheduler.h> >> +#include <linux/types.h> >> >> +#include "amdxdna_gem.h" >> #include "amdxdna_pci_drv.h" >> >> static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf) >> diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c >> index 720aab0ed7c4..f00c50461b09 100644 >> --- a/drivers/accel/amdxdna/npu1_regs.c >> +++ b/drivers/accel/amdxdna/npu1_regs.c >> @@ -5,6 +5,7 @@ >> >> #include <drm/amdxdna_accel.h> >> #include <drm/drm_device.h> >> +#include <drm/gpu_scheduler.h> >> #include <linux/sizes.h> >> >> #include "aie2_pci.h" >> diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c >> index f3ea18bcf294..00cb381031d2 100644 >> --- a/drivers/accel/amdxdna/npu2_regs.c >> +++ b/drivers/accel/amdxdna/npu2_regs.c >> @@ -5,6 +5,7 @@ >> >> #include <drm/amdxdna_accel.h> >> #include <drm/drm_device.h> >> +#include <drm/gpu_scheduler.h> >> #include <linux/sizes.h> >> >> #include "aie2_pci.h" >> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c >> index db61142f0d4e..b6dae9667cca 100644 >> --- a/drivers/accel/amdxdna/npu4_regs.c >> +++ b/drivers/accel/amdxdna/npu4_regs.c >> @@ -5,6 +5,7 @@ >> >> #include <drm/amdxdna_accel.h> >> #include <drm/drm_device.h> >> +#include <drm/gpu_scheduler.h> >> #include <linux/sizes.h> >> >> #include "aie2_pci.h" >> diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c >> index debf4e95b9bb..bed1baf8e160 100644 >> --- a/drivers/accel/amdxdna/npu5_regs.c >> +++ b/drivers/accel/amdxdna/npu5_regs.c >> @@ -5,6 +5,7 @@ >> >> #include <drm/amdxdna_accel.h> >> #include <drm/drm_device.h> >> +#include <drm/gpu_scheduler.h> >> #include <linux/sizes.h> >> >> #include "aie2_pci.h" >> diff --git a/include/trace/events/amdxdna.h b/include/trace/events/amdxdna.h >> index 33343d8f0622..c6cb2da7b706 100644 >> --- a/include/trace/events/amdxdna.h >> +++ b/include/trace/events/amdxdna.h >> @@ -9,8 +9,49 @@ >> #if !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ) >> #define _TRACE_AMDXDNA_H >> >> +#include <drm/gpu_scheduler.h> >> #include <linux/tracepoint.h> >> >> +TRACE_EVENT(amdxdna_debug_point, >> + TP_PROTO(const char *name, u64 number, const char *str), >> + >> + TP_ARGS(name, number, str), >> + >> + TP_STRUCT__entry(__string(name, name) >> + __field(u64, number) >> + __string(str, str)), >> + >> + TP_fast_assign(__assign_str(name); >> + __entry->number = number; >> + __assign_str(str);), >> + >> + TP_printk("%s:%llu %s", __get_str(name), __entry->number, >> + __get_str(str)) >> +); >> + >> +TRACE_EVENT(xdna_job, >> + TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq), >> + >> + TP_ARGS(sched_job, name, str, seq), >> + >> + TP_STRUCT__entry(__string(name, name) >> + __string(str, str) >> + __field(u64, fence_context) >> + __field(u64, fence_seqno) >> + __field(u64, seq)), >> + >> + TP_fast_assign(__assign_str(name); >> + __assign_str(str); >> + __entry->fence_context = sched_job->s_fence->finished.context; >> + __entry->fence_seqno = sched_job->s_fence->finished.seqno; >> + __entry->seq = seq;), >> + >> + TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s", >> + __entry->fence_context, __entry->fence_seqno, >> + __get_str(name), __entry->seq, >> + __get_str(str)) >> +); >> + >> DECLARE_EVENT_CLASS(xdna_mbox_msg, >> TP_PROTO(char *name, u8 chann_id, u32 opcode, u32 msg_id), >> >> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h >> index e3e78b79a8e7..3e88ed386fac 100644 >> --- a/include/uapi/drm/amdxdna_accel.h >> +++ b/include/uapi/drm/amdxdna_accel.h >> @@ -13,9 +13,11 @@ >> extern "C" { >> #endif >> >> +#define AMDXDNA_INVALID_CMD_HANDLE (~0UL) >> #define AMDXDNA_INVALID_ADDR (~0UL) >> #define AMDXDNA_INVALID_CTX_HANDLE 0 >> #define AMDXDNA_INVALID_BO_HANDLE 0 >> +#define AMDXDNA_INVALID_FENCE_HANDLE 0 >> >> enum amdxdna_device_type { >> AMDXDNA_DEV_TYPE_UNKNOWN = -1, >> @@ -29,6 +31,7 @@ enum amdxdna_drm_ioctl_id { >> DRM_AMDXDNA_CREATE_BO, >> DRM_AMDXDNA_GET_BO_INFO, >> DRM_AMDXDNA_SYNC_BO, >> + DRM_AMDXDNA_EXEC_CMD, >> }; >> >> /** >> @@ -201,6 +204,37 @@ struct amdxdna_drm_sync_bo { >> __u64 size; >> }; >> >> +enum amdxdna_cmd_type { >> + AMDXDNA_CMD_SUBMIT_EXEC_BUF = 0, >> + AMDXDNA_CMD_SUBMIT_DEPENDENCY, >> + AMDXDNA_CMD_SUBMIT_SIGNAL, >> +}; >> + >> +/** >> + * struct amdxdna_drm_exec_cmd - Execute command. >> + * @ext: MBZ. >> + * @ext_flags: MBZ. >> + * @hwctx: Hardware context handle. >> + * @type: One of command type in enum amdxdna_cmd_type. >> + * @cmd_handles: Array of command handles or the command handle itself >> + * in case of just one. >> + * @args: Array of arguments for all command handles. >> + * @cmd_count: Number of command handles in the cmd_handles array. >> + * @arg_count: Number of arguments in the args array. >> + * @seq: Returned sequence number for this command. >> + */ >> +struct amdxdna_drm_exec_cmd { >> + __u64 ext; >> + __u64 ext_flags; >> + __u32 hwctx; >> + __u32 type; >> + __u64 cmd_handles; >> + __u64 args; >> + __u32 cmd_count; >> + __u32 arg_count; >> + __u64 seq; >> +}; >> + >> #define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \ >> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \ >> struct amdxdna_drm_create_hwctx) >> @@ -225,6 +259,10 @@ struct amdxdna_drm_sync_bo { >> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SYNC_BO, \ >> struct amdxdna_drm_sync_bo) >> >> +#define DRM_IOCTL_AMDXDNA_EXEC_CMD \ >> + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_EXEC_CMD, \ >> + struct amdxdna_drm_exec_cmd) >> + >> #if defined(__cplusplus) >> } /* extern c end */ >> #endif >> -- >> 2.34.1 >>
© 2016 - 2024 Red Hat, Inc.