drivers/accel/amdxdna/aie2_ctx.c      | 11 ++++++++++-
drivers/accel/amdxdna/aie2_message.c  |  1 +
drivers/accel/amdxdna/aie2_msg_priv.h |  3 ++-
drivers/accel/amdxdna/aie2_pci.h      |  1 +
drivers/accel/amdxdna/amdxdna_ctx.h   |  1 +
drivers/accel/amdxdna/npu4_regs.c     |  1 +
6 files changed, 16 insertions(+), 2 deletions(-)
Newer firmware versions prefer temporal sharing only mode. In this mode,
the driver no longer needs to manage AIE array column allocation. Instead,
a new field, num_unused_col, is added to the hardware context creation
request to specify how many columns will not be used by this hardware
context.
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
drivers/accel/amdxdna/aie2_ctx.c | 11 ++++++++++-
drivers/accel/amdxdna/aie2_message.c | 1 +
drivers/accel/amdxdna/aie2_msg_priv.h | 3 ++-
drivers/accel/amdxdna/aie2_pci.h | 1 +
drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
drivers/accel/amdxdna/npu4_regs.c | 1 +
6 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 42d876a427c5..2ed087803628 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -468,6 +468,12 @@ static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
struct alloc_requests *xrs_req;
int ret;
+ if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
+ hwctx->num_unused_col = xdna->dev_handle->total_col - hwctx->num_col;
+ hwctx->num_col = xdna->dev_handle->total_col;
+ return aie2_create_context(xdna->dev_handle, hwctx);
+ }
+
xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
if (!xrs_req)
return -ENOMEM;
@@ -499,7 +505,10 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
struct amdxdna_dev *xdna = hwctx->client->xdna;
int ret;
- ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
+ if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY))
+ ret = aie2_destroy_context(xdna->dev_handle, hwctx);
+ else
+ ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
if (ret)
XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
}
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 9ec973028221..e77a353cadc5 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -218,6 +218,7 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
req.aie_type = 1;
req.start_col = hwctx->start_col;
req.num_col = hwctx->num_col;
+ req.num_unused_col = hwctx->num_unused_col;
req.num_cq_pairs_requested = 1;
req.pasid = hwctx->client->pasid;
req.context_priority = 2;
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 1c957a6298d3..cc912b7899ce 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -112,7 +112,8 @@ struct create_ctx_req {
__u32 aie_type;
__u8 start_col;
__u8 num_col;
- __u16 reserved;
+ __u8 num_unused_col;
+ __u8 reserved;
__u8 num_cq_pairs_requested;
__u8 reserved1;
__u16 pasid;
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index c6b5cf4ae5c4..a929fa98a121 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -232,6 +232,7 @@ struct aie2_hw_ops {
enum aie2_fw_feature {
AIE2_NPU_COMMAND,
AIE2_PREEMPT,
+ AIE2_TEMPORAL_ONLY,
AIE2_FEATURE_MAX
};
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index b6151244d64f..b29449a92f60 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -98,6 +98,7 @@ struct amdxdna_hwctx {
u32 *col_list;
u32 start_col;
u32 num_col;
+ u32 num_unused_col;
#define HWCTX_STAT_INIT 0
#define HWCTX_STAT_READY 1
#define HWCTX_STAT_STOP 2
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index 4ca21db70478..a62234fd266d 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -90,6 +90,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
{ .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
{ .feature = AIE2_PREEMPT, .min_minor = 12 },
+ { .feature = AIE2_TEMPORAL_ONLY, .min_minor = 12 },
{ 0 }
};
--
2.34.1
On 12/17/25 11:17 AM, Lizhi Hou wrote:
> Newer firmware versions prefer temporal sharing only mode. In this mode,
> the driver no longer needs to manage AIE array column allocation. Instead,
> a new field, num_unused_col, is added to the hardware context creation
> request to specify how many columns will not be used by this hardware
> context.
>
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
> drivers/accel/amdxdna/aie2_ctx.c | 11 ++++++++++-
> drivers/accel/amdxdna/aie2_message.c | 1 +
> drivers/accel/amdxdna/aie2_msg_priv.h | 3 ++-
> drivers/accel/amdxdna/aie2_pci.h | 1 +
> drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
> drivers/accel/amdxdna/npu4_regs.c | 1 +
> 6 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> index 42d876a427c5..2ed087803628 100644
> --- a/drivers/accel/amdxdna/aie2_ctx.c
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -468,6 +468,12 @@ static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
> struct alloc_requests *xrs_req;
> int ret;
>
> + if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
> + hwctx->num_unused_col = xdna->dev_handle->total_col - hwctx->num_col;
> + hwctx->num_col = xdna->dev_handle->total_col;
> + return aie2_create_context(xdna->dev_handle, hwctx);
> + }
> +
> xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
> if (!xrs_req)
> return -ENOMEM;
> @@ -499,7 +505,10 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
> struct amdxdna_dev *xdna = hwctx->client->xdna;
> int ret;
>
> - ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
> + if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY))
> + ret = aie2_destroy_context(xdna->dev_handle, hwctx);
> + else
> + ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
> if (ret)
> XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
To avoid confusion, I think you want to move the error message into each
branch of the if/else, so that a context destroy failure and a resource
release failure each get their own unique error string.
> }
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 9ec973028221..e77a353cadc5 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -218,6 +218,7 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
> req.aie_type = 1;
> req.start_col = hwctx->start_col;
> req.num_col = hwctx->num_col;
> + req.num_unused_col = hwctx->num_unused_col;
> req.num_cq_pairs_requested = 1;
> req.pasid = hwctx->client->pasid;
> req.context_priority = 2;
> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
> index 1c957a6298d3..cc912b7899ce 100644
> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
> @@ -112,7 +112,8 @@ struct create_ctx_req {
> __u32 aie_type;
> __u8 start_col;
> __u8 num_col;
> - __u16 reserved;
> + __u8 num_unused_col;
> + __u8 reserved;
> __u8 num_cq_pairs_requested;
> __u8 reserved1;
> __u16 pasid;
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index c6b5cf4ae5c4..a929fa98a121 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -232,6 +232,7 @@ struct aie2_hw_ops {
> enum aie2_fw_feature {
> AIE2_NPU_COMMAND,
> AIE2_PREEMPT,
> + AIE2_TEMPORAL_ONLY,
> AIE2_FEATURE_MAX
> };
>
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index b6151244d64f..b29449a92f60 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -98,6 +98,7 @@ struct amdxdna_hwctx {
> u32 *col_list;
> u32 start_col;
> u32 num_col;
> + u32 num_unused_col;
> #define HWCTX_STAT_INIT 0
> #define HWCTX_STAT_READY 1
> #define HWCTX_STAT_STOP 2
> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
> index 4ca21db70478..a62234fd266d 100644
> --- a/drivers/accel/amdxdna/npu4_regs.c
> +++ b/drivers/accel/amdxdna/npu4_regs.c
> @@ -90,6 +90,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
> const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
> { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
> { .feature = AIE2_PREEMPT, .min_minor = 12 },
> + { .feature = AIE2_TEMPORAL_ONLY, .min_minor = 12 },
Similar to my comment on the other thread, is this really an NPU2, 4, 5, 6
feature? Or is it 4+?
> { 0 }
> };
>
On 12/17/25 10:20, Mario Limonciello wrote:
> On 12/17/25 11:17 AM, Lizhi Hou wrote:
>> Newer firmware versions prefer temporal sharing only mode. In this mode,
>> the driver no longer needs to manage AIE array column allocation.
>> Instead,
>> a new field, num_unused_col, is added to the hardware context creation
>> request to specify how many columns will not be used by this hardware
>> context.
>>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>> drivers/accel/amdxdna/aie2_ctx.c | 11 ++++++++++-
>> drivers/accel/amdxdna/aie2_message.c | 1 +
>> drivers/accel/amdxdna/aie2_msg_priv.h | 3 ++-
>> drivers/accel/amdxdna/aie2_pci.h | 1 +
>> drivers/accel/amdxdna/amdxdna_ctx.h | 1 +
>> drivers/accel/amdxdna/npu4_regs.c | 1 +
>> 6 files changed, 16 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c
>> b/drivers/accel/amdxdna/aie2_ctx.c
>> index 42d876a427c5..2ed087803628 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -468,6 +468,12 @@ static int aie2_alloc_resource(struct
>> amdxdna_hwctx *hwctx)
>> struct alloc_requests *xrs_req;
>> int ret;
>> + if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
>> + hwctx->num_unused_col = xdna->dev_handle->total_col -
>> hwctx->num_col;
>> + hwctx->num_col = xdna->dev_handle->total_col;
>> + return aie2_create_context(xdna->dev_handle, hwctx);
>> + }
>> +
>> xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
>> if (!xrs_req)
>> return -ENOMEM;
>> @@ -499,7 +505,10 @@ static void aie2_release_resource(struct
>> amdxdna_hwctx *hwctx)
>> struct amdxdna_dev *xdna = hwctx->client->xdna;
>> int ret;
>> - ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
>> + if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY))
>> + ret = aie2_destroy_context(xdna->dev_handle, hwctx);
>> + else
>> + ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
>> if (ret)
>> XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
>
> To avoid confusion, I think you want to pull the error string into the
> if/else branch and have a unique error string for context destroy
> failure or release failure.
Sure.
>
>> }
>> diff --git a/drivers/accel/amdxdna/aie2_message.c
>> b/drivers/accel/amdxdna/aie2_message.c
>> index 9ec973028221..e77a353cadc5 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -218,6 +218,7 @@ int aie2_create_context(struct amdxdna_dev_hdl
>> *ndev, struct amdxdna_hwctx *hwct
>> req.aie_type = 1;
>> req.start_col = hwctx->start_col;
>> req.num_col = hwctx->num_col;
>> + req.num_unused_col = hwctx->num_unused_col;
>> req.num_cq_pairs_requested = 1;
>> req.pasid = hwctx->client->pasid;
>> req.context_priority = 2;
>> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h
>> b/drivers/accel/amdxdna/aie2_msg_priv.h
>> index 1c957a6298d3..cc912b7899ce 100644
>> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
>> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
>> @@ -112,7 +112,8 @@ struct create_ctx_req {
>> __u32 aie_type;
>> __u8 start_col;
>> __u8 num_col;
>> - __u16 reserved;
>> + __u8 num_unused_col;
>> + __u8 reserved;
>> __u8 num_cq_pairs_requested;
>> __u8 reserved1;
>> __u16 pasid;
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index c6b5cf4ae5c4..a929fa98a121 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -232,6 +232,7 @@ struct aie2_hw_ops {
>> enum aie2_fw_feature {
>> AIE2_NPU_COMMAND,
>> AIE2_PREEMPT,
>> + AIE2_TEMPORAL_ONLY,
>> AIE2_FEATURE_MAX
>> };
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h
>> b/drivers/accel/amdxdna/amdxdna_ctx.h
>> index b6151244d64f..b29449a92f60 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>> @@ -98,6 +98,7 @@ struct amdxdna_hwctx {
>> u32 *col_list;
>> u32 start_col;
>> u32 num_col;
>> + u32 num_unused_col;
>> #define HWCTX_STAT_INIT 0
>> #define HWCTX_STAT_READY 1
>> #define HWCTX_STAT_STOP 2
>> diff --git a/drivers/accel/amdxdna/npu4_regs.c
>> b/drivers/accel/amdxdna/npu4_regs.c
>> index 4ca21db70478..a62234fd266d 100644
>> --- a/drivers/accel/amdxdna/npu4_regs.c
>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>> @@ -90,6 +90,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>> const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
>> { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
>> { .feature = AIE2_PREEMPT, .min_minor = 12 },
>> + { .feature = AIE2_TEMPORAL_ONLY, .min_minor = 12 },
>
> Similar to my comment on other thread, is this really NPU2,4,5,6
> feature? Or it's 4+?
NPU2 is obsolete. I will remove NPU2 later.
Thanks,
Lizhi
>
>> { 0 }
>> };
>
© 2016 - 2026 Red Hat, Inc.