[PATCH V1] accel/amdxdna: Enable temporal sharing only mode

Lizhi Hou posted 1 patch 1 month, 3 weeks ago
There is a newer version of this series
drivers/accel/amdxdna/aie2_ctx.c      | 11 ++++++++++-
drivers/accel/amdxdna/aie2_message.c  |  1 +
drivers/accel/amdxdna/aie2_msg_priv.h |  3 ++-
drivers/accel/amdxdna/aie2_pci.h      |  1 +
drivers/accel/amdxdna/amdxdna_ctx.h   |  1 +
drivers/accel/amdxdna/npu4_regs.c     |  1 +
6 files changed, 16 insertions(+), 2 deletions(-)
[PATCH V1] accel/amdxdna: Enable temporal sharing only mode
Posted by Lizhi Hou 1 month, 3 weeks ago
Newer firmware versions prefer temporal sharing only mode. In this mode,
the driver no longer needs to manage AIE array column allocation. Instead,
a new field, num_unused_col, is added to the hardware context creation
request to specify how many columns will not be used by this hardware
context.

Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_ctx.c      | 11 ++++++++++-
 drivers/accel/amdxdna/aie2_message.c  |  1 +
 drivers/accel/amdxdna/aie2_msg_priv.h |  3 ++-
 drivers/accel/amdxdna/aie2_pci.h      |  1 +
 drivers/accel/amdxdna/amdxdna_ctx.h   |  1 +
 drivers/accel/amdxdna/npu4_regs.c     |  1 +
 6 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 42d876a427c5..2ed087803628 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -468,6 +468,12 @@ static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
 	struct alloc_requests *xrs_req;
 	int ret;
 
+	if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
+		hwctx->num_unused_col = xdna->dev_handle->total_col - hwctx->num_col;
+		hwctx->num_col = xdna->dev_handle->total_col;
+		return aie2_create_context(xdna->dev_handle, hwctx);
+	}
+
 	xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
 	if (!xrs_req)
 		return -ENOMEM;
@@ -499,7 +505,10 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
 	struct amdxdna_dev *xdna = hwctx->client->xdna;
 	int ret;
 
-	ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
+	if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY))
+		ret = aie2_destroy_context(xdna->dev_handle, hwctx);
+	else
+		ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
 	if (ret)
 		XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
 }
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 9ec973028221..e77a353cadc5 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -218,6 +218,7 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
 	req.aie_type = 1;
 	req.start_col = hwctx->start_col;
 	req.num_col = hwctx->num_col;
+	req.num_unused_col = hwctx->num_unused_col;
 	req.num_cq_pairs_requested = 1;
 	req.pasid = hwctx->client->pasid;
 	req.context_priority = 2;
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index 1c957a6298d3..cc912b7899ce 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -112,7 +112,8 @@ struct create_ctx_req {
 	__u32	aie_type;
 	__u8	start_col;
 	__u8	num_col;
-	__u16	reserved;
+	__u8    num_unused_col;
+	__u8	reserved;
 	__u8	num_cq_pairs_requested;
 	__u8	reserved1;
 	__u16	pasid;
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index c6b5cf4ae5c4..a929fa98a121 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -232,6 +232,7 @@ struct aie2_hw_ops {
 enum aie2_fw_feature {
 	AIE2_NPU_COMMAND,
 	AIE2_PREEMPT,
+	AIE2_TEMPORAL_ONLY,
 	AIE2_FEATURE_MAX
 };
 
diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
index b6151244d64f..b29449a92f60 100644
--- a/drivers/accel/amdxdna/amdxdna_ctx.h
+++ b/drivers/accel/amdxdna/amdxdna_ctx.h
@@ -98,6 +98,7 @@ struct amdxdna_hwctx {
 	u32				*col_list;
 	u32				start_col;
 	u32				num_col;
+	u32				num_unused_col;
 #define HWCTX_STAT_INIT  0
 #define HWCTX_STAT_READY 1
 #define HWCTX_STAT_STOP  2
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index 4ca21db70478..a62234fd266d 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -90,6 +90,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
 const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
 	{ .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
 	{ .feature = AIE2_PREEMPT, .min_minor = 12 },
+	{ .feature = AIE2_TEMPORAL_ONLY, .min_minor = 12 },
 	{ 0 }
 };
 
-- 
2.34.1
Re: [PATCH V1] accel/amdxdna: Enable temporal sharing only mode
Posted by Mario Limonciello 1 month, 3 weeks ago
On 12/17/25 11:17 AM, Lizhi Hou wrote:
> Newer firmware versions prefer temporal sharing only mode. In this mode,
> the driver no longer needs to manage AIE array column allocation. Instead,
> a new field, num_unused_col, is added to the hardware context creation
> request to specify how many columns will not be used by this hardware
> context.
> 
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> ---
>   drivers/accel/amdxdna/aie2_ctx.c      | 11 ++++++++++-
>   drivers/accel/amdxdna/aie2_message.c  |  1 +
>   drivers/accel/amdxdna/aie2_msg_priv.h |  3 ++-
>   drivers/accel/amdxdna/aie2_pci.h      |  1 +
>   drivers/accel/amdxdna/amdxdna_ctx.h   |  1 +
>   drivers/accel/amdxdna/npu4_regs.c     |  1 +
>   6 files changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> index 42d876a427c5..2ed087803628 100644
> --- a/drivers/accel/amdxdna/aie2_ctx.c
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -468,6 +468,12 @@ static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
>   	struct alloc_requests *xrs_req;
>   	int ret;
>   
> +	if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
> +		hwctx->num_unused_col = xdna->dev_handle->total_col - hwctx->num_col;
> +		hwctx->num_col = xdna->dev_handle->total_col;
> +		return aie2_create_context(xdna->dev_handle, hwctx);
> +	}
> +
>   	xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
>   	if (!xrs_req)
>   		return -ENOMEM;
> @@ -499,7 +505,10 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
>   	struct amdxdna_dev *xdna = hwctx->client->xdna;
>   	int ret;
>   
> -	ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
> +	if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY))
> +		ret = aie2_destroy_context(xdna->dev_handle, hwctx);
> +	else
> +		ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
>   	if (ret)
>   		XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);

To avoid confusion, I think you want to pull the error string into the 
if/else branch and have a unique error string for context destroy 
failure or release failure.

>   }
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 9ec973028221..e77a353cadc5 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -218,6 +218,7 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
>   	req.aie_type = 1;
>   	req.start_col = hwctx->start_col;
>   	req.num_col = hwctx->num_col;
> +	req.num_unused_col = hwctx->num_unused_col;
>   	req.num_cq_pairs_requested = 1;
>   	req.pasid = hwctx->client->pasid;
>   	req.context_priority = 2;
> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
> index 1c957a6298d3..cc912b7899ce 100644
> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
> @@ -112,7 +112,8 @@ struct create_ctx_req {
>   	__u32	aie_type;
>   	__u8	start_col;
>   	__u8	num_col;
> -	__u16	reserved;
> +	__u8    num_unused_col;
> +	__u8	reserved;
>   	__u8	num_cq_pairs_requested;
>   	__u8	reserved1;
>   	__u16	pasid;
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index c6b5cf4ae5c4..a929fa98a121 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -232,6 +232,7 @@ struct aie2_hw_ops {
>   enum aie2_fw_feature {
>   	AIE2_NPU_COMMAND,
>   	AIE2_PREEMPT,
> +	AIE2_TEMPORAL_ONLY,
>   	AIE2_FEATURE_MAX
>   };
>   
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index b6151244d64f..b29449a92f60 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -98,6 +98,7 @@ struct amdxdna_hwctx {
>   	u32				*col_list;
>   	u32				start_col;
>   	u32				num_col;
> +	u32				num_unused_col;
>   #define HWCTX_STAT_INIT  0
>   #define HWCTX_STAT_READY 1
>   #define HWCTX_STAT_STOP  2
> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
> index 4ca21db70478..a62234fd266d 100644
> --- a/drivers/accel/amdxdna/npu4_regs.c
> +++ b/drivers/accel/amdxdna/npu4_regs.c
> @@ -90,6 +90,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>   const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
>   	{ .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
>   	{ .feature = AIE2_PREEMPT, .min_minor = 12 },
> +	{ .feature = AIE2_TEMPORAL_ONLY, .min_minor = 12 },

Similar to my comment on the other thread: is this really a feature for
NPU2, 4, 5, and 6? Or is it only for NPU4 and later?

>   	{ 0 }
>   };
>
Re: [PATCH V1] accel/amdxdna: Enable temporal sharing only mode
Posted by Lizhi Hou 1 month, 3 weeks ago
On 12/17/25 10:20, Mario Limonciello wrote:
> On 12/17/25 11:17 AM, Lizhi Hou wrote:
>> Newer firmware versions prefer temporal sharing only mode. In this mode,
>> the driver no longer needs to manage AIE array column allocation. 
>> Instead,
>> a new field, num_unused_col, is added to the hardware context creation
>> request to specify how many columns will not be used by this hardware
>> context.
>>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>> ---
>>   drivers/accel/amdxdna/aie2_ctx.c      | 11 ++++++++++-
>>   drivers/accel/amdxdna/aie2_message.c  |  1 +
>>   drivers/accel/amdxdna/aie2_msg_priv.h |  3 ++-
>>   drivers/accel/amdxdna/aie2_pci.h      |  1 +
>>   drivers/accel/amdxdna/amdxdna_ctx.h   |  1 +
>>   drivers/accel/amdxdna/npu4_regs.c     |  1 +
>>   6 files changed, 16 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c 
>> b/drivers/accel/amdxdna/aie2_ctx.c
>> index 42d876a427c5..2ed087803628 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -468,6 +468,12 @@ static int aie2_alloc_resource(struct 
>> amdxdna_hwctx *hwctx)
>>       struct alloc_requests *xrs_req;
>>       int ret;
>>   +    if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
>> +        hwctx->num_unused_col = xdna->dev_handle->total_col - 
>> hwctx->num_col;
>> +        hwctx->num_col = xdna->dev_handle->total_col;
>> +        return aie2_create_context(xdna->dev_handle, hwctx);
>> +    }
>> +
>>       xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
>>       if (!xrs_req)
>>           return -ENOMEM;
>> @@ -499,7 +505,10 @@ static void aie2_release_resource(struct 
>> amdxdna_hwctx *hwctx)
>>       struct amdxdna_dev *xdna = hwctx->client->xdna;
>>       int ret;
>>   -    ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
>> +    if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY))
>> +        ret = aie2_destroy_context(xdna->dev_handle, hwctx);
>> +    else
>> +        ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
>>       if (ret)
>>           XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
>
> To avoid confusion, I think you want to pull the error string into the 
> if/else branch and have a unique error string for context destroy 
> failure or release failure.
Sure.
>
>>   }
>> diff --git a/drivers/accel/amdxdna/aie2_message.c 
>> b/drivers/accel/amdxdna/aie2_message.c
>> index 9ec973028221..e77a353cadc5 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -218,6 +218,7 @@ int aie2_create_context(struct amdxdna_dev_hdl 
>> *ndev, struct amdxdna_hwctx *hwct
>>       req.aie_type = 1;
>>       req.start_col = hwctx->start_col;
>>       req.num_col = hwctx->num_col;
>> +    req.num_unused_col = hwctx->num_unused_col;
>>       req.num_cq_pairs_requested = 1;
>>       req.pasid = hwctx->client->pasid;
>>       req.context_priority = 2;
>> diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h 
>> b/drivers/accel/amdxdna/aie2_msg_priv.h
>> index 1c957a6298d3..cc912b7899ce 100644
>> --- a/drivers/accel/amdxdna/aie2_msg_priv.h
>> +++ b/drivers/accel/amdxdna/aie2_msg_priv.h
>> @@ -112,7 +112,8 @@ struct create_ctx_req {
>>       __u32    aie_type;
>>       __u8    start_col;
>>       __u8    num_col;
>> -    __u16    reserved;
>> +    __u8    num_unused_col;
>> +    __u8    reserved;
>>       __u8    num_cq_pairs_requested;
>>       __u8    reserved1;
>>       __u16    pasid;
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h 
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index c6b5cf4ae5c4..a929fa98a121 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -232,6 +232,7 @@ struct aie2_hw_ops {
>>   enum aie2_fw_feature {
>>       AIE2_NPU_COMMAND,
>>       AIE2_PREEMPT,
>> +    AIE2_TEMPORAL_ONLY,
>>       AIE2_FEATURE_MAX
>>   };
>>   diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h 
>> b/drivers/accel/amdxdna/amdxdna_ctx.h
>> index b6151244d64f..b29449a92f60 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>> @@ -98,6 +98,7 @@ struct amdxdna_hwctx {
>>       u32                *col_list;
>>       u32                start_col;
>>       u32                num_col;
>> +    u32                num_unused_col;
>>   #define HWCTX_STAT_INIT  0
>>   #define HWCTX_STAT_READY 1
>>   #define HWCTX_STAT_STOP  2
>> diff --git a/drivers/accel/amdxdna/npu4_regs.c 
>> b/drivers/accel/amdxdna/npu4_regs.c
>> index 4ca21db70478..a62234fd266d 100644
>> --- a/drivers/accel/amdxdna/npu4_regs.c
>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>> @@ -90,6 +90,7 @@ const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>>   const struct aie2_fw_feature_tbl npu4_fw_feature_table[] = {
>>       { .feature = AIE2_NPU_COMMAND, .min_minor = 15 },
>>       { .feature = AIE2_PREEMPT, .min_minor = 12 },
>> +    { .feature = AIE2_TEMPORAL_ONLY, .min_minor = 12 },
>
> Similar to my comment on the other thread: is this really a feature for
> NPU2, 4, 5, and 6? Or is it only for NPU4 and later?

NPU2 is obsolete; I will remove it later.


Thanks,

Lizhi

>
>>       { 0 }
>>   };
>