[PATCH V2] accel/amdxdna: Support sensors for column utilization

Lizhi Hou posted 1 patch 3 weeks, 6 days ago
drivers/accel/amdxdna/aie2_pci.c | 34 +++++++++++++++++++++++++++-----
drivers/accel/amdxdna/aie2_pci.h |  8 ++++++++
include/uapi/drm/amdxdna_accel.h |  3 ++-
3 files changed, 39 insertions(+), 6 deletions(-)
[PATCH V2] accel/amdxdna: Support sensors for column utilization
Posted by Lizhi Hou 3 weeks, 6 days ago
From: "Mario Limonciello (AMD)" <superm1@kernel.org>

The AMD PMF driver provides realtime column utilization (npu_busy)
metrics for the NPU. Extend the DRM_IOCTL_AMDXDNA_GET_INFO sensor
query to expose these metrics to userspace.

Add AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION to the sensor type enum
and update aie2_get_sensors() to return both the total power and up
to 8 column utilization sensors if the user buffer permits.

Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
[lizhi: support legacy tool which uses small buffer. checkpatch cleanup]
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_pci.c | 34 +++++++++++++++++++++++++++-----
 drivers/accel/amdxdna/aie2_pci.h |  8 ++++++++
 include/uapi/drm/amdxdna_accel.h |  3 ++-
 3 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index a2e586512e26..c57c785a2d15 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -787,16 +787,18 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
 static int aie2_get_sensors(struct amdxdna_client *client,
 			    struct amdxdna_drm_get_info *args)
 {
+	struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
 	struct amdxdna_drm_query_sensor sensor = {};
+	struct amd_pmf_npu_metrics npu_metrics;
+	u32 sensors_count = 0, i;
 	int ret;
 
-	if (args->buffer_size < sizeof(sensor))
-		return -EINVAL;
-
-	ret = AIE2_GET_PMF_NPU_DATA(npu_power, sensor.input);
+	ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics);
 	if (ret)
 		return ret;
+
 	sensor.type = AMDXDNA_SENSOR_TYPE_POWER;
+	sensor.input = npu_metrics.npu_power;
 	sensor.unitm = -3;
 	scnprintf(sensor.label, sizeof(sensor.label), "Total Power");
 	scnprintf(sensor.units, sizeof(sensor.units), "mW");
@@ -804,7 +806,29 @@ static int aie2_get_sensors(struct amdxdna_client *client,
 	if (copy_to_user(u64_to_user_ptr(args->buffer), &sensor, sizeof(sensor)))
 		return -EFAULT;
 
-	args->buffer_size = sizeof(sensor);
+	sensors_count++;
+	if (args->buffer_size <= sensors_count * sizeof(sensor))
+		goto out;
+
+	for (i = 0; i < min_t(u32, ndev->total_col, 8); i++) {
+		memset(&sensor, 0, sizeof(sensor));
+		sensor.input = npu_metrics.npu_busy[i];
+		sensor.type = AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION;
+		sensor.unitm = 0;
+		scnprintf(sensor.label, sizeof(sensor.label), "Column %d Utilization", i);
+		scnprintf(sensor.units, sizeof(sensor.units), "%%");
+
+		if (copy_to_user(u64_to_user_ptr(args->buffer) + sensors_count * sizeof(sensor),
+				 &sensor, sizeof(sensor)))
+			return -EFAULT;
+
+		sensors_count++;
+		if (args->buffer_size <= sensors_count * sizeof(sensor))
+			goto out;
+	}
+
+out:
+	args->buffer_size = sensors_count * sizeof(sensor);
 
 	return 0;
 }
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 1bb88711bedb..0ae174862592 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -48,6 +48,7 @@
 })
 
 #if IS_ENABLED(CONFIG_AMD_PMF)
+#define AIE2_GET_PMF_NPU_METRICS(metrics) amd_pmf_get_npu_data(metrics)
 #define AIE2_GET_PMF_NPU_DATA(field, val)				\
 ({									\
 	struct amd_pmf_npu_metrics _npu_metrics;			\
@@ -58,6 +59,13 @@
 	(_ret);								\
 })
 #else
+#define AIE2_GET_PMF_NPU_METRICS(metrics)				\
+({									\
+	typeof(metrics) _m = metrics;					\
+	memset(_m, 0xff, sizeof(*_m));					\
+	(-EOPNOTSUPP);							\
+})
+
 #define SENSOR_DEFAULT_npu_power	U32_MAX
 #define AIE2_GET_PMF_NPU_DATA(field, val)				\
 ({									\
diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
index 9c44db2b3dcd..5bd13f4435f5 100644
--- a/include/uapi/drm/amdxdna_accel.h
+++ b/include/uapi/drm/amdxdna_accel.h
@@ -353,7 +353,8 @@ struct amdxdna_drm_query_clock_metadata {
 };
 
 enum amdxdna_sensor_type {
-	AMDXDNA_SENSOR_TYPE_POWER
+	AMDXDNA_SENSOR_TYPE_POWER,
+	AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION
 };
 
 /**
-- 
2.34.1
Re: [PATCH V2] accel/amdxdna: Support sensors for column utilization
Posted by Mario Limonciello 3 weeks, 6 days ago

On 3/11/26 12:18, Lizhi Hou wrote:
> From: "Mario Limonciello (AMD)" <superm1@kernel.org>
> 
> The AMD PMF driver provides realtime column utilization (npu_busy)
> metrics for the NPU. Extend the DRM_IOCTL_AMDXDNA_GET_INFO sensor
> query to expose these metrics to userspace.
> 
> Add AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION to the sensor type enum
> and update aie2_get_sensors() to return both the total power and up
> to 8 column utilization sensors if the user buffer permits.
> 
> Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
> [lizhi: support legacy tool which uses small buffer. checkpatch cleanup]
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>

Thanks, no concerns on the cleanups.

> ---
>   drivers/accel/amdxdna/aie2_pci.c | 34 +++++++++++++++++++++++++++-----
>   drivers/accel/amdxdna/aie2_pci.h |  8 ++++++++
>   include/uapi/drm/amdxdna_accel.h |  3 ++-
>   3 files changed, 39 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index a2e586512e26..c57c785a2d15 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -787,16 +787,18 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
>   static int aie2_get_sensors(struct amdxdna_client *client,
>   			    struct amdxdna_drm_get_info *args)
>   {
> +	struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
>   	struct amdxdna_drm_query_sensor sensor = {};
> +	struct amd_pmf_npu_metrics npu_metrics;
> +	u32 sensors_count = 0, i;
>   	int ret;
>   
> -	if (args->buffer_size < sizeof(sensor))
> -		return -EINVAL;
> -
> -	ret = AIE2_GET_PMF_NPU_DATA(npu_power, sensor.input);
> +	ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics);
>   	if (ret)
>   		return ret;
> +
>   	sensor.type = AMDXDNA_SENSOR_TYPE_POWER;
> +	sensor.input = npu_metrics.npu_power;
>   	sensor.unitm = -3;
>   	scnprintf(sensor.label, sizeof(sensor.label), "Total Power");
>   	scnprintf(sensor.units, sizeof(sensor.units), "mW");
> @@ -804,7 +806,29 @@ static int aie2_get_sensors(struct amdxdna_client *client,
>   	if (copy_to_user(u64_to_user_ptr(args->buffer), &sensor, sizeof(sensor)))
>   		return -EFAULT;
>   
> -	args->buffer_size = sizeof(sensor);
> +	sensors_count++;
> +	if (args->buffer_size <= sensors_count * sizeof(sensor))
> +		goto out;
> +
> +	for (i = 0; i < min_t(u32, ndev->total_col, 8); i++) {
> +		memset(&sensor, 0, sizeof(sensor));
> +		sensor.input = npu_metrics.npu_busy[i];
> +		sensor.type = AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION;
> +		sensor.unitm = 0;
> +		scnprintf(sensor.label, sizeof(sensor.label), "Column %d Utilization", i);
> +		scnprintf(sensor.units, sizeof(sensor.units), "%%");
> +
> +		if (copy_to_user(u64_to_user_ptr(args->buffer) + sensors_count * sizeof(sensor),
> +				 &sensor, sizeof(sensor)))
> +			return -EFAULT;
> +
> +		sensors_count++;
> +		if (args->buffer_size <= sensors_count * sizeof(sensor))
> +			goto out;
> +	}
> +
> +out:
> +	args->buffer_size = sensors_count * sizeof(sensor);
>   
>   	return 0;
>   }
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index 1bb88711bedb..0ae174862592 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -48,6 +48,7 @@
>   })
>   
>   #if IS_ENABLED(CONFIG_AMD_PMF)
> +#define AIE2_GET_PMF_NPU_METRICS(metrics) amd_pmf_get_npu_data(metrics)
>   #define AIE2_GET_PMF_NPU_DATA(field, val)				\
>   ({									\
>   	struct amd_pmf_npu_metrics _npu_metrics;			\
> @@ -58,6 +59,13 @@
>   	(_ret);								\
>   })
>   #else
> +#define AIE2_GET_PMF_NPU_METRICS(metrics)				\
> +({									\
> +	typeof(metrics) _m = metrics;					\
> +	memset(_m, 0xff, sizeof(*_m));					\
> +	(-EOPNOTSUPP);							\
> +})
> +
>   #define SENSOR_DEFAULT_npu_power	U32_MAX
>   #define AIE2_GET_PMF_NPU_DATA(field, val)				\
>   ({									\
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index 9c44db2b3dcd..5bd13f4435f5 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -353,7 +353,8 @@ struct amdxdna_drm_query_clock_metadata {
>   };
>   
>   enum amdxdna_sensor_type {
> -	AMDXDNA_SENSOR_TYPE_POWER
> +	AMDXDNA_SENSOR_TYPE_POWER,
> +	AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION
>   };
>   
>   /**
Re: [PATCH V2] accel/amdxdna: Support sensors for column utilization
Posted by Lizhi Hou 3 weeks, 5 days ago
Applied to drm-misc-next

On 3/11/26 10:21, Mario Limonciello wrote:
>
>
> On 3/11/26 12:18, Lizhi Hou wrote:
>> From: "Mario Limonciello (AMD)" <superm1@kernel.org>
>>
>> The AMD PMF driver provides realtime column utilization (npu_busy)
>> metrics for the NPU. Extend the DRM_IOCTL_AMDXDNA_GET_INFO sensor
>> query to expose these metrics to userspace.
>>
>> Add AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION to the sensor type enum
>> and update aie2_get_sensors() to return both the total power and up
>> to 8 column utilization sensors if the user buffer permits.
>>
>> Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
>> Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
>> [lizhi: support legacy tool which uses small buffer. checkpatch cleanup]
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
>
> Thanks, no concerns on the cleanups.
>
>> ---
>>   drivers/accel/amdxdna/aie2_pci.c | 34 +++++++++++++++++++++++++++-----
>>   drivers/accel/amdxdna/aie2_pci.h |  8 ++++++++
>>   include/uapi/drm/amdxdna_accel.h |  3 ++-
>>   3 files changed, 39 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
>> index a2e586512e26..c57c785a2d15 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -787,16 +787,18 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
>>   static int aie2_get_sensors(struct amdxdna_client *client,
>>                   struct amdxdna_drm_get_info *args)
>>   {
>> +    struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
>>       struct amdxdna_drm_query_sensor sensor = {};
>> +    struct amd_pmf_npu_metrics npu_metrics;
>> +    u32 sensors_count = 0, i;
>>       int ret;
>>   -    if (args->buffer_size < sizeof(sensor))
>> -        return -EINVAL;
>> -
>> -    ret = AIE2_GET_PMF_NPU_DATA(npu_power, sensor.input);
>> +    ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics);
>>       if (ret)
>>           return ret;
>> +
>>       sensor.type = AMDXDNA_SENSOR_TYPE_POWER;
>> +    sensor.input = npu_metrics.npu_power;
>>       sensor.unitm = -3;
>>       scnprintf(sensor.label, sizeof(sensor.label), "Total Power");
>>       scnprintf(sensor.units, sizeof(sensor.units), "mW");
>> @@ -804,7 +806,29 @@ static int aie2_get_sensors(struct amdxdna_client *client,
>>       if (copy_to_user(u64_to_user_ptr(args->buffer), &sensor, sizeof(sensor)))
>>           return -EFAULT;
>>   -    args->buffer_size = sizeof(sensor);
>> +    sensors_count++;
>> +    if (args->buffer_size <= sensors_count * sizeof(sensor))
>> +        goto out;
>> +
>> +    for (i = 0; i < min_t(u32, ndev->total_col, 8); i++) {
>> +        memset(&sensor, 0, sizeof(sensor));
>> +        sensor.input = npu_metrics.npu_busy[i];
>> +        sensor.type = AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION;
>> +        sensor.unitm = 0;
>> +        scnprintf(sensor.label, sizeof(sensor.label), "Column %d Utilization", i);
>> +        scnprintf(sensor.units, sizeof(sensor.units), "%%");
>> +
>> +        if (copy_to_user(u64_to_user_ptr(args->buffer) + sensors_count * sizeof(sensor),
>> +                 &sensor, sizeof(sensor)))
>> +            return -EFAULT;
>> +
>> +        sensors_count++;
>> +        if (args->buffer_size <= sensors_count * sizeof(sensor))
>> +            goto out;
>> +    }
>> +
>> +out:
>> +    args->buffer_size = sensors_count * sizeof(sensor);
>>         return 0;
>>   }
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
>> index 1bb88711bedb..0ae174862592 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -48,6 +48,7 @@
>>   })
>>     #if IS_ENABLED(CONFIG_AMD_PMF)
>> +#define AIE2_GET_PMF_NPU_METRICS(metrics) amd_pmf_get_npu_data(metrics)
>>   #define AIE2_GET_PMF_NPU_DATA(field, val)                \
>>   ({                                    \
>>       struct amd_pmf_npu_metrics _npu_metrics;            \
>> @@ -58,6 +59,13 @@
>>       (_ret);                                \
>>   })
>>   #else
>> +#define AIE2_GET_PMF_NPU_METRICS(metrics)                \
>> +({                                    \
>> +    typeof(metrics) _m = metrics;                    \
>> +    memset(_m, 0xff, sizeof(*_m));                    \
>> +    (-EOPNOTSUPP);                            \
>> +})
>> +
>>   #define SENSOR_DEFAULT_npu_power    U32_MAX
>>   #define AIE2_GET_PMF_NPU_DATA(field, val)                \
>>   ({                                    \
>> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
>> index 9c44db2b3dcd..5bd13f4435f5 100644
>> --- a/include/uapi/drm/amdxdna_accel.h
>> +++ b/include/uapi/drm/amdxdna_accel.h
>> @@ -353,7 +353,8 @@ struct amdxdna_drm_query_clock_metadata {
>>   };
>>     enum amdxdna_sensor_type {
>> -    AMDXDNA_SENSOR_TYPE_POWER
>> +    AMDXDNA_SENSOR_TYPE_POWER,
>> +    AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION
>>   };
>>     /**
>