[PATCH V1] accel/amdxdna: Read real-time clock frequencies

Lizhi Hou posted 1 patch 2 months, 2 weeks ago
drivers/accel/amdxdna/aie2_pci.c  |  4 +++-
drivers/accel/amdxdna/aie2_pci.h  | 12 ++++++++--
drivers/accel/amdxdna/aie2_pm.c   |  6 ++---
drivers/accel/amdxdna/npu1_regs.c |  2 +-
drivers/accel/amdxdna/npu4_regs.c | 39 +++++++++++++++++++++----------
drivers/accel/amdxdna/npu5_regs.c |  4 +---
drivers/accel/amdxdna/npu6_regs.c |  4 +---
7 files changed, 46 insertions(+), 25 deletions(-)
[PATCH V1] accel/amdxdna: Read real-time clock frequencies
Posted by Lizhi Hou 2 months, 2 weeks ago
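Add support for reading real-time clock frequencies through the PMF
interface.

Introduce an optional update_counters() callback in struct aie2_hw_ops.
The NPU4 implementation, shared by NPU5 and NPU6, reads the PMF NPU
metrics and refreshes the cached MP-NPU clock, H clock and current TOPS
values; NPU1 does not provide the callback. It is invoked before clock
metadata and resource info are reported, so these queries reflect the
current frequencies instead of the DPM table values recorded by the last
set_dpm() call. If the PMF read fails, the cached values are left
unchanged.

To share a single ops table between NPU4, NPU5 and NPU6, make
amdxdna_dev_priv.hw_ops a pointer to a const struct aie2_hw_ops, and
change NPU4_DPM_TOPS() to take the H clock frequency instead of a DPM
level.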

Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
---
 drivers/accel/amdxdna/aie2_pci.c  |  4 +++-
 drivers/accel/amdxdna/aie2_pci.h  | 12 ++++++++--
 drivers/accel/amdxdna/aie2_pm.c   |  6 ++---
 drivers/accel/amdxdna/npu1_regs.c |  2 +-
 drivers/accel/amdxdna/npu4_regs.c | 39 +++++++++++++++++++++----------
 drivers/accel/amdxdna/npu5_regs.c |  4 +---
 drivers/accel/amdxdna/npu6_regs.c |  4 +---
 7 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index 041cbc8cd7e5..c9c23c889c78 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -284,7 +284,7 @@ static struct xrs_action_ops aie2_xrs_actions = {
 
 static void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
 {
-	ndev->priv->hw_ops.set_dpm(ndev, 0);
+	ndev->priv->hw_ops->set_dpm(ndev, 0);
 	aie_smu_fini(ndev->aie.smu_hdl);
 }
 
@@ -765,6 +765,7 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
 	if (!clock)
 		return -ENOMEM;
 
+	aie2_update_counters(ndev);
 	snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
 		 "MP-NPU Clock");
 	clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
@@ -925,6 +926,7 @@ static int aie2_query_resource_info(struct amdxdna_client *client,
 	ndev = xdna->dev_handle;
 	priv = ndev->priv;
 
+	aie2_update_counters(ndev);
 	res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk;
 	res_info.npu_tops_max = ndev->max_tops;
 	res_info.npu_task_max = priv->hwctx_limit;
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 7c308672b5fe..77ba125e4d72 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -201,8 +201,16 @@ struct amdxdna_dev_hdl {
 
 struct aie2_hw_ops {
 	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+	int (*update_counters)(struct amdxdna_dev_hdl *ndev);
 };
 
+#define aie2_update_counters(ndev)				\
+({								\
+	typeof(ndev) _ndev = ndev;				\
+	if (_ndev->priv->hw_ops->update_counters)		\
+		_ndev->priv->hw_ops->update_counters(_ndev);	\
+})
+
 enum aie2_fw_feature {
 	AIE2_NPU_COMMAND,
 	AIE2_PREEMPT,
@@ -229,7 +237,7 @@ struct amdxdna_dev_priv {
 	struct aie_bar_off_pair		sram_offs[SRAM_MAX_INDEX];
 	struct aie_bar_off_pair		psp_regs_off[PSP_MAX_REGS];
 	struct aie_bar_off_pair		smu_regs_off[SMU_MAX_REGS];
-	struct aie2_hw_ops		hw_ops;
+	const struct aie2_hw_ops	*hw_ops;
 };
 
 extern const struct amdxdna_dev_ops aie2_ops;
@@ -243,7 +251,7 @@ extern const struct dpm_clk_freq npu4_dpm_clk_table[];
 extern const struct rt_config npu1_default_rt_cfg[];
 extern const struct rt_config npu4_default_rt_cfg[];
 extern const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[];
-int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+extern const struct aie2_hw_ops npu4_hw_ops;
 
 /* aie2_pm.c */
 int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
index 5ec6728d04fd..786d688bd82c 100644
--- a/drivers/accel/amdxdna/aie2_pm.c
+++ b/drivers/accel/amdxdna/aie2_pm.c
@@ -35,7 +35,7 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 	if (ret)
 		return ret;
 
-	ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+	ret = ndev->priv->hw_ops->set_dpm(ndev, dpm_level);
 	if (!ret)
 		ndev->dpm_level = dpm_level;
 	amdxdna_pm_suspend_put(ndev->aie.xdna);
@@ -49,7 +49,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
 
 	if (ndev->dev_status != AIE2_DEV_UNINIT) {
 		/* Resume device */
-		ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
+		ret = ndev->priv->hw_ops->set_dpm(ndev, ndev->dpm_level);
 		if (ret)
 			return ret;
 
@@ -64,7 +64,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
 		ndev->max_dpm_level++;
 	ndev->max_dpm_level--;
 
-	ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
+	ret = ndev->priv->hw_ops->set_dpm(ndev, ndev->max_dpm_level);
 	if (ret)
 		return ret;
 	ndev->dpm_level = ndev->max_dpm_level;
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index a83e44f378ad..f1141a65e64d 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -122,7 +122,7 @@ static const struct amdxdna_dev_priv npu1_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
 	},
-	.hw_ops		= {
+	.hw_ops		= &(const struct aie2_hw_ops) {
 		.set_dpm = npu1_set_dpm,
 	},
 };
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index 5d68171f4ec2..a3b6df56abd0 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -6,6 +6,7 @@
 #include <drm/amdxdna_accel.h>
 #include <drm/drm_device.h>
 #include <drm/gpu_scheduler.h>
+#include <linux/amd-pmf-io.h>
 #include <linux/bits.h>
 #include <linux/sizes.h>
 
@@ -63,12 +64,7 @@
 #define NPU4_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
 #define NPU4_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
 
-#define NPU4_DPM_TOPS(ndev, dpm_level) \
-({ \
-	typeof(ndev) _ndev = ndev; \
-	(4096 * (_ndev)->total_col * \
-	 (_ndev)->priv->dpm_clk_tbl[dpm_level].hclk / 1000000); \
-})
+#define NPU4_DPM_TOPS(ndev, hclk) (4096 * (ndev)->total_col * (hclk) / 1000000)
 
 const struct rt_config npu4_default_rt_cfg[] = {
 	{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
@@ -105,7 +101,7 @@ const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
 	{ 0 }
 };
 
-int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
+static int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 {
 	int ret;
 
@@ -115,8 +111,8 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 
 	ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
 	ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
-	ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level);
-	ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level);
+	ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk);
+	ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
 
 	XDNA_DBG(ndev->aie.xdna, "MP-NPU clock %d, H clock %d\n",
 		 ndev->npuclk_freq, ndev->hclk_freq);
@@ -124,6 +120,27 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 	return 0;
 }
 
+static int npu4_update_counters(struct amdxdna_dev_hdl *ndev)
+{
+	struct amd_pmf_npu_metrics npu_metrics;
+	int ret;
+
+	ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics);
+	if (ret)
+		return ret;
+
+	ndev->npuclk_freq = npu_metrics.mpnpuclk_freq;
+	ndev->hclk_freq = npu_metrics.npuclk_freq;
+	ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
+
+	return 0;
+}
+
+const struct aie2_hw_ops npu4_hw_ops = {
+	.set_dpm = npu4_set_dpm,
+	.update_counters = npu4_update_counters,
+};
+
 static const struct amdxdna_dev_priv npu4_dev_priv = {
 	.fw_path        = "amdnpu/17f0_10/",
 	.rt_config	= npu4_default_rt_cfg,
@@ -154,9 +171,7 @@ static const struct amdxdna_dev_priv npu4_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
 	},
-	.hw_ops		= {
-		.set_dpm = npu4_set_dpm,
-	},
+	.hw_ops		= &npu4_hw_ops
 };
 
 const struct amdxdna_dev_info dev_npu4_info = {
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index 98ee8780f3f5..6d4596b9e61e 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -92,9 +92,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
 	},
-	.hw_ops		= {
-		.set_dpm = npu4_set_dpm,
-	},
+	.hw_ops		= &npu4_hw_ops
 };
 
 const struct amdxdna_dev_info dev_npu5_info = {
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
index 31400cca5ec4..76181345b6d1 100644
--- a/drivers/accel/amdxdna/npu6_regs.c
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -92,9 +92,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
 		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
 		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
 	},
-	.hw_ops         = {
-		.set_dpm = npu4_set_dpm,
-	},
+	.hw_ops         = &npu4_hw_ops
 
 };
 
-- 
2.34.1
Re: [PATCH V1] accel/amdxdna: Read real-time clock frequencies
Posted by Mario Limonciello 2 months, 1 week ago

On 4/6/26 17:05, Lizhi Hou wrote:
> Add support for reading real-time clock frequencies through the PMF
> interface.
> 
> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
>   drivers/accel/amdxdna/aie2_pci.c  |  4 +++-
>   drivers/accel/amdxdna/aie2_pci.h  | 12 ++++++++--
>   drivers/accel/amdxdna/aie2_pm.c   |  6 ++---
>   drivers/accel/amdxdna/npu1_regs.c |  2 +-
>   drivers/accel/amdxdna/npu4_regs.c | 39 +++++++++++++++++++++----------
>   drivers/accel/amdxdna/npu5_regs.c |  4 +---
>   drivers/accel/amdxdna/npu6_regs.c |  4 +---
>   7 files changed, 46 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 041cbc8cd7e5..c9c23c889c78 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -284,7 +284,7 @@ static struct xrs_action_ops aie2_xrs_actions = {
>   
>   static void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
>   {
> -	ndev->priv->hw_ops.set_dpm(ndev, 0);
> +	ndev->priv->hw_ops->set_dpm(ndev, 0);
>   	aie_smu_fini(ndev->aie.smu_hdl);
>   }
>   
> @@ -765,6 +765,7 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
>   	if (!clock)
>   		return -ENOMEM;
>   
> +	aie2_update_counters(ndev);
>   	snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
>   		 "MP-NPU Clock");
>   	clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
> @@ -925,6 +926,7 @@ static int aie2_query_resource_info(struct amdxdna_client *client,
>   	ndev = xdna->dev_handle;
>   	priv = ndev->priv;
>   
> +	aie2_update_counters(ndev);
>   	res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk;
>   	res_info.npu_tops_max = ndev->max_tops;
>   	res_info.npu_task_max = priv->hwctx_limit;
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index 7c308672b5fe..77ba125e4d72 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -201,8 +201,16 @@ struct amdxdna_dev_hdl {
>   
>   struct aie2_hw_ops {
>   	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
> +	int (*update_counters)(struct amdxdna_dev_hdl *ndev);
>   };
>   
> +#define aie2_update_counters(ndev)				\
> +({								\
> +	typeof(ndev) _ndev = ndev;				\
> +	if (_ndev->priv->hw_ops->update_counters)		\
> +		_ndev->priv->hw_ops->update_counters(_ndev);	\
> +})
> +
>   enum aie2_fw_feature {
>   	AIE2_NPU_COMMAND,
>   	AIE2_PREEMPT,
> @@ -229,7 +237,7 @@ struct amdxdna_dev_priv {
>   	struct aie_bar_off_pair		sram_offs[SRAM_MAX_INDEX];
>   	struct aie_bar_off_pair		psp_regs_off[PSP_MAX_REGS];
>   	struct aie_bar_off_pair		smu_regs_off[SMU_MAX_REGS];
> -	struct aie2_hw_ops		hw_ops;
> +	const struct aie2_hw_ops	*hw_ops;
>   };
>   
>   extern const struct amdxdna_dev_ops aie2_ops;
> @@ -243,7 +251,7 @@ extern const struct dpm_clk_freq npu4_dpm_clk_table[];
>   extern const struct rt_config npu1_default_rt_cfg[];
>   extern const struct rt_config npu4_default_rt_cfg[];
>   extern const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[];
> -int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
> +extern const struct aie2_hw_ops npu4_hw_ops;
>   
>   /* aie2_pm.c */
>   int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
> diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
> index 5ec6728d04fd..786d688bd82c 100644
> --- a/drivers/accel/amdxdna/aie2_pm.c
> +++ b/drivers/accel/amdxdna/aie2_pm.c
> @@ -35,7 +35,7 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>   	if (ret)
>   		return ret;
>   
> -	ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
> +	ret = ndev->priv->hw_ops->set_dpm(ndev, dpm_level);
>   	if (!ret)
>   		ndev->dpm_level = dpm_level;
>   	amdxdna_pm_suspend_put(ndev->aie.xdna);
> @@ -49,7 +49,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
>   
>   	if (ndev->dev_status != AIE2_DEV_UNINIT) {
>   		/* Resume device */
> -		ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
> +		ret = ndev->priv->hw_ops->set_dpm(ndev, ndev->dpm_level);
>   		if (ret)
>   			return ret;
>   
> @@ -64,7 +64,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
>   		ndev->max_dpm_level++;
>   	ndev->max_dpm_level--;
>   
> -	ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
> +	ret = ndev->priv->hw_ops->set_dpm(ndev, ndev->max_dpm_level);
>   	if (ret)
>   		return ret;
>   	ndev->dpm_level = ndev->max_dpm_level;
> diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
> index a83e44f378ad..f1141a65e64d 100644
> --- a/drivers/accel/amdxdna/npu1_regs.c
> +++ b/drivers/accel/amdxdna/npu1_regs.c
> @@ -122,7 +122,7 @@ static const struct amdxdna_dev_priv npu1_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
>   	},
> -	.hw_ops		= {
> +	.hw_ops		= &(const struct aie2_hw_ops) {
>   		.set_dpm = npu1_set_dpm,
>   	},
>   };
> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
> index 5d68171f4ec2..a3b6df56abd0 100644
> --- a/drivers/accel/amdxdna/npu4_regs.c
> +++ b/drivers/accel/amdxdna/npu4_regs.c
> @@ -6,6 +6,7 @@
>   #include <drm/amdxdna_accel.h>
>   #include <drm/drm_device.h>
>   #include <drm/gpu_scheduler.h>
> +#include <linux/amd-pmf-io.h>
>   #include <linux/bits.h>
>   #include <linux/sizes.h>
>   
> @@ -63,12 +64,7 @@
>   #define NPU4_SMU_BAR_BASE	MMNPU_APERTURE4_BASE
>   #define NPU4_SRAM_BAR_BASE	MMNPU_APERTURE1_BASE
>   
> -#define NPU4_DPM_TOPS(ndev, dpm_level) \
> -({ \
> -	typeof(ndev) _ndev = ndev; \
> -	(4096 * (_ndev)->total_col * \
> -	 (_ndev)->priv->dpm_clk_tbl[dpm_level].hclk / 1000000); \
> -})
> +#define NPU4_DPM_TOPS(ndev, hclk) (4096 * (ndev)->total_col * (hclk) / 1000000)
>   
>   const struct rt_config npu4_default_rt_cfg[] = {
>   	{ 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
> @@ -105,7 +101,7 @@ const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
>   	{ 0 }
>   };
>   
> -int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
> +static int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>   {
>   	int ret;
>   
> @@ -115,8 +111,8 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>   
>   	ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>   	ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
> -	ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level);
> -	ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level);
> +	ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk);
> +	ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
>   
>   	XDNA_DBG(ndev->aie.xdna, "MP-NPU clock %d, H clock %d\n",
>   		 ndev->npuclk_freq, ndev->hclk_freq);
> @@ -124,6 +120,27 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>   	return 0;
>   }
>   
> +static int npu4_update_counters(struct amdxdna_dev_hdl *ndev)
> +{
> +	struct amd_pmf_npu_metrics npu_metrics;
> +	int ret;
> +
> +	ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics);
> +	if (ret)
> +		return ret;
> +
> +	ndev->npuclk_freq = npu_metrics.mpnpuclk_freq;
> +	ndev->hclk_freq = npu_metrics.npuclk_freq;
> +	ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
> +
> +	return 0;
> +}
> +
> +const struct aie2_hw_ops npu4_hw_ops = {
> +	.set_dpm = npu4_set_dpm,
> +	.update_counters = npu4_update_counters,
> +};
> +
>   static const struct amdxdna_dev_priv npu4_dev_priv = {
>   	.fw_path        = "amdnpu/17f0_10/",
>   	.rt_config	= npu4_default_rt_cfg,
> @@ -154,9 +171,7 @@ static const struct amdxdna_dev_priv npu4_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
>   	},
> -	.hw_ops		= {
> -		.set_dpm = npu4_set_dpm,
> -	},
> +	.hw_ops		= &npu4_hw_ops
>   };
>   
>   const struct amdxdna_dev_info dev_npu4_info = {
> diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
> index 98ee8780f3f5..6d4596b9e61e 100644
> --- a/drivers/accel/amdxdna/npu5_regs.c
> +++ b/drivers/accel/amdxdna/npu5_regs.c
> @@ -92,9 +92,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
>   	},
> -	.hw_ops		= {
> -		.set_dpm = npu4_set_dpm,
> -	},
> +	.hw_ops		= &npu4_hw_ops
>   };
>   
>   const struct amdxdna_dev_info dev_npu5_info = {
> diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
> index 31400cca5ec4..76181345b6d1 100644
> --- a/drivers/accel/amdxdna/npu6_regs.c
> +++ b/drivers/accel/amdxdna/npu6_regs.c
> @@ -92,9 +92,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
>   		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
>   		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
>   	},
> -	.hw_ops         = {
> -		.set_dpm = npu4_set_dpm,
> -	},
> +	.hw_ops         = &npu4_hw_ops
>   
>   };
>
Re: [PATCH V1] accel/amdxdna: Read real-time clock frequencies
Posted by Lizhi Hou 2 months, 1 week ago
Applied to drm-misc-next

On 4/10/26 20:33, Mario Limonciello wrote:
>
>
> On 4/6/26 17:05, Lizhi Hou wrote:
>> Add support for reading real-time clock frequencies through the PMF
>> interface.
>>
>> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
> Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
>> ---
>>   drivers/accel/amdxdna/aie2_pci.c  |  4 +++-
>>   drivers/accel/amdxdna/aie2_pci.h  | 12 ++++++++--
>>   drivers/accel/amdxdna/aie2_pm.c   |  6 ++---
>>   drivers/accel/amdxdna/npu1_regs.c |  2 +-
>>   drivers/accel/amdxdna/npu4_regs.c | 39 +++++++++++++++++++++----------
>>   drivers/accel/amdxdna/npu5_regs.c |  4 +---
>>   drivers/accel/amdxdna/npu6_regs.c |  4 +---
>>   7 files changed, 46 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index 041cbc8cd7e5..c9c23c889c78 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -284,7 +284,7 @@ static struct xrs_action_ops aie2_xrs_actions = {
>>     static void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
>>   {
>> -    ndev->priv->hw_ops.set_dpm(ndev, 0);
>> +    ndev->priv->hw_ops->set_dpm(ndev, 0);
>>       aie_smu_fini(ndev->aie.smu_hdl);
>>   }
>>   @@ -765,6 +765,7 @@ static int aie2_get_clock_metadata(struct 
>> amdxdna_client *client,
>>       if (!clock)
>>           return -ENOMEM;
>>   +    aie2_update_counters(ndev);
>>       snprintf(clock->mp_npu_clock.name, 
>> sizeof(clock->mp_npu_clock.name),
>>            "MP-NPU Clock");
>>       clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
>> @@ -925,6 +926,7 @@ static int aie2_query_resource_info(struct 
>> amdxdna_client *client,
>>       ndev = xdna->dev_handle;
>>       priv = ndev->priv;
>>   +    aie2_update_counters(ndev);
>>       res_info.npu_clk_max = 
>> priv->dpm_clk_tbl[ndev->max_dpm_level].hclk;
>>       res_info.npu_tops_max = ndev->max_tops;
>>       res_info.npu_task_max = priv->hwctx_limit;
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h 
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index 7c308672b5fe..77ba125e4d72 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -201,8 +201,16 @@ struct amdxdna_dev_hdl {
>>     struct aie2_hw_ops {
>>       int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>> +    int (*update_counters)(struct amdxdna_dev_hdl *ndev);
>>   };
>>   +#define aie2_update_counters(ndev)                \
>> +({                                \
>> +    typeof(ndev) _ndev = ndev;                \
>> +    if (_ndev->priv->hw_ops->update_counters)        \
>> +        _ndev->priv->hw_ops->update_counters(_ndev);    \
>> +})
>> +
>>   enum aie2_fw_feature {
>>       AIE2_NPU_COMMAND,
>>       AIE2_PREEMPT,
>> @@ -229,7 +237,7 @@ struct amdxdna_dev_priv {
>>       struct aie_bar_off_pair        sram_offs[SRAM_MAX_INDEX];
>>       struct aie_bar_off_pair        psp_regs_off[PSP_MAX_REGS];
>>       struct aie_bar_off_pair        smu_regs_off[SMU_MAX_REGS];
>> -    struct aie2_hw_ops        hw_ops;
>> +    const struct aie2_hw_ops    *hw_ops;
>>   };
>>     extern const struct amdxdna_dev_ops aie2_ops;
>> @@ -243,7 +251,7 @@ extern const struct dpm_clk_freq 
>> npu4_dpm_clk_table[];
>>   extern const struct rt_config npu1_default_rt_cfg[];
>>   extern const struct rt_config npu4_default_rt_cfg[];
>>   extern const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[];
>> -int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>> +extern const struct aie2_hw_ops npu4_hw_ops;
>>     /* aie2_pm.c */
>>   int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
>> diff --git a/drivers/accel/amdxdna/aie2_pm.c 
>> b/drivers/accel/amdxdna/aie2_pm.c
>> index 5ec6728d04fd..786d688bd82c 100644
>> --- a/drivers/accel/amdxdna/aie2_pm.c
>> +++ b/drivers/accel/amdxdna/aie2_pm.c
>> @@ -35,7 +35,7 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, 
>> u32 dpm_level)
>>       if (ret)
>>           return ret;
>>   -    ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
>> +    ret = ndev->priv->hw_ops->set_dpm(ndev, dpm_level);
>>       if (!ret)
>>           ndev->dpm_level = dpm_level;
>>       amdxdna_pm_suspend_put(ndev->aie.xdna);
>> @@ -49,7 +49,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
>>         if (ndev->dev_status != AIE2_DEV_UNINIT) {
>>           /* Resume device */
>> -        ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
>> +        ret = ndev->priv->hw_ops->set_dpm(ndev, ndev->dpm_level);
>>           if (ret)
>>               return ret;
>>   @@ -64,7 +64,7 @@ int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
>>           ndev->max_dpm_level++;
>>       ndev->max_dpm_level--;
>>   -    ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
>> +    ret = ndev->priv->hw_ops->set_dpm(ndev, ndev->max_dpm_level);
>>       if (ret)
>>           return ret;
>>       ndev->dpm_level = ndev->max_dpm_level;
>> diff --git a/drivers/accel/amdxdna/npu1_regs.c 
>> b/drivers/accel/amdxdna/npu1_regs.c
>> index a83e44f378ad..f1141a65e64d 100644
>> --- a/drivers/accel/amdxdna/npu1_regs.c
>> +++ b/drivers/accel/amdxdna/npu1_regs.c
>> @@ -122,7 +122,7 @@ static const struct amdxdna_dev_priv 
>> npu1_dev_priv = {
>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
>>       },
>> -    .hw_ops        = {
>> +    .hw_ops        = &(const struct aie2_hw_ops) {
>>           .set_dpm = npu1_set_dpm,
>>       },
>>   };
>> diff --git a/drivers/accel/amdxdna/npu4_regs.c 
>> b/drivers/accel/amdxdna/npu4_regs.c
>> index 5d68171f4ec2..a3b6df56abd0 100644
>> --- a/drivers/accel/amdxdna/npu4_regs.c
>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>> @@ -6,6 +6,7 @@
>>   #include <drm/amdxdna_accel.h>
>>   #include <drm/drm_device.h>
>>   #include <drm/gpu_scheduler.h>
>> +#include <linux/amd-pmf-io.h>
>>   #include <linux/bits.h>
>>   #include <linux/sizes.h>
>>   @@ -63,12 +64,7 @@
>>   #define NPU4_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>   #define NPU4_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>   -#define NPU4_DPM_TOPS(ndev, dpm_level) \
>> -({ \
>> -    typeof(ndev) _ndev = ndev; \
>> -    (4096 * (_ndev)->total_col * \
>> -     (_ndev)->priv->dpm_clk_tbl[dpm_level].hclk / 1000000); \
>> -})
>> +#define NPU4_DPM_TOPS(ndev, hclk) (4096 * (ndev)->total_col * (hclk) 
>> / 1000000)
>>     const struct rt_config npu4_default_rt_cfg[] = {
>>       { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
>> @@ -105,7 +101,7 @@ const struct amdxdna_fw_feature_tbl 
>> npu4_fw_feature_table[] = {
>>       { 0 }
>>   };
>>   -int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>> +static int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>   {
>>       int ret;
>>   @@ -115,8 +111,8 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, 
>> u32 dpm_level)
>>         ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>>       ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
>> -    ndev->max_tops = NPU4_DPM_TOPS(ndev, ndev->max_dpm_level);
>> -    ndev->curr_tops = NPU4_DPM_TOPS(ndev, dpm_level);
>> +    ndev->max_tops = NPU4_DPM_TOPS(ndev, 
>> ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk);
>> +    ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
>>         XDNA_DBG(ndev->aie.xdna, "MP-NPU clock %d, H clock %d\n",
>>            ndev->npuclk_freq, ndev->hclk_freq);
>> @@ -124,6 +120,27 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, 
>> u32 dpm_level)
>>       return 0;
>>   }
>>   +static int npu4_update_counters(struct amdxdna_dev_hdl *ndev)
>> +{
>> +    struct amd_pmf_npu_metrics npu_metrics;
>> +    int ret;
>> +
>> +    ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics);
>> +    if (ret)
>> +        return ret;
>> +
>> +    ndev->npuclk_freq = npu_metrics.mpnpuclk_freq;
>> +    ndev->hclk_freq = npu_metrics.npuclk_freq;
>> +    ndev->curr_tops = NPU4_DPM_TOPS(ndev, ndev->hclk_freq);
>> +
>> +    return 0;
>> +}
>> +
>> +const struct aie2_hw_ops npu4_hw_ops = {
>> +    .set_dpm = npu4_set_dpm,
>> +    .update_counters = npu4_update_counters,
>> +};
>> +
>>   static const struct amdxdna_dev_priv npu4_dev_priv = {
>>       .fw_path        = "amdnpu/17f0_10/",
>>       .rt_config    = npu4_default_rt_cfg,
>> @@ -154,9 +171,7 @@ static const struct amdxdna_dev_priv 
>> npu4_dev_priv = {
>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
>>       },
>> -    .hw_ops        = {
>> -        .set_dpm = npu4_set_dpm,
>> -    },
>> +    .hw_ops        = &npu4_hw_ops
>>   };
>>     const struct amdxdna_dev_info dev_npu4_info = {
>> diff --git a/drivers/accel/amdxdna/npu5_regs.c 
>> b/drivers/accel/amdxdna/npu5_regs.c
>> index 98ee8780f3f5..6d4596b9e61e 100644
>> --- a/drivers/accel/amdxdna/npu5_regs.c
>> +++ b/drivers/accel/amdxdna/npu5_regs.c
>> @@ -92,9 +92,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
>>       },
>> -    .hw_ops        = {
>> -        .set_dpm = npu4_set_dpm,
>> -    },
>> +    .hw_ops        = &npu4_hw_ops
>>   };
>>     const struct amdxdna_dev_info dev_npu5_info = {
>> diff --git a/drivers/accel/amdxdna/npu6_regs.c 
>> b/drivers/accel/amdxdna/npu6_regs.c
>> index 31400cca5ec4..76181345b6d1 100644
>> --- a/drivers/accel/amdxdna/npu6_regs.c
>> +++ b/drivers/accel/amdxdna/npu6_regs.c
>> @@ -92,9 +92,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
>>       },
>> -    .hw_ops         = {
>> -        .set_dpm = npu4_set_dpm,
>> -    },
>> +    .hw_ops         = &npu4_hw_ops
>>     };
>