[PATCH 3/3] perf: arm_cspmu: nvidia: enable NVLINK-C2C port filtering

Besar Wicaksono posted 3 patches 2 months, 1 week ago
There is a newer version of this series
[PATCH 3/3] perf: arm_cspmu: nvidia: enable NVLINK-C2C port filtering
Posted by Besar Wicaksono 2 months, 1 week ago
Enable NVLINK-C2C port filtering to distinguish traffic from
different GPUs connected to NVLINK-C2C.

Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
---
 Documentation/admin-guide/perf/nvidia-pmu.rst | 32 +++++++++++++++++++
 drivers/perf/arm_cspmu/nvidia_cspmu.c         |  7 ++--
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/perf/nvidia-pmu.rst b/Documentation/admin-guide/perf/nvidia-pmu.rst
index 2e0d47cfe7ea..6d1d3206b4ad 100644
--- a/Documentation/admin-guide/perf/nvidia-pmu.rst
+++ b/Documentation/admin-guide/perf/nvidia-pmu.rst
@@ -86,6 +86,22 @@ Example usage:
 
    perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/
 
+The NVLink-C2C has two ports that can be connected to one GPU (occupying both
+ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
+parameter to select the port(s) to monitor. Each bit represents the port number,
+e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
+PMU will monitor both ports by default if not specified.
+
+Example for port filtering:
+
+* Count event id 0x0 from the GPU connected with socket 0 on port 0::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x1/
+
+* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
+
+   perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x3/
+
 NVLink-C2C1 PMU
 -------------------
 
@@ -116,6 +132,22 @@ Example usage:
 
    perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/
 
+The NVLink-C2C has two ports that can be connected to one GPU (occupying both
+ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
+parameter to select the port(s) to monitor. Each bit represents the port number,
+e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
+PMU will monitor both ports by default if not specified.
+
+Example for port filtering:
+
+* Count event id 0x0 from the GPU connected with socket 0 on port 0::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x1/
+
+* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
+
+   perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x3/
+
 CNVLink PMU
 ---------------
 
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
index d1cd9975e71a..cd51177347e5 100644
--- a/drivers/perf/arm_cspmu/nvidia_cspmu.c
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
@@ -149,6 +149,7 @@ static struct attribute *pcie_pmu_format_attrs[] = {
 
 static struct attribute *nvlink_c2c_pmu_format_attrs[] = {
 	ARM_CSPMU_FORMAT_EVENT_ATTR,
+	ARM_CSPMU_FORMAT_ATTR(port, "config1:0-1"),
 	NULL,
 };
 
@@ -193,7 +194,7 @@ static u32 nv_cspmu_event_filter(const struct perf_event *event)
 	const struct nv_cspmu_ctx *ctx =
 		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
 
-	if (ctx->filter_mask == 0)
+	if (ctx->filter_mask == 0 || event->attr.config1 == 0)
 		return ctx->filter_default_val;
 
 	return event->attr.config1 & ctx->filter_mask;
@@ -229,7 +230,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = {
 	{
 	  .prodid = 0x104,
 	  .prodid_mask = NV_PRODID_MASK,
-	  .filter_mask = 0x0,
+	  .filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
 	  .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
 	  .name_pattern = "nvidia_nvlink_c2c1_pmu_%u",
 	  .name_fmt = NAME_FMT_SOCKET,
@@ -239,7 +240,7 @@ static const struct nv_cspmu_match nv_cspmu_match[] = {
 	{
 	  .prodid = 0x105,
 	  .prodid_mask = NV_PRODID_MASK,
-	  .filter_mask = 0x0,
+	  .filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
 	  .filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
 	  .name_pattern = "nvidia_nvlink_c2c0_pmu_%u",
 	  .name_fmt = NAME_FMT_SOCKET,
-- 
2.34.1
Re: [PATCH 3/3] perf: arm_cspmu: nvidia: enable NVLINK-C2C port filtering
Posted by Will Deacon 1 month, 2 weeks ago
On Wed, Sep 18, 2024 at 09:58:46PM +0000, Besar Wicaksono wrote:
> Enable NVLINK-C2C port filtering to distinguish traffic from
> different GPUs connected to NVLINK-C2C.
> 
> Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
> ---
>  Documentation/admin-guide/perf/nvidia-pmu.rst | 32 +++++++++++++++++++
>  drivers/perf/arm_cspmu/nvidia_cspmu.c         |  7 ++--
>  2 files changed, 36 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/admin-guide/perf/nvidia-pmu.rst b/Documentation/admin-guide/perf/nvidia-pmu.rst
> index 2e0d47cfe7ea..6d1d3206b4ad 100644
> --- a/Documentation/admin-guide/perf/nvidia-pmu.rst
> +++ b/Documentation/admin-guide/perf/nvidia-pmu.rst
> @@ -86,6 +86,22 @@ Example usage:
>  
>     perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/
>  
> +The NVLink-C2C has two ports that can be connected to one GPU (occupying both
> +ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
> +parameter to select the port(s) to monitor. Each bit represents the port number,
> +e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
> +PMU will monitor both ports by default if not specified.
> +
> +Example for port filtering:
> +
> +* Count event id 0x0 from the GPU connected with socket 0 on port 0::
> +
> +   perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x1/
> +
> +* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
> +
> +   perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0,port=0x3/
> +
>  NVLink-C2C1 PMU
>  -------------------
>  
> @@ -116,6 +132,22 @@ Example usage:
>  
>     perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/
>  
> +The NVLink-C2C has two ports that can be connected to one GPU (occupying both
> +ports) or to two GPUs (one GPU per port). The user can use "port" bitmap
> +parameter to select the port(s) to monitor. Each bit represents the port number,
> +e.g. "port=0x1" corresponds to port 0 and "port=0x3" is for port 0 and 1. The
> +PMU will monitor both ports by default if not specified.
> +
> +Example for port filtering:
> +
> +* Count event id 0x0 from the GPU connected with socket 0 on port 0::
> +
> +   perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x1/
> +
> +* Count event id 0x0 from the GPUs connected with socket 0 on port 0 and port 1::
> +
> +   perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0,port=0x3/
> +
>  CNVLink PMU
>  ---------------
>  
> diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
> index d1cd9975e71a..cd51177347e5 100644
> --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c
> +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
> @@ -149,6 +149,7 @@ static struct attribute *pcie_pmu_format_attrs[] = {
>  
>  static struct attribute *nvlink_c2c_pmu_format_attrs[] = {
>  	ARM_CSPMU_FORMAT_EVENT_ATTR,
> +	ARM_CSPMU_FORMAT_ATTR(port, "config1:0-1"),
>  	NULL,
>  };
>  
> @@ -193,7 +194,7 @@ static u32 nv_cspmu_event_filter(const struct perf_event *event)
>  	const struct nv_cspmu_ctx *ctx =
>  		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
>  
> -	if (ctx->filter_mask == 0)
> +	if (ctx->filter_mask == 0 || event->attr.config1 == 0)
>  		return ctx->filter_default_val;

Isn't this a bit too broad? It looks like this filter function is used
beyond the C2C PMU (i.e. the PCIe PMU) and you're also checking the whole
of config1 rather than just the port field.

Will