Mali GPUs have three registers that indicate which parts of the hardware
are powered and active at any moment. These take the form of bitmaps. In
the case of SHADER_PWRACTIVE for example, a high bit indicates that the
shader core corresponding to that bit index is active. These bitmaps
aren't solely contiguous bits, as it's common to have holes in the
sequence of shader core indices, and the actual set of which cores are
present is defined by the "shader present" register.
When the GPU finishes a power state transition, it fires a
GPU_IRQ_POWER_CHANGED_ALL interrupt. After such an interrupt is
received, the PWRACTIVE registers will likely contain interesting new
information.
This is not to be confused with the PWR_IRQ_POWER_CHANGED_ALL interrupt,
which is something related to Mali v14+'s power control logic. The
PWRACTIVE registers and corresponding interrupts are already available
in v9 and onwards.
Expose this as a tracepoint to userspace. This allows users to debug
various scenarios and gather interesting information, such as: knowing
how much hardware is lit up at any given time, correlating graphics
corruption with a specific active shader core, measuring when hardware
is allowed to go to an inactive state again, and so on.
Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
---
drivers/gpu/drm/panthor/panthor_device.c | 1 +
drivers/gpu/drm/panthor/panthor_gpu.c | 9 ++++++++
drivers/gpu/drm/panthor/panthor_trace.h | 38 ++++++++++++++++++++++++++++++++
3 files changed, 48 insertions(+)
diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
index e133b1e0ad6d..a3cb934104b8 100644
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -548,6 +548,7 @@ int panthor_device_resume(struct device *dev)
DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
mutex_unlock(&ptdev->pm.mmio_lock);
+
return 0;
err_suspend_devfreq:
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
index 9cb5dee93212..8830aa9a5c4b 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -22,6 +22,9 @@
#include "panthor_hw.h"
#include "panthor_regs.h"
+#define CREATE_TRACE_POINTS
+#include "panthor_trace.h"
+
/**
* struct panthor_gpu - GPU block management data.
*/
@@ -46,6 +49,7 @@ struct panthor_gpu {
(GPU_IRQ_FAULT | \
GPU_IRQ_PROTM_FAULT | \
GPU_IRQ_RESET_COMPLETED | \
+ GPU_IRQ_POWER_CHANGED_ALL | \
GPU_IRQ_CLEAN_CACHES_COMPLETED)
static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
@@ -97,6 +101,11 @@ static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
wake_up_all(&ptdev->gpu->reqs_acked);
}
spin_unlock(&ptdev->gpu->reqs_lock);
+
+ if (status & GPU_IRQ_POWER_CHANGED_ALL)
+ trace_gpu_power_active(gpu_read64(ptdev, SHADER_PWRACTIVE),
+ gpu_read64(ptdev, TILER_PWRACTIVE),
+ gpu_read64(ptdev, L2_PWRACTIVE));
}
PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler);
diff --git a/drivers/gpu/drm/panthor/panthor_trace.h b/drivers/gpu/drm/panthor/panthor_trace.h
new file mode 100644
index 000000000000..01013f81e68a
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_trace.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2025 Collabora ltd. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM panthor
+
+#if !defined(__PANTHOR_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
+#define __PANTHOR_TRACE_H__
+
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+
+TRACE_EVENT(gpu_power_active,
+ TP_PROTO(u64 shader_bitmap, u64 tiler_bitmap, u64 l2_bitmap),
+ TP_ARGS(shader_bitmap, tiler_bitmap, l2_bitmap),
+ TP_STRUCT__entry(
+ __field(u64, shader_bitmap)
+ __field(u64, tiler_bitmap)
+ __field(u64, l2_bitmap)
+ ),
+ TP_fast_assign(
+ __entry->shader_bitmap = shader_bitmap;
+ __entry->tiler_bitmap = tiler_bitmap;
+ __entry->l2_bitmap = l2_bitmap;
+ ),
+ TP_printk("shader_bitmap=0x%llx tiler_bitmap=0x%llx l2_bitmap=0x%llx",
+ __entry->shader_bitmap, __entry->tiler_bitmap, __entry->l2_bitmap
+ )
+);
+
+#endif /* __PANTHOR_TRACE_H__ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE panthor_trace
+
+#include <trace/define_trace.h>
--
2.52.0
On 03/12/2025 13:56, Nicolas Frattaroli wrote:
> Mali GPUs have three registers that indicate which parts of the hardware
> are powered and active at any moment. These take the form of bitmaps. In
> the case of SHADER_PWRACTIVE for example, a high bit indicates that the
> shader core corresponding to that bit index is active. These bitmaps
> aren't solely contiguous bits, as it's common to have holes in the
> sequence of shader core indices, and the actual set of which cores are
> present is defined by the "shader present" register.
>
> When the GPU finishes a power state transition, it fires a
> GPU_IRQ_POWER_CHANGED_ALL interrupt. After such an interrupt is
> received, the PWRACTIVE registers will likely contain interesting new
> information.
>
> This is not to be confused with the PWR_IRQ_POWER_CHANGED_ALL interrupt,
> which is something related to Mali v14+'s power control logic. The
> PWRACTIVE registers and corresponding interrupts are already available
> in v9 and onwards.
>
> Expose this as a tracepoint to userspace. This allows users to debug
> various scenarios and gather interesting information, such as: knowing
> how much hardware is lit up at any given time, correlating graphics
> corruption with a specific active shader core, measuring when hardware
> is allowed to go to an inactive state again, and so on.
>
> Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> ---
> drivers/gpu/drm/panthor/panthor_device.c | 1 +
> drivers/gpu/drm/panthor/panthor_gpu.c | 9 ++++++++
> drivers/gpu/drm/panthor/panthor_trace.h | 38 ++++++++++++++++++++++++++++++++
> 3 files changed, 48 insertions(+)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
> index e133b1e0ad6d..a3cb934104b8 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.c
> +++ b/drivers/gpu/drm/panthor/panthor_device.c
> @@ -548,6 +548,7 @@ int panthor_device_resume(struct device *dev)
> DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
> atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
> mutex_unlock(&ptdev->pm.mmio_lock);
> +
> return 0;
>
> err_suspend_devfreq:
> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> index 9cb5dee93212..8830aa9a5c4b 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> @@ -22,6 +22,9 @@
> #include "panthor_hw.h"
> #include "panthor_regs.h"
>
> +#define CREATE_TRACE_POINTS
> +#include "panthor_trace.h"
> +
> /**
> * struct panthor_gpu - GPU block management data.
> */
> @@ -46,6 +49,7 @@ struct panthor_gpu {
> (GPU_IRQ_FAULT | \
> GPU_IRQ_PROTM_FAULT | \
> GPU_IRQ_RESET_COMPLETED | \
> + GPU_IRQ_POWER_CHANGED_ALL | \
Also, we've seen customers complain about too many IRQs originating
from this event, is there any chance we can enable this conditionally
i.e. only when the trace point is enabled?
Kind regards,
Karunika
> GPU_IRQ_CLEAN_CACHES_COMPLETED)
>
> static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
> @@ -97,6 +101,11 @@ static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
> wake_up_all(&ptdev->gpu->reqs_acked);
> }
> spin_unlock(&ptdev->gpu->reqs_lock);
> +
> + if (status & GPU_IRQ_POWER_CHANGED_ALL)
> + trace_gpu_power_active(gpu_read64(ptdev, SHADER_PWRACTIVE),
> + gpu_read64(ptdev, TILER_PWRACTIVE),
> + gpu_read64(ptdev, L2_PWRACTIVE));
> }
> PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler);
>
> diff --git a/drivers/gpu/drm/panthor/panthor_trace.h b/drivers/gpu/drm/panthor/panthor_trace.h
> new file mode 100644
> index 000000000000..01013f81e68a
> --- /dev/null
> +++ b/drivers/gpu/drm/panthor/panthor_trace.h
> @@ -0,0 +1,38 @@
> +/* SPDX-License-Identifier: GPL-2.0 or MIT */
> +/* Copyright 2025 Collabora ltd. */
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM panthor
> +
> +#if !defined(__PANTHOR_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
> +#define __PANTHOR_TRACE_H__
> +
> +#include <linux/tracepoint.h>
> +#include <linux/types.h>
> +
> +TRACE_EVENT(gpu_power_active,
> + TP_PROTO(u64 shader_bitmap, u64 tiler_bitmap, u64 l2_bitmap),
> + TP_ARGS(shader_bitmap, tiler_bitmap, l2_bitmap),
> + TP_STRUCT__entry(
> + __field(u64, shader_bitmap)
> + __field(u64, tiler_bitmap)
> + __field(u64, l2_bitmap)
> + ),
> + TP_fast_assign(
> + __entry->shader_bitmap = shader_bitmap;
> + __entry->tiler_bitmap = tiler_bitmap;
> + __entry->l2_bitmap = l2_bitmap;
> + ),
> + TP_printk("shader_bitmap=0x%llx tiler_bitmap=0x%llx l2_bitmap=0x%llx",
> + __entry->shader_bitmap, __entry->tiler_bitmap, __entry->l2_bitmap
> + )
> +);
> +
> +#endif /* __PANTHOR_TRACE_H__ */
> +
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH .
> +#undef TRACE_INCLUDE_FILE
> +#define TRACE_INCLUDE_FILE panthor_trace
> +
> +#include <trace/define_trace.h>
>
On Monday, 8 December 2025 18:21:06 Central European Standard Time Karunika Choo wrote:
> On 03/12/2025 13:56, Nicolas Frattaroli wrote:
> > Mali GPUs have three registers that indicate which parts of the hardware
> > are powered and active at any moment. These take the form of bitmaps. In
> > the case of SHADER_PWRACTIVE for example, a high bit indicates that the
> > shader core corresponding to that bit index is active. These bitmaps
> > aren't solely contiguous bits, as it's common to have holes in the
> > sequence of shader core indices, and the actual set of which cores are
> > present is defined by the "shader present" register.
> >
> > When the GPU finishes a power state transition, it fires a
> > GPU_IRQ_POWER_CHANGED_ALL interrupt. After such an interrupt is
> > received, the PWRACTIVE registers will likely contain interesting new
> > information.
> >
> > This is not to be confused with the PWR_IRQ_POWER_CHANGED_ALL interrupt,
> > which is something related to Mali v14+'s power control logic. The
> > PWRACTIVE registers and corresponding interrupts are already available
> > in v9 and onwards.
> >
> > Expose this as a tracepoint to userspace. This allows users to debug
> > various scenarios and gather interesting information, such as: knowing
> > how much hardware is lit up at any given time, correlating graphics
> > corruption with a specific active shader core, measuring when hardware
> > is allowed to go to an inactive state again, and so on.
> >
> > Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> > ---
> > drivers/gpu/drm/panthor/panthor_device.c | 1 +
> > drivers/gpu/drm/panthor/panthor_gpu.c | 9 ++++++++
> > drivers/gpu/drm/panthor/panthor_trace.h | 38 ++++++++++++++++++++++++++++++++
> > 3 files changed, 48 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
> > index e133b1e0ad6d..a3cb934104b8 100644
> > --- a/drivers/gpu/drm/panthor/panthor_device.c
> > +++ b/drivers/gpu/drm/panthor/panthor_device.c
> > @@ -548,6 +548,7 @@ int panthor_device_resume(struct device *dev)
> > DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
> > atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
> > mutex_unlock(&ptdev->pm.mmio_lock);
> > +
> > return 0;
> >
> > err_suspend_devfreq:
> > diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> > index 9cb5dee93212..8830aa9a5c4b 100644
> > --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> > +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> > @@ -22,6 +22,9 @@
> > #include "panthor_hw.h"
> > #include "panthor_regs.h"
> >
> > +#define CREATE_TRACE_POINTS
> > +#include "panthor_trace.h"
> > +
> > /**
> > * struct panthor_gpu - GPU block management data.
> > */
> > @@ -46,6 +49,7 @@ struct panthor_gpu {
> > (GPU_IRQ_FAULT | \
> > GPU_IRQ_PROTM_FAULT | \
> > GPU_IRQ_RESET_COMPLETED | \
> > + GPU_IRQ_POWER_CHANGED_ALL | \
>
> Also, we've seen customers complain about too many IRQs originating
> from this event, is there any chance we can enable this conditionally
> i.e. only when the trace point is enabled?
Yeah, that's something I've been trying to look into. I'll need to
do some more digging to see if there's a way to run a callback when
an event tracepoint is enabled. That'd be the ideal way to do this,
because then we can just modify the interrupt mask in the callback.
For what it's worth, it doesn't fire very often for me: orders of
magnitude less often than the job interrupt fires, at least. But I assume this
is highly implementation dependent, e.g. on bigger designs that have
more complex power setups and more reasons to enable only part of the
hardware, it'll fire way more often.
Kind regards,
Nicolas Frattaroli
>
> Kind regards,
> Karunika
On 03/12/2025 13:56, Nicolas Frattaroli wrote:
> Mali GPUs have three registers that indicate which parts of the hardware
> are powered and active at any moment. These take the form of bitmaps. In
> the case of SHADER_PWRACTIVE for example, a high bit indicates that the
> shader core corresponding to that bit index is active. These bitmaps
> aren't solely contiguous bits, as it's common to have holes in the
> sequence of shader core indices, and the actual set of which cores are
> present is defined by the "shader present" register.
>
> When the GPU finishes a power state transition, it fires a
> GPU_IRQ_POWER_CHANGED_ALL interrupt. After such an interrupt is
> received, the PWRACTIVE registers will likely contain interesting new
> information.
>
> This is not to be confused with the PWR_IRQ_POWER_CHANGED_ALL interrupt,
> which is something related to Mali v14+'s power control logic. The
> PWRACTIVE registers and corresponding interrupts are already available
> in v9 and onwards.
>
> Expose this as a tracepoint to userspace. This allows users to debug
> various scenarios and gather interesting information, such as: knowing
> how much hardware is lit up at any given time, correlating graphics
> corruption with a specific active shader core, measuring when hardware
> is allowed to go to an inactive state again, and so on.
>
> Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> ---
> drivers/gpu/drm/panthor/panthor_device.c | 1 +
> drivers/gpu/drm/panthor/panthor_gpu.c | 9 ++++++++
> drivers/gpu/drm/panthor/panthor_trace.h | 38 ++++++++++++++++++++++++++++++++
> 3 files changed, 48 insertions(+)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
> index e133b1e0ad6d..a3cb934104b8 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.c
> +++ b/drivers/gpu/drm/panthor/panthor_device.c
> @@ -548,6 +548,7 @@ int panthor_device_resume(struct device *dev)
> DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
> atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
> mutex_unlock(&ptdev->pm.mmio_lock);
> +
> return 0;
>
> err_suspend_devfreq:
> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> index 9cb5dee93212..8830aa9a5c4b 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> @@ -22,6 +22,9 @@
> #include "panthor_hw.h"
> #include "panthor_regs.h"
>
> +#define CREATE_TRACE_POINTS
> +#include "panthor_trace.h"
> +
> /**
> * struct panthor_gpu - GPU block management data.
> */
> @@ -46,6 +49,7 @@ struct panthor_gpu {
> (GPU_IRQ_FAULT | \
> GPU_IRQ_PROTM_FAULT | \
> GPU_IRQ_RESET_COMPLETED | \
> + GPU_IRQ_POWER_CHANGED_ALL | \
> GPU_IRQ_CLEAN_CACHES_COMPLETED)
>
> static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
> @@ -97,6 +101,11 @@ static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
> wake_up_all(&ptdev->gpu->reqs_acked);
> }
> spin_unlock(&ptdev->gpu->reqs_lock);
> +
> + if (status & GPU_IRQ_POWER_CHANGED_ALL)
> + trace_gpu_power_active(gpu_read64(ptdev, SHADER_PWRACTIVE),
> + gpu_read64(ptdev, TILER_PWRACTIVE),
> + gpu_read64(ptdev, L2_PWRACTIVE));
> }
> PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler);
>
> diff --git a/drivers/gpu/drm/panthor/panthor_trace.h b/drivers/gpu/drm/panthor/panthor_trace.h
> new file mode 100644
> index 000000000000..01013f81e68a
> --- /dev/null
> +++ b/drivers/gpu/drm/panthor/panthor_trace.h
> @@ -0,0 +1,38 @@
> +/* SPDX-License-Identifier: GPL-2.0 or MIT */
> +/* Copyright 2025 Collabora ltd. */
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM panthor
> +
> +#if !defined(__PANTHOR_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
> +#define __PANTHOR_TRACE_H__
> +
> +#include <linux/tracepoint.h>
> +#include <linux/types.h>
> +
> +TRACE_EVENT(gpu_power_active,
> + TP_PROTO(u64 shader_bitmap, u64 tiler_bitmap, u64 l2_bitmap),
nit: if you want to add tracing can we also add the device name as
well? Something like:
TP_PROTO(struct device *dev, ...),
TP_ARGS(dev, ...),
TP_STRUCT__entry(
__string(dev_name, dev_name(dev))
...
).
...
This will help differentiate the device it is originating from in
a multi GPU situation.
Kind regards,
Karunika
> + TP_ARGS(shader_bitmap, tiler_bitmap, l2_bitmap),
> + TP_STRUCT__entry(
> + __field(u64, shader_bitmap)
> + __field(u64, tiler_bitmap)
> + __field(u64, l2_bitmap)
> + ),
> + TP_fast_assign(
> + __entry->shader_bitmap = shader_bitmap;
> + __entry->tiler_bitmap = tiler_bitmap;
> + __entry->l2_bitmap = l2_bitmap;
> + ),
> + TP_printk("shader_bitmap=0x%llx tiler_bitmap=0x%llx l2_bitmap=0x%llx",
> + __entry->shader_bitmap, __entry->tiler_bitmap, __entry->l2_bitmap
> + )
> +);
> +
> +#endif /* __PANTHOR_TRACE_H__ */
> +
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH .
> +#undef TRACE_INCLUDE_FILE
> +#define TRACE_INCLUDE_FILE panthor_trace
> +
> +#include <trace/define_trace.h>
>
On Monday, 8 December 2025 18:14:53 Central European Standard Time Karunika Choo wrote:
> On 03/12/2025 13:56, Nicolas Frattaroli wrote:
> > [... snip ...]
> > diff --git a/drivers/gpu/drm/panthor/panthor_trace.h b/drivers/gpu/drm/panthor/panthor_trace.h
> > new file mode 100644
> > index 000000000000..01013f81e68a
> > --- /dev/null
> > +++ b/drivers/gpu/drm/panthor/panthor_trace.h
> > @@ -0,0 +1,38 @@
> > +/* SPDX-License-Identifier: GPL-2.0 or MIT */
> > +/* Copyright 2025 Collabora ltd. */
> > +
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM panthor
> > +
> > +#if !defined(__PANTHOR_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
> > +#define __PANTHOR_TRACE_H__
> > +
> > +#include <linux/tracepoint.h>
> > +#include <linux/types.h>
> > +
> > +TRACE_EVENT(gpu_power_active,
> > + TP_PROTO(u64 shader_bitmap, u64 tiler_bitmap, u64 l2_bitmap),
>
> nit: if you want to add tracing can we also add the device name as
> well? Something like:
>
> TP_PROTO(struct device *dev, ...),
> TP_ARGS(dev, ...),
> TP_STRUCT__ENTRY(
> __string(dev_name, dev_name(dev))
> ...
> ).
> ...
This is a great idea, will do. Any specific reason to pass the
device in the tracepoint rather than a const char*?
>
> This will help differentiate the device it is originating from in
> a multi GPU situation.
I'll try not to get too excited at the prospect of systems using
multiple Mali GPUs because I know the likeliest case this happens
on is Arm evaluation systems with a hard IP and a soft IP loaded to
the FPGA core. :)
Kind regards,
Nicolas Frattaroli
>
> Kind regards,
> Karunika
>
> > + TP_ARGS(shader_bitmap, tiler_bitmap, l2_bitmap),
> > + TP_STRUCT__entry(
> > + __field(u64, shader_bitmap)
> > + __field(u64, tiler_bitmap)
> > + __field(u64, l2_bitmap)
> > + ),
> > + TP_fast_assign(
> > + __entry->shader_bitmap = shader_bitmap;
> > + __entry->tiler_bitmap = tiler_bitmap;
> > + __entry->l2_bitmap = l2_bitmap;
> > + ),
> > + TP_printk("shader_bitmap=0x%llx tiler_bitmap=0x%llx l2_bitmap=0x%llx",
> > + __entry->shader_bitmap, __entry->tiler_bitmap, __entry->l2_bitmap
> > + )
> > +);
> > +
> > +#endif /* __PANTHOR_TRACE_H__ */
> > +
> > +#undef TRACE_INCLUDE_PATH
> > +#define TRACE_INCLUDE_PATH .
> > +#undef TRACE_INCLUDE_FILE
> > +#define TRACE_INCLUDE_FILE panthor_trace
> > +
> > +#include <trace/define_trace.h>
> >
>
>
>
On 09/12/2025 13:01, Nicolas Frattaroli wrote:
> On Monday, 8 December 2025 18:14:53 Central European Standard Time Karunika Choo wrote:
>> On 03/12/2025 13:56, Nicolas Frattaroli wrote:
>>> [... snip ...]
>>> diff --git a/drivers/gpu/drm/panthor/panthor_trace.h b/drivers/gpu/drm/panthor/panthor_trace.h
>>> new file mode 100644
>>> index 000000000000..01013f81e68a
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/panthor/panthor_trace.h
>>> @@ -0,0 +1,38 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 or MIT */
>>> +/* Copyright 2025 Collabora ltd. */
>>> +
>>> +#undef TRACE_SYSTEM
>>> +#define TRACE_SYSTEM panthor
>>> +
>>> +#if !defined(__PANTHOR_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
>>> +#define __PANTHOR_TRACE_H__
>>> +
>>> +#include <linux/tracepoint.h>
>>> +#include <linux/types.h>
>>> +
>>> +TRACE_EVENT(gpu_power_active,
>>> + TP_PROTO(u64 shader_bitmap, u64 tiler_bitmap, u64 l2_bitmap),
>>
>> nit: if you want to add tracing can we also add the device name as
>> well? Something like:
>>
>> TP_PROTO(struct device *dev, ...),
>> TP_ARGS(dev, ...),
>> TP_STRUCT__ENTRY(
>> __string(dev_name, dev_name(dev))
>> ...
>> ).
>> ...
>
> This is a great idea, will do. Any specific reason to pass the
> device in the tracepoint rather than a const char*?
>
Nope, totally fine to do it that way as well.
Kind regards,
Karunika
>>
>> This will help differentiate the device it is originating from in
>> a multi GPU situation.
>
> I'll try not to get too excited at the prospect of systems using
> multiple Mali GPUs because I know the likeliest case this happens
> on is Arm evaluation systems with a hard IP and a soft IP loaded to
> the FPGA core. :)
>
> Kind regards,
> Nicolas Frattaroli
>
>>
>> Kind regards,
>> Karunika
>>
>>> + TP_ARGS(shader_bitmap, tiler_bitmap, l2_bitmap),
>>> + TP_STRUCT__entry(
>>> + __field(u64, shader_bitmap)
>>> + __field(u64, tiler_bitmap)
>>> + __field(u64, l2_bitmap)
>>> + ),
>>> + TP_fast_assign(
>>> + __entry->shader_bitmap = shader_bitmap;
>>> + __entry->tiler_bitmap = tiler_bitmap;
>>> + __entry->l2_bitmap = l2_bitmap;
>>> + ),
>>> + TP_printk("shader_bitmap=0x%llx tiler_bitmap=0x%llx l2_bitmap=0x%llx",
>>> + __entry->shader_bitmap, __entry->tiler_bitmap, __entry->l2_bitmap
>>> + )
>>> +);
>>> +
>>> +#endif /* __PANTHOR_TRACE_H__ */
>>> +
>>> +#undef TRACE_INCLUDE_PATH
>>> +#define TRACE_INCLUDE_PATH .
>>> +#undef TRACE_INCLUDE_FILE
>>> +#define TRACE_INCLUDE_FILE panthor_trace
>>> +
>>> +#include <trace/define_trace.h>
>>>
>>
>>
>>
>
>
>
>
On Tue, Dec 09, 2025 at 04:22:15PM +0000, Karunika Choo wrote:
> On 09/12/2025 13:01, Nicolas Frattaroli wrote:
> > On Monday, 8 December 2025 18:14:53 Central European Standard Time Karunika Choo wrote:
> >> On 03/12/2025 13:56, Nicolas Frattaroli wrote:
> >>> [... snip ...]
> >>> diff --git a/drivers/gpu/drm/panthor/panthor_trace.h b/drivers/gpu/drm/panthor/panthor_trace.h
> >>> new file mode 100644
> >>> index 000000000000..01013f81e68a
> >>> --- /dev/null
> >>> +++ b/drivers/gpu/drm/panthor/panthor_trace.h
> >>> @@ -0,0 +1,38 @@
> >>> +/* SPDX-License-Identifier: GPL-2.0 or MIT */
> >>> +/* Copyright 2025 Collabora ltd. */
> >>> +
> >>> +#undef TRACE_SYSTEM
> >>> +#define TRACE_SYSTEM panthor
> >>> +
> >>> +#if !defined(__PANTHOR_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
> >>> +#define __PANTHOR_TRACE_H__
> >>> +
> >>> +#include <linux/tracepoint.h>
> >>> +#include <linux/types.h>
> >>> +
> >>> +TRACE_EVENT(gpu_power_active,
> >>> + TP_PROTO(u64 shader_bitmap, u64 tiler_bitmap, u64 l2_bitmap),
> >>
> >> nit: if you want to add tracing can we also add the device name as
> >> well? Something like:
> >>
> >> TP_PROTO(struct device *dev, ...),
> >> TP_ARGS(dev, ...),
> >> TP_STRUCT__ENTRY(
> >> __string(dev_name, dev_name(dev))
> >> ...
> >> ).
> >> ...
> >
> > This is a great idea, will do. Any specific reason to pass the
> > device in the tracepoint rather than a const char*?
> >
>
> Nope, totally fine to do it that way as well.
If you store any pointer into the trace buffer, then by the time it will
be dereferenced (TP_printk below), the object behind it may be already
freed. __string/__assign_str prevents this use-after-free by embedding a
full copy of the string in the trace buffer.
Cheers,
Marcin
On Wed, Dec 3, 2025 at 6:04 AM Nicolas Frattaroli
<nicolas.frattaroli@collabora.com> wrote:
>
> Mali GPUs have three registers that indicate which parts of the hardware
> are powered and active at any moment. These take the form of bitmaps. In
> the case of SHADER_PWRACTIVE for example, a high bit indicates that the
> shader core corresponding to that bit index is active. These bitmaps
> aren't solely contiguous bits, as it's common to have holes in the
> sequence of shader core indices, and the actual set of which cores are
> present is defined by the "shader present" register.
>
> When the GPU finishes a power state transition, it fires a
> GPU_IRQ_POWER_CHANGED_ALL interrupt. After such an interrupt is
> received, the PWRACTIVE registers will likely contain interesting new
> information.
I am seeing
irq/342-panthor-412 [000] ..... 934.526754: gpu_power_active:
shader_bitmap=0x0 tiler_bitmap=0x0 l2_bitmap=0x0
irq/342-panthor-412 [000] ..... 936.640356: gpu_power_active:
shader_bitmap=0x0 tiler_bitmap=0x0 l2_bitmap=0x0
on a GPU-bound test. It does not look like collecting samples on
GPU_IRQ_POWER_CHANGED_ALL gives much info.
I think it is more useful to collect them periodically, so that
we know that in the past X seconds, Y out of a total of Z samples
indicate activity. That's best done in userspace, and panthor's
role should be to provide a uAPI such as
https://lore.kernel.org/all/cover.1743517880.git.lukas.zapolskas@arm.com/.
>
> This is not to be confused with the PWR_IRQ_POWER_CHANGED_ALL interrupt,
> which is something related to Mali v14+'s power control logic. The
> PWRACTIVE registers and corresponding interrupts are already available
> in v9 and onwards.
>
> Expose this as a tracepoint to userspace. This allows users to debug
> various scenarios and gather interesting information, such as: knowing
> how much hardware is lit up at any given time, correlating graphics
> corruption with a specific active shader core, measuring when hardware
> is allowed to go to an inactive state again, and so on.
>
> Signed-off-by: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
> ---
> drivers/gpu/drm/panthor/panthor_device.c | 1 +
> drivers/gpu/drm/panthor/panthor_gpu.c | 9 ++++++++
> drivers/gpu/drm/panthor/panthor_trace.h | 38 ++++++++++++++++++++++++++++++++
> 3 files changed, 48 insertions(+)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
> index e133b1e0ad6d..a3cb934104b8 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.c
> +++ b/drivers/gpu/drm/panthor/panthor_device.c
> @@ -548,6 +548,7 @@ int panthor_device_resume(struct device *dev)
> DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
> atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
> mutex_unlock(&ptdev->pm.mmio_lock);
> +
> return 0;
>
> err_suspend_devfreq:
> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> index 9cb5dee93212..8830aa9a5c4b 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> @@ -22,6 +22,9 @@
> #include "panthor_hw.h"
> #include "panthor_regs.h"
>
> +#define CREATE_TRACE_POINTS
> +#include "panthor_trace.h"
> +
> /**
> * struct panthor_gpu - GPU block management data.
> */
> @@ -46,6 +49,7 @@ struct panthor_gpu {
> (GPU_IRQ_FAULT | \
> GPU_IRQ_PROTM_FAULT | \
> GPU_IRQ_RESET_COMPLETED | \
> + GPU_IRQ_POWER_CHANGED_ALL | \
> GPU_IRQ_CLEAN_CACHES_COMPLETED)
>
> static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
> @@ -97,6 +101,11 @@ static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
> wake_up_all(&ptdev->gpu->reqs_acked);
> }
> spin_unlock(&ptdev->gpu->reqs_lock);
> +
> + if (status & GPU_IRQ_POWER_CHANGED_ALL)
> + trace_gpu_power_active(gpu_read64(ptdev, SHADER_PWRACTIVE),
> + gpu_read64(ptdev, TILER_PWRACTIVE),
> + gpu_read64(ptdev, L2_PWRACTIVE));
> }
> PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler);
>
> diff --git a/drivers/gpu/drm/panthor/panthor_trace.h b/drivers/gpu/drm/panthor/panthor_trace.h
> new file mode 100644
> index 000000000000..01013f81e68a
> --- /dev/null
> +++ b/drivers/gpu/drm/panthor/panthor_trace.h
> @@ -0,0 +1,38 @@
> +/* SPDX-License-Identifier: GPL-2.0 or MIT */
> +/* Copyright 2025 Collabora ltd. */
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM panthor
> +
> +#if !defined(__PANTHOR_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
> +#define __PANTHOR_TRACE_H__
> +
> +#include <linux/tracepoint.h>
> +#include <linux/types.h>
> +
> +TRACE_EVENT(gpu_power_active,
> + TP_PROTO(u64 shader_bitmap, u64 tiler_bitmap, u64 l2_bitmap),
> + TP_ARGS(shader_bitmap, tiler_bitmap, l2_bitmap),
> + TP_STRUCT__entry(
> + __field(u64, shader_bitmap)
> + __field(u64, tiler_bitmap)
> + __field(u64, l2_bitmap)
> + ),
> + TP_fast_assign(
> + __entry->shader_bitmap = shader_bitmap;
> + __entry->tiler_bitmap = tiler_bitmap;
> + __entry->l2_bitmap = l2_bitmap;
> + ),
> + TP_printk("shader_bitmap=0x%llx tiler_bitmap=0x%llx l2_bitmap=0x%llx",
> + __entry->shader_bitmap, __entry->tiler_bitmap, __entry->l2_bitmap
> + )
> +);
> +
> +#endif /* __PANTHOR_TRACE_H__ */
> +
> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH .
> +#undef TRACE_INCLUDE_FILE
> +#define TRACE_INCLUDE_FILE panthor_trace
> +
> +#include <trace/define_trace.h>
>
> --
> 2.52.0
>
© 2016 - 2025 Red Hat, Inc.