drivers/gpu/drm/panthor/panthor_drv.c | 23 ++++++++++++++++++++++- include/uapi/drm/panthor_drm.h | 16 ++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-)
Expose system timestamp and frequency supported by the GPU with a new
device query.
Mali uses the generic arch timer as GPU system time so we currently
wire cntvct_el0 and cntfrq_el0 respectively to a new device query.
We could have directly read those values from userland but handling
this here allows us to be future proof in case of architectural changes
or erratas related to timers for example.
This new uAPI will be used in Mesa to implement timestamp queries and
VK_KHR_calibrated_timestamps.
Since this extends the uAPI and because userland needs a way to advertise
those features conditionally, this also bumps the driver minor version.
Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
---
drivers/gpu/drm/panthor/panthor_drv.c | 23 ++++++++++++++++++++++-
include/uapi/drm/panthor_drm.h | 16 ++++++++++++++++
2 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index b8a84f26b3ef..4d62ff3fbe03 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -164,6 +164,7 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
_Generic(_obj_name, \
PANTHOR_UOBJ_DECL(struct drm_panthor_gpu_info, tiler_present), \
PANTHOR_UOBJ_DECL(struct drm_panthor_csif_info, pad), \
+ PANTHOR_UOBJ_DECL(struct drm_panthor_timestamp_info, current_timestamp), \
PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \
PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \
PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \
@@ -750,10 +751,21 @@ static void panthor_submit_ctx_cleanup(struct panthor_submit_ctx *ctx,
kvfree(ctx->jobs);
}
+static void panthor_ioctl_query_timestamp(struct drm_panthor_timestamp_info *arg)
+{
+#ifdef CONFIG_ARM_ARCH_TIMER
+ arg->timestamp_frequency = arch_timer_get_cntfrq();
+ arg->current_timestamp = __arch_counter_get_cntvct();
+#else
+ memset(arg, 0, sizeof(*arg));
+#endif
+}
+
static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct drm_file *file)
{
struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
struct drm_panthor_dev_query *args = data;
+ struct drm_panthor_timestamp_info timestamp_info;
if (!args->pointer) {
switch (args->type) {
@@ -765,6 +777,10 @@ static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct d
args->size = sizeof(ptdev->csif_info);
return 0;
+ case DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO:
+ args->size = sizeof(timestamp_info);
+ return 0;
+
default:
return -EINVAL;
}
@@ -777,6 +793,10 @@ static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct d
case DRM_PANTHOR_DEV_QUERY_CSIF_INFO:
return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->csif_info);
+ case DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO:
+ panthor_ioctl_query_timestamp(×tamp_info);
+ return PANTHOR_UOBJ_SET(args->pointer, args->size, timestamp_info);
+
default:
return -EINVAL;
}
@@ -1372,6 +1392,7 @@ static void panthor_debugfs_init(struct drm_minor *minor)
/*
* PanCSF driver version:
* - 1.0 - initial interface
+ * - 1.1 - adds DEV_QUERY_TIMESTAMP_INFO query
*/
static const struct drm_driver panthor_drm_driver = {
.driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ |
@@ -1385,7 +1406,7 @@ static const struct drm_driver panthor_drm_driver = {
.desc = "Panthor DRM driver",
.date = "20230801",
.major = 1,
- .minor = 0,
+ .minor = 1,
.gem_create_object = panthor_gem_create_object,
.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index aaed8e12ad0b..8d39a2f91769 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -260,6 +260,9 @@ enum drm_panthor_dev_query_type {
/** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface information. */
DRM_PANTHOR_DEV_QUERY_CSIF_INFO,
+
+ /** @DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: Query timestamp information. */
+ DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO,
};
/**
@@ -377,6 +380,19 @@ struct drm_panthor_csif_info {
__u32 pad;
};
+/**
+ * struct drm_panthor_timestamp_info - Timestamp information
+ *
+ * Structure grouping all queryable information relating to the GPU timestamp.
+ */
+struct drm_panthor_timestamp_info {
+ /** @timestamp_frequency: The frequency of the timestamp timer. */
+ __u64 timestamp_frequency;
+
+ /** @current_timestamp: The current timestamp. */
+ __u64 current_timestamp;
+};
+
/**
* struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY
*/
base-commit: f7f3ddb6e5c8dc7b621fd8c0903ea42190d67452
--
2.45.2
Hi Mary,
Thanks for your patch.
On 07/08/2024 16:35, Mary Guillemard wrote:
> Expose system timestamp and frequency supported by the GPU with a
> new device query.
>
> Mali uses the generic arch timer as GPU system time so we currently
> wire cntvct_el0 and cntfrq_el0 respectively to a new device query. We
> could have directly read those values from userland but handling this
> here allows us to be future proof in case of architectural changes or
> erratas related to timers for example.
>
> This new uAPI will be used in Mesa to implement timestamp queries
> and VK_KHR_calibrated_timestamps.
>
> Since this extends the uAPI and because userland needs a way to
> advertise those features conditionally, this also bumps the driver
> minor version.> Signed-off-by: Mary Guillemard
> <mary.guillemard@collabora.com> ---
> drivers/gpu/drm/panthor/panthor_drv.c | 23 ++++++++++++++++++++++-
> include/uapi/drm/panthor_drm.h | 16 ++++++++++++++++ 2 files
> changed, 38 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_drv.c
> b/drivers/gpu/drm/panthor/panthor_drv.c index
> b8a84f26b3ef..4d62ff3fbe03 100644 ---
> a/drivers/gpu/drm/panthor/panthor_drv.c +++
> b/drivers/gpu/drm/panthor/panthor_drv.c @@ -164,6 +164,7 @@
> panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32
> min_stride, _Generic(_obj_name, \ PANTHOR_UOBJ_DECL(struct
> drm_panthor_gpu_info, tiler_present), \ PANTHOR_UOBJ_DECL(struct
> drm_panthor_csif_info, pad), \ + PANTHOR_UOBJ_DECL(struct
> drm_panthor_timestamp_info, current_timestamp), \
> PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \
> PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \
> PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \
> @@ -750,10 +751,21 @@ static void panthor_submit_ctx_cleanup(struct
> panthor_submit_ctx *ctx, kvfree(ctx->jobs); }
>
> +static void panthor_ioctl_query_timestamp(struct
> drm_panthor_timestamp_info *arg) +{ +#ifdef CONFIG_ARM_ARCH_TIMER +
> arg->timestamp_frequency = arch_timer_get_cntfrq(); +
> arg->current_timestamp = __arch_counter_get_cntvct();
The GPU HW doesn't use the arch counter directly, there's a dedicated
timestamp register that can, e.g.:
- get dumped in perfcnt dumps
- get sampled in shader programs
- get dumped in perf instrumentation trace buffers by FW
This GPU timestamp sometimes/usually needs to have an offset applied to
it by SW via a dedicated register, for example, to:
- account for errata like a slight difference in the clock frequencies
between the system and the GPU counters leading to drift, or
- account for errata like an existing fixed offset between system and
GPU timestamps, or
- provide a stable relationship to a known system clock source like
CLOCK_MONOTONIC_RAW, to assist in correlating events in perfcnt dumps &
traces to other system events
> +#else + memset(arg, 0, sizeof(*arg));
[aside] The GPU timestamp register semantics are also defined for a
non-ARM-CPU based system. How you get the frequency on such systems,
though, is not something I can help with :). [/aside]
> +#endif +} + static int panthor_ioctl_dev_query(struct drm_device
> *ddev, void *data, struct drm_file *file) { struct panthor_device
> *ptdev = container_of(ddev, struct panthor_device, base); struct
> drm_panthor_dev_query *args = data; + struct
> drm_panthor_timestamp_info timestamp_info;
>
> if (!args->pointer) { switch (args->type) { @@ -765,6 +777,10 @@
> static int panthor_ioctl_dev_query(struct drm_device *ddev, void
> *data, struct d args->size = sizeof(ptdev->csif_info); return 0;
>
> + case DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: + args->size =
> sizeof(timestamp_info); + return 0; + default: return -EINVAL; } @@
> -777,6 +793,10 @@ static int panthor_ioctl_dev_query(struct
> drm_device *ddev, void *data, struct d case
> DRM_PANTHOR_DEV_QUERY_CSIF_INFO: return
> PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->csif_info);
>
> + case DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: +
> panthor_ioctl_query_timestamp(×tamp_info); + return
> PANTHOR_UOBJ_SET(args->pointer, args->size, timestamp_info); +
> default: return -EINVAL; } @@ -1372,6 +1392,7 @@ static void
> panthor_debugfs_init(struct drm_minor *minor) /* * PanCSF driver
> version: * - 1.0 - initial interface + * - 1.1 - adds
> DEV_QUERY_TIMESTAMP_INFO query */ static const struct drm_driver
> panthor_drm_driver = { .driver_features = DRIVER_RENDER | DRIVER_GEM
> | DRIVER_SYNCOBJ | @@ -1385,7 +1406,7 @@ static const struct
> drm_driver panthor_drm_driver = { .desc = "Panthor DRM driver", .date
> = "20230801", .major = 1, - .minor = 0, + .minor = 1,
>
> .gem_create_object = panthor_gem_create_object,
> .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
> diff --git a/include/uapi/drm/panthor_drm.h
> b/include/uapi/drm/panthor_drm.h index aaed8e12ad0b..8d39a2f91769
> 100644 --- a/include/uapi/drm/panthor_drm.h +++
> b/include/uapi/drm/panthor_drm.h @@ -260,6 +260,9 @@ enum
> drm_panthor_dev_query_type {
>
> /** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface
> information. */ DRM_PANTHOR_DEV_QUERY_CSIF_INFO, + + /**
> @DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: Query timestamp information.
> */ + DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO, };
>
> /** @@ -377,6 +380,19 @@ struct drm_panthor_csif_info { __u32 pad;
> };
>
> +/** + * struct drm_panthor_timestamp_info - Timestamp information +
> * + * Structure grouping all queryable information relating to the
> GPU timestamp. + */ +struct drm_panthor_timestamp_info { + /**
> @timestamp_frequency: The frequency of the timestamp timer. */ +
> __u64 timestamp_frequency; + + /** @current_timestamp: The current
> timestamp. */ + __u64 current_timestamp;
As it stands, this query has nothing to do with the actual GPU so
doesn't really belong here.
It'd be more valuable, and can maybe give better calibration results
than querying the system timestamp separately in userspace, if you
reported all of:
* the system timer value
* the system timer frequency
* the GPU timer value
* the GPU timer frequency (because it _could_ be different in some
systems)
* the GPU timer offset
> +}; + /** * struct drm_panthor_dev_query - Arguments passed to
> DRM_PANTHOR_IOCTL_DEV_QUERY */
>
> base-commit: f7f3ddb6e5c8dc7b621fd8c0903ea42190d67452
--
Mihail Atanassov <mihail.atanassov@arm.com>
Hi Mihail,
On Thu, 8 Aug 2024 12:41:05 +0300
Mihail Atanassov <mihail.atanassov@arm.com> wrote:
> >
> > +/** + * struct drm_panthor_timestamp_info - Timestamp information +
> > * + * Structure grouping all queryable information relating to the
> > GPU timestamp. + */ +struct drm_panthor_timestamp_info { + /**
> > @timestamp_frequency: The frequency of the timestamp timer. */ +
> > __u64 timestamp_frequency; + + /** @current_timestamp: The current
> > timestamp. */ + __u64 current_timestamp;
>
> As it stands, this query has nothing to do with the actual GPU so
> doesn't really belong here.
>
> It'd be more valuable, and can maybe give better calibration results
> than querying the system timestamp separately in userspace, if you
> reported all of:
> * the system timer value
> * the system timer frequency
> * the GPU timer value
> * the GPU timer frequency (because it _could_ be different in some
> systems)
Duh, I wish this wasn't the case and all SoC vendors went for the
arch-timer which guarantees the consistency of the timestamp on the GPU
and CPU. But let's say this is a case we need to support, wouldn't it
be more useful to do the CPU/GPU calibration kernel side (basically at
init/resume time) and then expose the formula describing the
relationship between those 2 things:
CPU_time = GPU_time * GPU_to_CPU_mul / GPU_to_CPU_div +
GPU_to_CPU_offset;
> * the GPU timer offset
Assuming you're talking about the TIMESTAMP_OFFSET register, my
understanding is that this offset should be set by the kernel driver to
account for any disjoint caused by suspend/resume cycles, or any
design-specific offset between the arch-timer and the timer feeding the
GPU timestamp block (hopefully the arch-timer is directly propagated to
the GPU though). The timestamp read by the GPU/CPU already has this
offset added, so I'm not sure I understand what's the value of exposing
it to userspace. As long as the CPU/GPU timestamps are consistent,
userspace probably doesn't care, but I might be missing something.
>
> > +}; + /** * struct drm_panthor_dev_query - Arguments passed to
> > DRM_PANTHOR_IOCTL_DEV_QUERY */
> >
> > base-commit: f7f3ddb6e5c8dc7b621fd8c0903ea42190d67452
>
Hi Boris,
On 28/08/2024 13:09, Boris Brezillon wrote:
> Hi Mihail,
>
> On Thu, 8 Aug 2024 12:41:05 +0300
> Mihail Atanassov <mihail.atanassov@arm.com> wrote:
>
>>>
>>> +/** + * struct drm_panthor_timestamp_info - Timestamp information +
>>> * + * Structure grouping all queryable information relating to the
>>> GPU timestamp. + */ +struct drm_panthor_timestamp_info { + /**
>>> @timestamp_frequency: The frequency of the timestamp timer. */ +
>>> __u64 timestamp_frequency; + + /** @current_timestamp: The current
>>> timestamp. */ + __u64 current_timestamp;
>>
>> As it stands, this query has nothing to do with the actual GPU so
>> doesn't really belong here.
>>
>> It'd be more valuable, and can maybe give better calibration results
>> than querying the system timestamp separately in userspace, if you
>> reported all of:
>> * the system timer value
>> * the system timer frequency
>> * the GPU timer value
>> * the GPU timer frequency (because it _could_ be different in some
>> systems)
>
> Duh, I wish this wasn't the case and all SoC vendors went for the
> arch-timer which guarantees the consistency of the timestamp on the GPU
> and CPU. But let's say this is a case we need to support, wouldn't it
> be more useful to do the CPU/GPU calibration kernel side (basically at
> init/resume time) and then expose the formula describing the
> relationship between those 2 things:
>
> CPU_time = GPU_time * GPU_to_CPU_mul / GPU_to_CPU_div +
> GPU_to_CPU_offset;
>
TIMESTAMP_OFFSET should indeed be set by the kernel (on resume). But I
don't think we need to post M/D+offset to userspace. The 2 Frequencies +
the scalar offset are the raw sources, and userspace can work back from
there.
>> * the GPU timer offset
>
> Assuming you're talking about the TIMESTAMP_OFFSET register, my
> understanding is that this offset should be set by the kernel driver to
> account for any disjoint caused by suspend/resume cycles, or any
That's the primary use, yes.
> design-specific offset between the arch-timer and the timer feeding the
> GPU timestamp block (hopefully the arch-timer is directly propagated to
> the GPU though). The timestamp read by the GPU/CPU already has this
Some platforms don't quite do that, so the two counts can drift away
somewhat (both scalar and multiplicative differences). We've observed
that re-calibrating the offset on resume has been sufficient to retain
accuracy w.r.t. wall clock time, though.
> offset added, so I'm not sure I understand what's the value of exposing
> it to userspace. As long as the CPU/GPU timestamps are consistent,
> userspace probably doesn't care, but I might be missing something.
Functionally, there's no need for it. The timestamp offset could be
negative, however, so userspace could see a jump back on the GPU
timestamp (unlikely as it may be in practice beyond the first GPU
start). In any case, userspace seeing a modified offset value could be a
cue to re-calibrate its own view of the world. And what I mentioned in
the adjacent thread -- if you want to test the quality of the GPU
timestamp values from userspace, not knowing the offset applied makes it
nigh impossible to do so.
>
>>
>>> +}; + /** * struct drm_panthor_dev_query - Arguments passed to
>>> DRM_PANTHOR_IOCTL_DEV_QUERY */
>>>
>>> base-commit: f7f3ddb6e5c8dc7b621fd8c0903ea42190d67452
>>
>
--
Mihail Atanassov <mihail.atanassov@arm.com>
On Wed, 28 Aug 2024 14:22:51 +0100
Mihail Atanassov <mihail.atanassov@arm.com> wrote:
> Hi Boris,
>
> On 28/08/2024 13:09, Boris Brezillon wrote:
> > Hi Mihail,
> >
> > On Thu, 8 Aug 2024 12:41:05 +0300
> > Mihail Atanassov <mihail.atanassov@arm.com> wrote:
> >
> >>>
> >>> +/** + * struct drm_panthor_timestamp_info - Timestamp information +
> >>> * + * Structure grouping all queryable information relating to the
> >>> GPU timestamp. + */ +struct drm_panthor_timestamp_info { + /**
> >>> @timestamp_frequency: The frequency of the timestamp timer. */ +
> >>> __u64 timestamp_frequency; + + /** @current_timestamp: The current
> >>> timestamp. */ + __u64 current_timestamp;
> >>
> >> As it stands, this query has nothing to do with the actual GPU so
> >> doesn't really belong here.
> >>
> >> It'd be more valuable, and can maybe give better calibration results
> >> than querying the system timestamp separately in userspace, if you
> >> reported all of:
> >> * the system timer value
> >> * the system timer frequency
> >> * the GPU timer value
> >> * the GPU timer frequency (because it _could_ be different in some
> >> systems)
> >
> > Duh, I wish this wasn't the case and all SoC vendors went for the
> > arch-timer which guarantees the consistency of the timestamp on the GPU
> > and CPU. But let's say this is a case we need to support, wouldn't it
> > be more useful to do the CPU/GPU calibration kernel side (basically at
> > init/resume time) and then expose the formula describing the
> > relationship between those 2 things:
> >
> > CPU_time = GPU_time * GPU_to_CPU_mul / GPU_to_CPU_div +
> > GPU_to_CPU_offset;
> >
>
> TIMESTAMP_OFFSET should indeed be set by the kernel (on resume). But I
> don't think we need to post M/D+offset to userspace. The 2 Frequencies +
> the scalar offset are the raw sources, and userspace can work back from
> there.
Sure. No matter how you express the relationship, my point was, if the
calibration is supposed to happen in the kernel at resume time,
returning both the CPU/GPU time in DEV_QUERY_TIMESTAMP to make sure the
sampling is close enough that they actually represent the same
timestamp might not be needed, because you can easily convert from one
domain to the other.
> As it stands, this query has nothing to do with the actual GPU so > doesn't really belong here. > > It'd be more valuable, and can maybe give better calibration results > than querying the system timestamp separately in userspace, if you > reported all of: > * the system timer value > * the system timer frequency > * the GPU timer value > * the GPU timer frequency (because it _could_ be different in some systems) > * the GPU timer offset I see, I missed those registers... I will also fix this on my v2 of my Panfrost series. From my understanding, the GPU timer frequency doesn't have a register so I suppose it would be wired to cntfrq_el0 if CONFIG_ARM_ARCH_TIMER is set, am I correct? Thanks for the review, Mary
On 08/08/2024 13:24, Mary Guillemard wrote: >> As it stands, this query has nothing to do with the actual GPU so >> doesn't really belong here. >> >> It'd be more valuable, and can maybe give better calibration results >> than querying the system timestamp separately in userspace, if you >> reported all of: >> * the system timer value >> * the system timer frequency >> * the GPU timer value >> * the GPU timer frequency (because it _could_ be different in some systems) >> * the GPU timer offset > > I see, I missed those registers... I will also fix this on my v2 of my Panfrost series. > > From my understanding, the GPU timer frequency doesn't have a register > so I suppose it would be wired to cntfrq_el0 if CONFIG_ARM_ARCH_TIMER is > set, am I correct? Barring any HW errata, yes, that's the frequency the GPU timer should tick at, too. > > Thanks for the review, > > Mary -- Mihail Atanassov <mihail.atanassov@arm.com>
Sorry about the accidental re-wrap, I got fat fingers... On 08/08/2024 10:41, Mihail Atanassov wrote: > Hi Mary, > > Thanks for your patch. > > On 07/08/2024 16:35, Mary Guillemard wrote: >> Expose system timestamp and frequency supported by the GPU with a >> new device query. >> [... snip ...] >> >> base-commit: f7f3ddb6e5c8dc7b621fd8c0903ea42190d67452 > -- Mihail Atanassov <mihail.atanassov@arm.com>
© 2016 - 2025 Red Hat, Inc.