We would like to access panthor_file from panthor_group on GPU errors.
Because panthor_group can outlive drm_file, add a refcount to
panthor_file to guarantee its lifetime.
Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
---
drivers/gpu/drm/panthor/panthor_device.h | 16 ++++++++++++++++
drivers/gpu/drm/panthor/panthor_drv.c | 15 ++++++++++++++-
drivers/gpu/drm/panthor/panthor_mmu.c | 1 +
drivers/gpu/drm/panthor/panthor_sched.c | 6 ++++++
4 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index 4fc7cf2aeed57..75ae6fd3a5128 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -256,8 +256,24 @@ struct panthor_file {
/** @stats: cycle and timestamp measures for job execution. */
struct panthor_gpu_usage stats;
+
+ /** @refcount: ref count of this file */
+ struct kref refcount;
};
+static inline struct panthor_file *panthor_file_get(struct panthor_file *pfile)
+{
+ kref_get(&pfile->refcount);
+ return pfile;
+}
+
+void panthor_file_release(struct kref *kref);
+
+static inline void panthor_file_put(struct panthor_file *pfile)
+{
+ kref_put(&pfile->refcount, panthor_file_release);
+}
+
int panthor_device_init(struct panthor_device *ptdev);
void panthor_device_unplug(struct panthor_device *ptdev);
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 775a66c394544..aea9609684b77 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1393,6 +1393,16 @@ static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev,
return 0;
}
+void panthor_file_release(struct kref *kref)
+{
+ struct panthor_file *pfile =
+ container_of(kref, struct panthor_file, refcount);
+
+ WARN_ON(pfile->vms || pfile->groups);
+
+ kfree(pfile);
+}
+
static int
panthor_open(struct drm_device *ddev, struct drm_file *file)
{
@@ -1426,6 +1436,8 @@ panthor_open(struct drm_device *ddev, struct drm_file *file)
if (ret)
goto err_destroy_vm_pool;
+ kref_init(&pfile->refcount);
+
file->driver_priv = pfile;
return 0;
@@ -1442,10 +1454,11 @@ panthor_postclose(struct drm_device *ddev, struct drm_file *file)
{
struct panthor_file *pfile = file->driver_priv;
+ /* destroy vm and group handles now to avoid circular references */
panthor_group_pool_destroy(pfile);
panthor_vm_pool_destroy(pfile);
- kfree(pfile);
+ panthor_file_put(pfile);
}
static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index b39ea6acc6a96..ccbcfe11420ac 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -1604,6 +1604,7 @@ void panthor_vm_pool_destroy(struct panthor_file *pfile)
xa_destroy(&pfile->vms->xa);
kfree(pfile->vms);
+ pfile->vms = NULL;
}
/**
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index a2248f692a030..485072904cd7d 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -535,6 +535,9 @@ struct panthor_group {
/** @ptdev: Device. */
struct panthor_device *ptdev;
+ /** @pfile: File this group is created from. */
+ struct panthor_file *pfile;
+
/** @vm: VM bound to the group. */
struct panthor_vm *vm;
@@ -919,6 +922,7 @@ static void group_release_work(struct work_struct *work)
panthor_kernel_bo_destroy(group->syncobjs);
panthor_vm_put(group->vm);
+ panthor_file_put(group->pfile);
kfree(group);
}
@@ -3467,6 +3471,8 @@ int panthor_group_create(struct panthor_file *pfile,
INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work);
INIT_WORK(&group->release_work, group_release_work);
+ group->pfile = panthor_file_get(pfile);
+
group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id);
if (!group->vm) {
ret = -EINVAL;
--
2.50.0.714.g196bf9f422-goog
On Fri, 20 Jun 2025 16:50:51 -0700 Chia-I Wu <olvaffe@gmail.com> wrote: > We would like to access panthor_file from panthor_group on gpu errors. > Because panthour_group can outlive drm_file, add refcount to > panthor_file to ensure its lifetime. I'm not a huge fan of refcounting panthor_file because people tend to put resource they expect to be released when the last handle goes away, and if we don't refcount these sub-resources they might live longer than they are meant to. Also not a huge fan of the circular referencing that exists between file and groups after this change. How about we move the process info to a sub-object that's refcounted and let both panthor_file and panthor_group take a ref on this object instead? > > Signed-off-by: Chia-I Wu <olvaffe@gmail.com> > --- > drivers/gpu/drm/panthor/panthor_device.h | 16 ++++++++++++++++ > drivers/gpu/drm/panthor/panthor_drv.c | 15 ++++++++++++++- > drivers/gpu/drm/panthor/panthor_mmu.c | 1 + > drivers/gpu/drm/panthor/panthor_sched.c | 6 ++++++ > 4 files changed, 37 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h > index 4fc7cf2aeed57..75ae6fd3a5128 100644 > --- a/drivers/gpu/drm/panthor/panthor_device.h > +++ b/drivers/gpu/drm/panthor/panthor_device.h > @@ -256,8 +256,24 @@ struct panthor_file { > > /** @stats: cycle and timestamp measures for job execution. 
*/ > struct panthor_gpu_usage stats; > + > + /** @refcount: ref count of this file */ > + struct kref refcount; > }; > > +static inline struct panthor_file *panthor_file_get(struct panthor_file *pfile) > +{ > + kref_get(&pfile->refcount); > + return pfile; > +} > + > +void panthor_file_release(struct kref *kref); > + > +static inline void panthor_file_put(struct panthor_file *pfile) > +{ > + kref_put(&pfile->refcount, panthor_file_release); > +} > + > int panthor_device_init(struct panthor_device *ptdev); > void panthor_device_unplug(struct panthor_device *ptdev); > > diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c > index 775a66c394544..aea9609684b77 100644 > --- a/drivers/gpu/drm/panthor/panthor_drv.c > +++ b/drivers/gpu/drm/panthor/panthor_drv.c > @@ -1393,6 +1393,16 @@ static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev, > return 0; > } > > +void panthor_file_release(struct kref *kref) > +{ > + struct panthor_file *pfile = > + container_of(kref, struct panthor_file, refcount); > + > + WARN_ON(pfile->vms || pfile->groups); > + > + kfree(pfile); > +} > + > static int > panthor_open(struct drm_device *ddev, struct drm_file *file) > { > @@ -1426,6 +1436,8 @@ panthor_open(struct drm_device *ddev, struct drm_file *file) > if (ret) > goto err_destroy_vm_pool; > > + kref_init(&pfile->refcount); > + > file->driver_priv = pfile; > return 0; > > @@ -1442,10 +1454,11 @@ panthor_postclose(struct drm_device *ddev, struct drm_file *file) > { > struct panthor_file *pfile = file->driver_priv; > > + /* destroy vm and group handles now to avoid circular references */ > panthor_group_pool_destroy(pfile); > panthor_vm_pool_destroy(pfile); > > - kfree(pfile); > + panthor_file_put(pfile); > } > > static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { > diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c > index b39ea6acc6a96..ccbcfe11420ac 100644 > --- 
a/drivers/gpu/drm/panthor/panthor_mmu.c > +++ b/drivers/gpu/drm/panthor/panthor_mmu.c > @@ -1604,6 +1604,7 @@ void panthor_vm_pool_destroy(struct panthor_file *pfile) > > xa_destroy(&pfile->vms->xa); > kfree(pfile->vms); > + pfile->vms = NULL; > } > > /** > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c > index a2248f692a030..485072904cd7d 100644 > --- a/drivers/gpu/drm/panthor/panthor_sched.c > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > @@ -535,6 +535,9 @@ struct panthor_group { > /** @ptdev: Device. */ > struct panthor_device *ptdev; > > + /** @pfile: File this group is created from. */ > + struct panthor_file *pfile; > + > /** @vm: VM bound to the group. */ > struct panthor_vm *vm; > > @@ -919,6 +922,7 @@ static void group_release_work(struct work_struct *work) > panthor_kernel_bo_destroy(group->syncobjs); > > panthor_vm_put(group->vm); > + panthor_file_put(group->pfile); > kfree(group); > } > > @@ -3467,6 +3471,8 @@ int panthor_group_create(struct panthor_file *pfile, > INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work); > INIT_WORK(&group->release_work, group_release_work); > > + group->pfile = panthor_file_get(pfile); > + > group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id); > if (!group->vm) { > ret = -EINVAL;
On Mon, Jun 23, 2025 at 08:21:22AM +0200, Boris Brezillon wrote: > On Fri, 20 Jun 2025 16:50:51 -0700 > Chia-I Wu <olvaffe@gmail.com> wrote: > > > We would like to access panthor_file from panthor_group on gpu errors. > > Because panthour_group can outlive drm_file, add refcount to > > panthor_file to ensure its lifetime. > > I'm not a huge fan of refcounting panthor_file because people tend to > put resource they expect to be released when the last handle goes away, > and if we don't refcount these sub-resources they might live longer > than they are meant to. Also not a huge fan of the circular referencing > that exists between file and groups after this change. > > How about we move the process info to a sub-object that's refcounted > and let both panthor_file and panthor_group take a ref on this object > instead? I agree with Boris on this. One alternative is to put the pid and comm in the panthor_group struct as panthor_file makes no use of the fields. Best regards, Liviu > > > > > Signed-off-by: Chia-I Wu <olvaffe@gmail.com> > > --- > > drivers/gpu/drm/panthor/panthor_device.h | 16 ++++++++++++++++ > > drivers/gpu/drm/panthor/panthor_drv.c | 15 ++++++++++++++- > > drivers/gpu/drm/panthor/panthor_mmu.c | 1 + > > drivers/gpu/drm/panthor/panthor_sched.c | 6 ++++++ > > 4 files changed, 37 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h > > index 4fc7cf2aeed57..75ae6fd3a5128 100644 > > --- a/drivers/gpu/drm/panthor/panthor_device.h > > +++ b/drivers/gpu/drm/panthor/panthor_device.h > > @@ -256,8 +256,24 @@ struct panthor_file { > > > > /** @stats: cycle and timestamp measures for job execution. 
*/ > > struct panthor_gpu_usage stats; > > + > > + /** @refcount: ref count of this file */ > > + struct kref refcount; > > }; > > > > +static inline struct panthor_file *panthor_file_get(struct panthor_file *pfile) > > +{ > > + kref_get(&pfile->refcount); > > + return pfile; > > +} > > + > > +void panthor_file_release(struct kref *kref); > > + > > +static inline void panthor_file_put(struct panthor_file *pfile) > > +{ > > + kref_put(&pfile->refcount, panthor_file_release); > > +} > > + > > int panthor_device_init(struct panthor_device *ptdev); > > void panthor_device_unplug(struct panthor_device *ptdev); > > > > diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c > > index 775a66c394544..aea9609684b77 100644 > > --- a/drivers/gpu/drm/panthor/panthor_drv.c > > +++ b/drivers/gpu/drm/panthor/panthor_drv.c > > @@ -1393,6 +1393,16 @@ static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev, > > return 0; > > } > > > > +void panthor_file_release(struct kref *kref) > > +{ > > + struct panthor_file *pfile = > > + container_of(kref, struct panthor_file, refcount); > > + > > + WARN_ON(pfile->vms || pfile->groups); > > + > > + kfree(pfile); > > +} > > + > > static int > > panthor_open(struct drm_device *ddev, struct drm_file *file) > > { > > @@ -1426,6 +1436,8 @@ panthor_open(struct drm_device *ddev, struct drm_file *file) > > if (ret) > > goto err_destroy_vm_pool; > > > > + kref_init(&pfile->refcount); > > + > > file->driver_priv = pfile; > > return 0; > > > > @@ -1442,10 +1454,11 @@ panthor_postclose(struct drm_device *ddev, struct drm_file *file) > > { > > struct panthor_file *pfile = file->driver_priv; > > > > + /* destroy vm and group handles now to avoid circular references */ > > panthor_group_pool_destroy(pfile); > > panthor_vm_pool_destroy(pfile); > > > > - kfree(pfile); > > + panthor_file_put(pfile); > > } > > > > static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { > > diff --git 
a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c > > index b39ea6acc6a96..ccbcfe11420ac 100644 > > --- a/drivers/gpu/drm/panthor/panthor_mmu.c > > +++ b/drivers/gpu/drm/panthor/panthor_mmu.c > > @@ -1604,6 +1604,7 @@ void panthor_vm_pool_destroy(struct panthor_file *pfile) > > > > xa_destroy(&pfile->vms->xa); > > kfree(pfile->vms); > > + pfile->vms = NULL; > > } > > > > /** > > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c > > index a2248f692a030..485072904cd7d 100644 > > --- a/drivers/gpu/drm/panthor/panthor_sched.c > > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > > @@ -535,6 +535,9 @@ struct panthor_group { > > /** @ptdev: Device. */ > > struct panthor_device *ptdev; > > > > + /** @pfile: File this group is created from. */ > > + struct panthor_file *pfile; > > + > > /** @vm: VM bound to the group. */ > > struct panthor_vm *vm; > > > > @@ -919,6 +922,7 @@ static void group_release_work(struct work_struct *work) > > panthor_kernel_bo_destroy(group->syncobjs); > > > > panthor_vm_put(group->vm); > > + panthor_file_put(group->pfile); > > kfree(group); > > } > > > > @@ -3467,6 +3471,8 @@ int panthor_group_create(struct panthor_file *pfile, > > INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work); > > INIT_WORK(&group->release_work, group_release_work); > > > > + group->pfile = panthor_file_get(pfile); > > + > > group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id); > > if (!group->vm) { > > ret = -EINVAL; > -- ==================== | I would like to | | fix the world, | | but they're not | | giving me the | \ source code! / --------------- ¯\_(ツ)_/¯
Hi, On Mon, Jun 23, 2025 at 2:07 AM Liviu Dudau <liviu.dudau@arm.com> wrote: > > On Mon, Jun 23, 2025 at 08:21:22AM +0200, Boris Brezillon wrote: > > On Fri, 20 Jun 2025 16:50:51 -0700 > > Chia-I Wu <olvaffe@gmail.com> wrote: > > > > > We would like to access panthor_file from panthor_group on gpu errors. > > > Because panthour_group can outlive drm_file, add refcount to > > > panthor_file to ensure its lifetime. > > > > I'm not a huge fan of refcounting panthor_file because people tend to > > put resource they expect to be released when the last handle goes away, > > and if we don't refcount these sub-resources they might live longer > > than they are meant to. Also not a huge fan of the circular referencing > > that exists between file and groups after this change. > > > > How about we move the process info to a sub-object that's refcounted > > and let both panthor_file and panthor_group take a ref on this object > > instead? > > I agree with Boris on this. One alternative is to put the pid and comm in > the panthor_group struct as panthor_file makes no use of the fields. I took this suggestion in v2 because, when the task that opened the node differs from the task that created the group, we are more interested in the latter.
© 2016 - 2025 Red Hat, Inc.