When drm_gpuvm_bo_obtain_prealloc() is called in immediate mode, it may
end up calling ops->vm_bo_free(vm_bo) while holding the GEM's gpuva
mutex. This is a problem if ops->vm_bo_free(vm_bo) performs any
operations that are not safe in the fence signalling critical path, and
it turns out that Panthor (the only current user of the method) calls
drm_gem_shmem_unpin(), which takes a resv lock internally.

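A rough sketch of the problematic call chain, with the caller holding
the GEM's gpuva mutex as the method currently requires:

  drm_gpuvm_bo_obtain_prealloc()      /* caller holds the gpuva mutex */
    drm_gpuvm_bo_put(__vm_bo)         /* <BO,VM> pair already existed,
                                         this drops the last reference */
      drm_gpuvm_bo_destroy()
        ops->vm_bo_free()             /* Panthor's callback */
          drm_gem_shmem_unpin()       /* takes a resv lock while the
                                         gpuva mutex is still held */
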
This constitutes both a violation of fence signalling safety and a lock
inversion. To fix this, we modify the method to take the GEM's gpuva
mutex internally, so that the mutex can be unlocked before the
preallocated vm_bo is freed.

Note that this modification introduces a requirement that drivers use
immediate mode when calling drm_gpuvm_bo_obtain_prealloc(), as the
method would otherwise take the wrong lock.

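To illustrate, a vm_bo_free() callback along the lines of the Panthor
behaviour described above (a hypothetical sketch; my_vm_bo_free() and
its allocation scheme are made up, only drm_gem_shmem_unpin() is taken
from this patch) can now safely take resv locks, since it no longer
runs under the gpuva mutex:

  static void my_vm_bo_free(struct drm_gpuvm_bo *vm_bo)
  {
          /* Recover the shmem helper object the vm_bo points at. */
          struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(vm_bo->obj);

          /* drm_gem_shmem_unpin() takes the BO's resv lock internally;
           * safe here because the GEM's gpuva mutex is no longer held
           * when this callback runs. */
          drm_gem_shmem_unpin(shmem);
          kfree(vm_bo);
  }
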
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
---
 drivers/gpu/drm/drm_gpuvm.c           | 58 ++++++++++++++++++++++-------------
 drivers/gpu/drm/panthor/panthor_mmu.c | 10 ------
 2 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index 936e6c1a60c16ed5a6898546bf99e23a74f6b58b..f08a5cc1d611f971862c1272987e5ecd6d97c163 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -1601,14 +1601,37 @@ drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm,
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create);
 
+static void
+drm_gpuvm_bo_destroy_not_in_lists(struct drm_gpuvm_bo *vm_bo)
+{
+	struct drm_gpuvm *gpuvm = vm_bo->vm;
+	const struct drm_gpuvm_ops *ops = gpuvm->ops;
+	struct drm_gem_object *obj = vm_bo->obj;
+
+	if (ops && ops->vm_bo_free)
+		ops->vm_bo_free(vm_bo);
+	else
+		kfree(vm_bo);
+
+	drm_gpuvm_put(gpuvm);
+	drm_gem_object_put(obj);
+}
+
+static void
+drm_gpuvm_bo_destroy_not_in_lists_kref(struct kref *kref)
+{
+	struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo,
+						  kref);
+
+	drm_gpuvm_bo_destroy_not_in_lists(vm_bo);
+}
+
 static void
 drm_gpuvm_bo_destroy(struct kref *kref)
 {
 	struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo,
 						  kref);
 	struct drm_gpuvm *gpuvm = vm_bo->vm;
-	const struct drm_gpuvm_ops *ops = gpuvm->ops;
-	struct drm_gem_object *obj = vm_bo->obj;
 	bool lock = !drm_gpuvm_resv_protected(gpuvm);
 
 	if (!lock)
@@ -1617,16 +1640,10 @@ drm_gpuvm_bo_destroy(struct kref *kref)
 	drm_gpuvm_bo_list_del(vm_bo, extobj, lock);
 	drm_gpuvm_bo_list_del(vm_bo, evict, lock);
 
-	drm_gem_gpuva_assert_lock_held(gpuvm, obj);
+	drm_gem_gpuva_assert_lock_held(gpuvm, vm_bo->obj);
 	list_del(&vm_bo->list.entry.gem);
 
-	if (ops && ops->vm_bo_free)
-		ops->vm_bo_free(vm_bo);
-	else
-		kfree(vm_bo);
-
-	drm_gpuvm_put(gpuvm);
-	drm_gem_object_put(obj);
+	drm_gpuvm_bo_destroy_not_in_lists(vm_bo);
 }
 
 /**
@@ -1744,9 +1761,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put_deferred);
 void
 drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm)
 {
-	const struct drm_gpuvm_ops *ops = gpuvm->ops;
 	struct drm_gpuvm_bo *vm_bo;
-	struct drm_gem_object *obj;
 	struct llist_node *bo_defer;
 
 	bo_defer = llist_del_all(&gpuvm->bo_defer);
@@ -1765,14 +1780,7 @@ drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm)
 	while (bo_defer) {
 		vm_bo = llist_entry(bo_defer, struct drm_gpuvm_bo, list.entry.bo_defer);
 		bo_defer = bo_defer->next;
-		obj = vm_bo->obj;
-		if (ops && ops->vm_bo_free)
-			ops->vm_bo_free(vm_bo);
-		else
-			kfree(vm_bo);
-
-		drm_gpuvm_put(gpuvm);
-		drm_gem_object_put(obj);
+		drm_gpuvm_bo_destroy_not_in_lists(vm_bo);
 	}
 }
 EXPORT_SYMBOL_GPL(drm_gpuvm_bo_deferred_cleanup);
@@ -1860,6 +1868,9 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain);
  * count is decreased. If not found @__vm_bo is returned without further
  * increase of the reference count.
  *
+ * The provided @__vm_bo must not already be in the gpuva, evict, or extobj
+ * lists prior to calling this method.
+ *
  * A new &drm_gpuvm_bo is added to the GEMs gpuva list.
  *
  * Returns: a pointer to the found &drm_gpuvm_bo or @__vm_bo if no existing
@@ -1872,14 +1883,19 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo)
 	struct drm_gem_object *obj = __vm_bo->obj;
 	struct drm_gpuvm_bo *vm_bo;
 
+	drm_WARN_ON(gpuvm->drm, !drm_gpuvm_immediate_mode(gpuvm));
+
+	mutex_lock(&obj->gpuva.lock);
 	vm_bo = drm_gpuvm_bo_find(gpuvm, obj);
 	if (vm_bo) {
-		drm_gpuvm_bo_put(__vm_bo);
+		mutex_unlock(&obj->gpuva.lock);
+		kref_put(&__vm_bo->kref, drm_gpuvm_bo_destroy_not_in_lists_kref);
 		return vm_bo;
 	}
 
 	drm_gem_gpuva_assert_lock_held(gpuvm, obj);
 	list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list);
+	mutex_unlock(&obj->gpuva.lock);
 
 	return __vm_bo;
 }
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 9f5f4ddf291024121f3fd5644f2fdeba354fa67c..be8811a70e1a3adec87ca4a85cad7c838f54bebf 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -1224,17 +1224,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
 		goto err_cleanup;
 	}
 
-	/* drm_gpuvm_bo_obtain_prealloc() will call drm_gpuvm_bo_put() on our
-	 * pre-allocated BO if the <BO,VM> association exists. Given we
-	 * only have one ref on preallocated_vm_bo, drm_gpuvm_bo_destroy() will
-	 * be called immediately, and we have to hold the VM resv lock when
-	 * calling this function.
-	 */
-	dma_resv_lock(panthor_vm_resv(vm), NULL);
-	mutex_lock(&bo->base.base.gpuva.lock);
 	op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo);
-	mutex_unlock(&bo->base.base.gpuva.lock);
-	dma_resv_unlock(panthor_vm_resv(vm));
 
 	op_ctx->map.bo_offset = offset;
 
-- 
2.52.0.487.g5c8c507ade-goog

On Fri, 28 Nov 2025 14:14:15 +0000
Alice Ryhl <aliceryhl@google.com> wrote:
> [...]
>
> Signed-off-by: Alice Ryhl <aliceryhl@google.com>

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>

Should we add a Fixes tag?

On Fri, Nov 28, 2025 at 03:24:03PM +0100, Boris Brezillon wrote:
> [...]
>
> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
>
> Should we add a Fixes tag?

Yeah, let's add:

Fixes: 63e919a31625 ("panthor: use drm_gpuva_unlink_defer()")

Alice