[PATCH v2] drm/nouveau: Prevent signalled fences in pending list

Philipp Stanner posted 1 patch 1 day, 1 hour ago
drivers/gpu/drm/nouveau/nouveau_fence.c | 52 +++++++++++++------------
drivers/gpu/drm/nouveau/nouveau_fence.h |  1 +
2 files changed, 29 insertions(+), 24 deletions(-)
[PATCH v2] drm/nouveau: Prevent signalled fences in pending list
Posted by Philipp Stanner 1 day, 1 hour ago
Nouveau currently relies on the assumption that dma_fences will only
ever get signalled through nouveau_fence_signal(), which takes care of
removing a signalled fence from the list nouveau_fence_chan.pending.

This self-imposed rule is violated in nouveau_fence_done(), where
dma_fence_is_signaled() can signal the fence without removing it from
the list. This enables accesses to already signalled fences through the
list, which is a bug.

Furthermore, it must always be possible to use standard dma_fence
methods an a dma_fence and observe valid behavior. The canonical way of
ensuring that signalling a fence has additional effects is to add those
effects to a callback and register it on that fence.

Move the code from nouveau_fence_signal() into a dma_fence callback.
Register that callback when creating the fence.

Cc: <stable@vger.kernel.org> # 4.10+
Signed-off-by: Philipp Stanner <phasta@kernel.org>
---
Changes in v2:
  - Remove Fixes: tag. (Danilo)
  - Remove integer "drop" and call nvif_event_block() in the fence
    callback. (Danilo)
---
 drivers/gpu/drm/nouveau/nouveau_fence.c | 52 +++++++++++++------------
 drivers/gpu/drm/nouveau/nouveau_fence.h |  1 +
 2 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 7cc84472cece..cf510ef9641a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -50,24 +50,24 @@ nouveau_fctx(struct nouveau_fence *fence)
 	return container_of(fence->base.lock, struct nouveau_fence_chan, lock);
 }
 
-static int
-nouveau_fence_signal(struct nouveau_fence *fence)
+static void
+nouveau_fence_cleanup_cb(struct dma_fence *dfence, struct dma_fence_cb *cb)
 {
-	int drop = 0;
+	struct nouveau_fence_chan *fctx;
+	struct nouveau_fence *fence;
+
+	fence = container_of(dfence, struct nouveau_fence, base);
+	fctx = nouveau_fctx(fence);
 
-	dma_fence_signal_locked(&fence->base);
 	list_del(&fence->head);
 	rcu_assign_pointer(fence->channel, NULL);
 
 	if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) {
-		struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
-
 		if (!--fctx->notify_ref)
-			drop = 1;
+			nvif_event_block(&fctx->event);
 	}
 
 	dma_fence_put(&fence->base);
-	return drop;
 }
 
 static struct nouveau_fence *
@@ -93,8 +93,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
 		if (error)
 			dma_fence_set_error(&fence->base, error);
 
-		if (nouveau_fence_signal(fence))
-			nvif_event_block(&fctx->event);
+		dma_fence_signal_locked(&fence->base);
 	}
 	fctx->killed = 1;
 	spin_unlock_irqrestore(&fctx->lock, flags);
@@ -127,11 +126,10 @@ nouveau_fence_context_free(struct nouveau_fence_chan *fctx)
 	kref_put(&fctx->fence_ref, nouveau_fence_context_put);
 }
 
-static int
+static void
 nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
 {
 	struct nouveau_fence *fence;
-	int drop = 0;
 	u32 seq = fctx->read(chan);
 
 	while (!list_empty(&fctx->pending)) {
@@ -140,10 +138,8 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc
 		if ((int)(seq - fence->base.seqno) < 0)
 			break;
 
-		drop |= nouveau_fence_signal(fence);
+		dma_fence_signal_locked(&fence->base);
 	}
-
-	return drop;
 }
 
 static void
@@ -152,7 +148,6 @@ nouveau_fence_uevent_work(struct work_struct *work)
 	struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
 						       uevent_work);
 	unsigned long flags;
-	int drop = 0;
 
 	spin_lock_irqsave(&fctx->lock, flags);
 	if (!list_empty(&fctx->pending)) {
@@ -161,11 +156,8 @@ nouveau_fence_uevent_work(struct work_struct *work)
 
 		fence = list_entry(fctx->pending.next, typeof(*fence), head);
 		chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
-		if (nouveau_fence_update(chan, fctx))
-			drop = 1;
+		nouveau_fence_update(chan, fctx);
 	}
-	if (drop)
-		nvif_event_block(&fctx->event);
 
 	spin_unlock_irqrestore(&fctx->lock, flags);
 }
@@ -235,6 +227,19 @@ nouveau_fence_emit(struct nouveau_fence *fence)
 			       &fctx->lock, fctx->context, ++fctx->sequence);
 	kref_get(&fctx->fence_ref);
 
+	fence->cb.func = nouveau_fence_cleanup_cb;
+	/* Adding a callback runs into __dma_fence_enable_signaling(), which will
+	 * ultimately run into nouveau_fence_no_signaling(), where a WARN_ON
+	 * would fire because the refcount can be dropped there.
+	 *
+	 * Increment the refcount here temporarily to work around that.
+	 */
+	dma_fence_get(&fence->base);
+	ret = dma_fence_add_callback(&fence->base, &fence->cb, nouveau_fence_cleanup_cb);
+	dma_fence_put(&fence->base);
+	if (ret)
+		return ret;
+
 	ret = fctx->emit(fence);
 	if (!ret) {
 		dma_fence_get(&fence->base);
@@ -246,8 +251,7 @@ nouveau_fence_emit(struct nouveau_fence *fence)
 			return -ENODEV;
 		}
 
-		if (nouveau_fence_update(chan, fctx))
-			nvif_event_block(&fctx->event);
+		nouveau_fence_update(chan, fctx);
 
 		list_add_tail(&fence->head, &fctx->pending);
 		spin_unlock_irq(&fctx->lock);
@@ -270,8 +274,8 @@ nouveau_fence_done(struct nouveau_fence *fence)
 
 		spin_lock_irqsave(&fctx->lock, flags);
 		chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
-		if (chan && nouveau_fence_update(chan, fctx))
-			nvif_event_block(&fctx->event);
+		if (chan)
+			nouveau_fence_update(chan, fctx);
 		spin_unlock_irqrestore(&fctx->lock, flags);
 	}
 	return dma_fence_is_signaled(&fence->base);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 8bc065acfe35..e6b2df7fdc42 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -10,6 +10,7 @@ struct nouveau_bo;
 
 struct nouveau_fence {
 	struct dma_fence base;
+	struct dma_fence_cb cb;
 
 	struct list_head head;
 
-- 
2.48.1
Re: [PATCH v2] drm/nouveau: Prevent signalled fences in pending list
Posted by Christian König 23 hours ago
Am 03.04.25 um 12:13 schrieb Philipp Stanner:
> Nouveau currently relies on the assumption that dma_fences will only
> ever get signalled through nouveau_fence_signal(), which takes care of
> removing a signalled fence from the list nouveau_fence_chan.pending.
>
> This self-imposed rule is violated in nouveau_fence_done(), where
> dma_fence_is_signaled() can signal the fence without removing it from
> the list. This enables accesses to already signalled fences through the
> list, which is a bug.
>
> Furthermore, it must always be possible to use standard dma_fence
> methods an a dma_fence and observe valid behavior. The canonical way of
> ensuring that signalling a fence has additional effects is to add those
> effects to a callback and register it on that fence.
>
> Move the code from nouveau_fence_signal() into a dma_fence callback.
> Register that callback when creating the fence.
>
> Cc: <stable@vger.kernel.org> # 4.10+
> Signed-off-by: Philipp Stanner <phasta@kernel.org>
> ---
> Changes in v2:
>   - Remove Fixes: tag. (Danilo)
>   - Remove integer "drop" and call nvif_event_block() in the fence
>     callback. (Danilo)
> ---
>  drivers/gpu/drm/nouveau/nouveau_fence.c | 52 +++++++++++++------------
>  drivers/gpu/drm/nouveau/nouveau_fence.h |  1 +
>  2 files changed, 29 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index 7cc84472cece..cf510ef9641a 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -50,24 +50,24 @@ nouveau_fctx(struct nouveau_fence *fence)
>  	return container_of(fence->base.lock, struct nouveau_fence_chan, lock);
>  }
>  
> -static int
> -nouveau_fence_signal(struct nouveau_fence *fence)
> +static void
> +nouveau_fence_cleanup_cb(struct dma_fence *dfence, struct dma_fence_cb *cb)
>  {
> -	int drop = 0;
> +	struct nouveau_fence_chan *fctx;
> +	struct nouveau_fence *fence;
> +
> +	fence = container_of(dfence, struct nouveau_fence, base);
> +	fctx = nouveau_fctx(fence);
>  
> -	dma_fence_signal_locked(&fence->base);
>  	list_del(&fence->head);
>  	rcu_assign_pointer(fence->channel, NULL);
>  
>  	if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) {
> -		struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
> -
>  		if (!--fctx->notify_ref)
> -			drop = 1;
> +			nvif_event_block(&fctx->event);
>  	}
>  
>  	dma_fence_put(&fence->base);
> -	return drop;
>  }
>  
>  static struct nouveau_fence *
> @@ -93,8 +93,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
>  		if (error)
>  			dma_fence_set_error(&fence->base, error);
>  
> -		if (nouveau_fence_signal(fence))
> -			nvif_event_block(&fctx->event);
> +		dma_fence_signal_locked(&fence->base);
>  	}
>  	fctx->killed = 1;
>  	spin_unlock_irqrestore(&fctx->lock, flags);
> @@ -127,11 +126,10 @@ nouveau_fence_context_free(struct nouveau_fence_chan *fctx)
>  	kref_put(&fctx->fence_ref, nouveau_fence_context_put);
>  }
>  
> -static int
> +static void
>  nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
>  {
>  	struct nouveau_fence *fence;
> -	int drop = 0;
>  	u32 seq = fctx->read(chan);
>  
>  	while (!list_empty(&fctx->pending)) {
> @@ -140,10 +138,8 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc
>  		if ((int)(seq - fence->base.seqno) < 0)
>  			break;
>  
> -		drop |= nouveau_fence_signal(fence);
> +		dma_fence_signal_locked(&fence->base);
>  	}
> -
> -	return drop;
>  }
>  
>  static void
> @@ -152,7 +148,6 @@ nouveau_fence_uevent_work(struct work_struct *work)
>  	struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
>  						       uevent_work);
>  	unsigned long flags;
> -	int drop = 0;
>  
>  	spin_lock_irqsave(&fctx->lock, flags);
>  	if (!list_empty(&fctx->pending)) {
> @@ -161,11 +156,8 @@ nouveau_fence_uevent_work(struct work_struct *work)
>  
>  		fence = list_entry(fctx->pending.next, typeof(*fence), head);
>  		chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
> -		if (nouveau_fence_update(chan, fctx))
> -			drop = 1;
> +		nouveau_fence_update(chan, fctx);
>  	}
> -	if (drop)
> -		nvif_event_block(&fctx->event);
>  
>  	spin_unlock_irqrestore(&fctx->lock, flags);
>  }
> @@ -235,6 +227,19 @@ nouveau_fence_emit(struct nouveau_fence *fence)
>  			       &fctx->lock, fctx->context, ++fctx->sequence);
>  	kref_get(&fctx->fence_ref);
>  
> +	fence->cb.func = nouveau_fence_cleanup_cb;
> +	/* Adding a callback runs into __dma_fence_enable_signaling(), which will
> +	 * ultimately run into nouveau_fence_no_signaling(), where a WARN_ON
> +	 * would fire because the refcount can be dropped there.
> +	 *
> +	 * Increment the refcount here temporarily to work around that.
> +	 */
> +	dma_fence_get(&fence->base);
> +	ret = dma_fence_add_callback(&fence->base, &fence->cb, nouveau_fence_cleanup_cb);

That looks like a really really awkward approach. The driver basically uses a the DMA fence infrastructure as middle layer and callbacks into itself to cleanup it's own structures.

Additional to that we don't guarantee any callback order for the DMA fence and so it can be that mix cleaning up the callback with other work which is certainly not good when you want to guarantee that the cleanup happens under the same lock.

Instead the call to dma_fence_signal_locked() should probably be removed from nouveau_fence_signal() and into nouveau_fence_context_kill() and nouveau_fence_update().

This way nouveau_fence_is_signaled() can call this function as well.

BTW: nouveau_fence_no_signaling() looks completely broken as well. It calls nouveau_fence_is_signaled() and then list_del() on the fence head.

As far as I can see that is completely superfluous and should probably be dropped. IIRC I once had a patch to clean that up but it was dropped for some reason.

Regards,
Christian.


> +	dma_fence_put(&fence->base);
> +	if (ret)
> +		return ret;
> +
>  	ret = fctx->emit(fence);
>  	if (!ret) {
>  		dma_fence_get(&fence->base);
> @@ -246,8 +251,7 @@ nouveau_fence_emit(struct nouveau_fence *fence)
>  			return -ENODEV;
>  		}
>  
> -		if (nouveau_fence_update(chan, fctx))
> -			nvif_event_block(&fctx->event);
> +		nouveau_fence_update(chan, fctx);
>  
>  		list_add_tail(&fence->head, &fctx->pending);
>  		spin_unlock_irq(&fctx->lock);
> @@ -270,8 +274,8 @@ nouveau_fence_done(struct nouveau_fence *fence)
>  
>  		spin_lock_irqsave(&fctx->lock, flags);
>  		chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
> -		if (chan && nouveau_fence_update(chan, fctx))
> -			nvif_event_block(&fctx->event);
> +		if (chan)
> +			nouveau_fence_update(chan, fctx);
>  		spin_unlock_irqrestore(&fctx->lock, flags);
>  	}
>  	return dma_fence_is_signaled(&fence->base);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
> index 8bc065acfe35..e6b2df7fdc42 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
> @@ -10,6 +10,7 @@ struct nouveau_bo;
>  
>  struct nouveau_fence {
>  	struct dma_fence base;
> +	struct dma_fence_cb cb;
>  
>  	struct list_head head;
>
Re: [PATCH v2] drm/nouveau: Prevent signalled fences in pending list
Posted by Philipp Stanner 22 hours ago
On Thu, 2025-04-03 at 14:08 +0200, Christian König wrote:
> Am 03.04.25 um 12:13 schrieb Philipp Stanner:
> > Nouveau currently relies on the assumption that dma_fences will
> > only
> > ever get signalled through nouveau_fence_signal(), which takes care
> > of
> > removing a signalled fence from the list
> > nouveau_fence_chan.pending.
> > 
> > This self-imposed rule is violated in nouveau_fence_done(), where
> > dma_fence_is_signaled() can signal the fence without removing it
> > from
> > the list. This enables accesses to already signalled fences through
> > the
> > list, which is a bug.
> > 
> > Furthermore, it must always be possible to use standard dma_fence
> > methods an a dma_fence and observe valid behavior. The canonical
> > way of
> > ensuring that signalling a fence has additional effects is to add
> > those
> > effects to a callback and register it on that fence.
> > 
> > Move the code from nouveau_fence_signal() into a dma_fence
> > callback.
> > Register that callback when creating the fence.
> > 
> > Cc: <stable@vger.kernel.org> # 4.10+
> > Signed-off-by: Philipp Stanner <phasta@kernel.org>
> > ---
> > Changes in v2:
> >   - Remove Fixes: tag. (Danilo)
> >   - Remove integer "drop" and call nvif_event_block() in the fence
> >     callback. (Danilo)
> > ---
> >  drivers/gpu/drm/nouveau/nouveau_fence.c | 52 +++++++++++++--------
> > ----
> >  drivers/gpu/drm/nouveau/nouveau_fence.h |  1 +
> >  2 files changed, 29 insertions(+), 24 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c
> > b/drivers/gpu/drm/nouveau/nouveau_fence.c
> > index 7cc84472cece..cf510ef9641a 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> > @@ -50,24 +50,24 @@ nouveau_fctx(struct nouveau_fence *fence)
> >  	return container_of(fence->base.lock, struct
> > nouveau_fence_chan, lock);
> >  }
> >  
> > -static int
> > -nouveau_fence_signal(struct nouveau_fence *fence)
> > +static void
> > +nouveau_fence_cleanup_cb(struct dma_fence *dfence, struct
> > dma_fence_cb *cb)
> >  {
> > -	int drop = 0;
> > +	struct nouveau_fence_chan *fctx;
> > +	struct nouveau_fence *fence;
> > +
> > +	fence = container_of(dfence, struct nouveau_fence, base);
> > +	fctx = nouveau_fctx(fence);
> >  
> > -	dma_fence_signal_locked(&fence->base);
> >  	list_del(&fence->head);
> >  	rcu_assign_pointer(fence->channel, NULL);
> >  
> >  	if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence-
> > >base.flags)) {
> > -		struct nouveau_fence_chan *fctx =
> > nouveau_fctx(fence);
> > -
> >  		if (!--fctx->notify_ref)
> > -			drop = 1;
> > +			nvif_event_block(&fctx->event);
> >  	}
> >  
> >  	dma_fence_put(&fence->base);
> > -	return drop;
> >  }
> >  
> >  static struct nouveau_fence *
> > @@ -93,8 +93,7 @@ nouveau_fence_context_kill(struct
> > nouveau_fence_chan *fctx, int error)
> >  		if (error)
> >  			dma_fence_set_error(&fence->base, error);
> >  
> > -		if (nouveau_fence_signal(fence))
> > -			nvif_event_block(&fctx->event);
> > +		dma_fence_signal_locked(&fence->base);
> >  	}
> >  	fctx->killed = 1;
> >  	spin_unlock_irqrestore(&fctx->lock, flags);
> > @@ -127,11 +126,10 @@ nouveau_fence_context_free(struct
> > nouveau_fence_chan *fctx)
> >  	kref_put(&fctx->fence_ref, nouveau_fence_context_put);
> >  }
> >  
> > -static int
> > +static void
> >  nouveau_fence_update(struct nouveau_channel *chan, struct
> > nouveau_fence_chan *fctx)
> >  {
> >  	struct nouveau_fence *fence;
> > -	int drop = 0;
> >  	u32 seq = fctx->read(chan);
> >  
> >  	while (!list_empty(&fctx->pending)) {
> > @@ -140,10 +138,8 @@ nouveau_fence_update(struct nouveau_channel
> > *chan, struct nouveau_fence_chan *fc
> >  		if ((int)(seq - fence->base.seqno) < 0)
> >  			break;
> >  
> > -		drop |= nouveau_fence_signal(fence);
> > +		dma_fence_signal_locked(&fence->base);
> >  	}
> > -
> > -	return drop;
> >  }
> >  
> >  static void
> > @@ -152,7 +148,6 @@ nouveau_fence_uevent_work(struct work_struct
> > *work)
> >  	struct nouveau_fence_chan *fctx = container_of(work,
> > struct nouveau_fence_chan,
> >  						      
> > uevent_work);
> >  	unsigned long flags;
> > -	int drop = 0;
> >  
> >  	spin_lock_irqsave(&fctx->lock, flags);
> >  	if (!list_empty(&fctx->pending)) {
> > @@ -161,11 +156,8 @@ nouveau_fence_uevent_work(struct work_struct
> > *work)
> >  
> >  		fence = list_entry(fctx->pending.next,
> > typeof(*fence), head);
> >  		chan = rcu_dereference_protected(fence->channel,
> > lockdep_is_held(&fctx->lock));
> > -		if (nouveau_fence_update(chan, fctx))
> > -			drop = 1;
> > +		nouveau_fence_update(chan, fctx);
> >  	}
> > -	if (drop)
> > -		nvif_event_block(&fctx->event);
> >  
> >  	spin_unlock_irqrestore(&fctx->lock, flags);
> >  }
> > @@ -235,6 +227,19 @@ nouveau_fence_emit(struct nouveau_fence
> > *fence)
> >  			       &fctx->lock, fctx->context, ++fctx-
> > >sequence);
> >  	kref_get(&fctx->fence_ref);
> >  
> > +	fence->cb.func = nouveau_fence_cleanup_cb;
> > +	/* Adding a callback runs into
> > __dma_fence_enable_signaling(), which will
> > +	 * ultimately run into nouveau_fence_no_signaling(), where
> > a WARN_ON
> > +	 * would fire because the refcount can be dropped there.
> > +	 *
> > +	 * Increment the refcount here temporarily to work around
> > that.
> > +	 */
> > +	dma_fence_get(&fence->base);
> > +	ret = dma_fence_add_callback(&fence->base, &fence->cb,
> > nouveau_fence_cleanup_cb);
> 
> That looks like a really really awkward approach. The driver
> basically uses a the DMA fence infrastructure as middle layer and
> callbacks into itself to cleanup it's own structures.

What else are callbacks good for, if not to do something automatically
when the fence gets signaled?

> Additional to that we don't guarantee any callback order for the DMA
> fence and so it can be that mix cleaning up the callback with other
> work which is certainly not good when you want to guarantee that the
> cleanup happens under the same lock.

Isn't my perception correct that the primary issue you have with this
approach is that dma_fence_put() is called from within the callback? Or
do you also take issue with deleting from the list?

> 
> Instead the call to dma_fence_signal_locked() should probably be
> removed from nouveau_fence_signal() and into
> nouveau_fence_context_kill() and nouveau_fence_update().
> 
> This way nouveau_fence_is_signaled() can call this function as well.

Which "this function"? dma_fence_signal_locked()

> 
> BTW: nouveau_fence_no_signaling() looks completely broken as well. It
> calls nouveau_fence_is_signaled() and then list_del() on the fence
> head.

I can assure you that a great many things in Nouveau look completely
broken.

The question for us is always the cost-benefit-ratio when fixing bugs.
There are fixes that solve the bug with reasonable effort, and there
are great reworks towards an ideal state.

P.


> 
> As far as I can see that is completely superfluous and should
> probably be dropped. IIRC I once had a patch to clean that up but it
> was dropped for some reason.
> 
> Regards,
> Christian.
> 
> 
> > +	dma_fence_put(&fence->base);
> > +	if (ret)
> > +		return ret;
> > +
> >  	ret = fctx->emit(fence);
> >  	if (!ret) {
> >  		dma_fence_get(&fence->base);
> > @@ -246,8 +251,7 @@ nouveau_fence_emit(struct nouveau_fence *fence)
> >  			return -ENODEV;
> >  		}
> >  
> > -		if (nouveau_fence_update(chan, fctx))
> > -			nvif_event_block(&fctx->event);
> > +		nouveau_fence_update(chan, fctx);
> >  
> >  		list_add_tail(&fence->head, &fctx->pending);
> >  		spin_unlock_irq(&fctx->lock);
> > @@ -270,8 +274,8 @@ nouveau_fence_done(struct nouveau_fence *fence)
> >  
> >  		spin_lock_irqsave(&fctx->lock, flags);
> >  		chan = rcu_dereference_protected(fence->channel,
> > lockdep_is_held(&fctx->lock));
> > -		if (chan && nouveau_fence_update(chan, fctx))
> > -			nvif_event_block(&fctx->event);
> > +		if (chan)
> > +			nouveau_fence_update(chan, fctx);
> >  		spin_unlock_irqrestore(&fctx->lock, flags);
> >  	}
> >  	return dma_fence_is_signaled(&fence->base);
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h
> > b/drivers/gpu/drm/nouveau/nouveau_fence.h
> > index 8bc065acfe35..e6b2df7fdc42 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_fence.h
> > +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
> > @@ -10,6 +10,7 @@ struct nouveau_bo;
> >  
> >  struct nouveau_fence {
> >  	struct dma_fence base;
> > +	struct dma_fence_cb cb;
> >  
> >  	struct list_head head;
> >  
> 
Re: [PATCH v2] drm/nouveau: Prevent signalled fences in pending list
Posted by Danilo Krummrich 22 hours ago
On Thu, Apr 03, 2025 at 02:58:13PM +0200, Philipp Stanner wrote:
> On Thu, 2025-04-03 at 14:08 +0200, Christian König wrote:
> > Am 03.04.25 um 12:13 schrieb Philipp Stanner:
> 
> > BTW: nouveau_fence_no_signaling() looks completely broken as well. It
> > calls nouveau_fence_is_signaled() and then list_del() on the fence
> > head.
> 
> I can assure you that a great many things in Nouveau look completely
> broken.
> 
> The question for us is always the cost-benefit-ratio when fixing bugs.
> There are fixes that solve the bug with reasonable effort, and there
> are great reworks towards an ideal state.

That's just an additional thing that Christian noticed. It isn't really directly
related to what you want to fix with your patch.

I think the function can simply be dropped.
Re: [PATCH v2] drm/nouveau: Prevent signalled fences in pending list
Posted by Christian König 21 hours ago
Am 03.04.25 um 15:15 schrieb Danilo Krummrich:
> On Thu, Apr 03, 2025 at 02:58:13PM +0200, Philipp Stanner wrote:
>> On Thu, 2025-04-03 at 14:08 +0200, Christian König wrote:
>>> Am 03.04.25 um 12:13 schrieb Philipp Stanner:
>>> BTW: nouveau_fence_no_signaling() looks completely broken as well. It
>>> calls nouveau_fence_is_signaled() and then list_del() on the fence
>>> head.
>> I can assure you that a great many things in Nouveau look completely
>> broken.
>>
>> The question for us is always the cost-benefit-ratio when fixing bugs.
>> There are fixes that solve the bug with reasonable effort, and there
>> are great reworks towards an ideal state.
> That's just an additional thing that Christian noticed. It isn't really directly
> related to what you want to fix with your patch.

Well there is some relation. From nouveau_fence_no_signaling():

        if (nouveau_fence_is_signaled(f)) {
                list_del(&fence->head);

                dma_fence_put(&fence->base);
                return false;
        }

That looks like somebody realized that the fence needs to be removed from the pending list and the reference dropped.

It's just that this was added to the wrong function, e.g. those lines need to be in nouveau_fence_is_signaled() and not here.

Regards,
Christian.

>
> I think the function can simply be dropped.

Re: [PATCH v2] drm/nouveau: Prevent signalled fences in pending list
Posted by Danilo Krummrich 22 hours ago
On Thu, Apr 03, 2025 at 02:08:06PM +0200, Christian König wrote:
> Am 03.04.25 um 12:13 schrieb Philipp Stanner:
> 
> > @@ -235,6 +227,19 @@ nouveau_fence_emit(struct nouveau_fence *fence)
> >  			       &fctx->lock, fctx->context, ++fctx->sequence);
> >  	kref_get(&fctx->fence_ref);
> >  
> > +	fence->cb.func = nouveau_fence_cleanup_cb;
> > +	/* Adding a callback runs into __dma_fence_enable_signaling(), which will
> > +	 * ultimately run into nouveau_fence_no_signaling(), where a WARN_ON
> > +	 * would fire because the refcount can be dropped there.
> > +	 *
> > +	 * Increment the refcount here temporarily to work around that.
> > +	 */
> > +	dma_fence_get(&fence->base);
> > +	ret = dma_fence_add_callback(&fence->base, &fence->cb, nouveau_fence_cleanup_cb);
> 
> That looks like a really really awkward approach. The driver basically uses a the DMA fence infrastructure as middle layer and callbacks into itself to cleanup it's own structures.
> 
> Additional to that we don't guarantee any callback order for the DMA fence and so it can be that mix cleaning up the callback with other work which is certainly not good when you want to guarantee that the cleanup happens under the same lock.
> 
> Instead the call to dma_fence_signal_locked() should probably be removed from nouveau_fence_signal() and into nouveau_fence_context_kill() and nouveau_fence_update().
> 
> This way nouveau_fence_is_signaled() can call this function as well.

Yes, I think this would work as well. It wouldn't work if only
dma_fence_signal() is called on this fence, but that should be fine.

So, I agree that's probably the better approach.

> BTW: nouveau_fence_no_signaling() looks completely broken as well. It calls nouveau_fence_is_signaled() and then list_del() on the fence head.

It does indeed look broken, as in the fence may not be signaled at all. If at
all, it should call dma_fence_is_signaled() instead.

> As far as I can see that is completely superfluous and should probably be dropped. IIRC I once had a patch to clean that up but it was dropped for some reason.

Agreed, to me it looks unnecessary as well.
Re: [PATCH v2] drm/nouveau: Prevent signalled fences in pending list
Posted by Philipp Stanner 1 day, 1 hour ago
On Thu, 2025-04-03 at 12:13 +0200, Philipp Stanner wrote:
> Nouveau currently relies on the assumption that dma_fences will only
> ever get signalled through nouveau_fence_signal(), which takes care
> of
> removing a signalled fence from the list nouveau_fence_chan.pending.
> 
> This self-imposed rule is violated in nouveau_fence_done(), where
> dma_fence_is_signaled() can signal the fence without removing it from
> the list. This enables accesses to already signalled fences through
> the
> list, which is a bug.
> 
> Furthermore, it must always be possible to use standard dma_fence
> methods an a dma_fence and observe valid behavior. The canonical way
> of
> ensuring that signalling a fence has additional effects is to add
> those
> effects to a callback and register it on that fence.
> 
> Move the code from nouveau_fence_signal() into a dma_fence callback.
> Register that callback when creating the fence.
> 
> Cc: <stable@vger.kernel.org> # 4.10+
> Signed-off-by: Philipp Stanner <phasta@kernel.org>
> ---
> Changes in v2:
>   - Remove Fixes: tag. (Danilo)
>   - Remove integer "drop" and call nvif_event_block() in the fence
>     callback. (Danilo)
> ---
>  drivers/gpu/drm/nouveau/nouveau_fence.c | 52 +++++++++++++----------
> --
>  drivers/gpu/drm/nouveau/nouveau_fence.h |  1 +
>  2 files changed, 29 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c
> b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index 7cc84472cece..cf510ef9641a 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -50,24 +50,24 @@ nouveau_fctx(struct nouveau_fence *fence)
>  	return container_of(fence->base.lock, struct
> nouveau_fence_chan, lock);
>  }
>  
> -static int
> -nouveau_fence_signal(struct nouveau_fence *fence)
> +static void
> +nouveau_fence_cleanup_cb(struct dma_fence *dfence, struct
> dma_fence_cb *cb)
>  {
> -	int drop = 0;
> +	struct nouveau_fence_chan *fctx;
> +	struct nouveau_fence *fence;
> +
> +	fence = container_of(dfence, struct nouveau_fence, base);
> +	fctx = nouveau_fctx(fence);
>  
> -	dma_fence_signal_locked(&fence->base);
>  	list_del(&fence->head);
>  	rcu_assign_pointer(fence->channel, NULL);
>  
>  	if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags))
> {
> -		struct nouveau_fence_chan *fctx =
> nouveau_fctx(fence);
> -
>  		if (!--fctx->notify_ref)
> -			drop = 1;
> +			nvif_event_block(&fctx->event);
>  	}
>  
>  	dma_fence_put(&fence->base);

What I realized while coding this v2 is that we might want to think
about whether we really want the dma_fence_put() in the fence callback?

It should work fine, since it's exactly identical to the previous
code's behavior – but effectively it means that the driver's reference
will be dropped whenever it signals that fence.

IDK

P.


> -	return drop;
>  }
>  
>  static struct nouveau_fence *
> @@ -93,8 +93,7 @@ nouveau_fence_context_kill(struct
> nouveau_fence_chan *fctx, int error)
>  		if (error)
>  			dma_fence_set_error(&fence->base, error);
>  
> -		if (nouveau_fence_signal(fence))
> -			nvif_event_block(&fctx->event);
> +		dma_fence_signal_locked(&fence->base);
>  	}
>  	fctx->killed = 1;
>  	spin_unlock_irqrestore(&fctx->lock, flags);
> @@ -127,11 +126,10 @@ nouveau_fence_context_free(struct
> nouveau_fence_chan *fctx)
>  	kref_put(&fctx->fence_ref, nouveau_fence_context_put);
>  }
>  
> -static int
> +static void
>  nouveau_fence_update(struct nouveau_channel *chan, struct
> nouveau_fence_chan *fctx)
>  {
>  	struct nouveau_fence *fence;
> -	int drop = 0;
>  	u32 seq = fctx->read(chan);
>  
>  	while (!list_empty(&fctx->pending)) {
> @@ -140,10 +138,8 @@ nouveau_fence_update(struct nouveau_channel
> *chan, struct nouveau_fence_chan *fc
>  		if ((int)(seq - fence->base.seqno) < 0)
>  			break;
>  
> -		drop |= nouveau_fence_signal(fence);
> +		dma_fence_signal_locked(&fence->base);
>  	}
> -
> -	return drop;
>  }
>  
>  static void
> @@ -152,7 +148,6 @@ nouveau_fence_uevent_work(struct work_struct
> *work)
>  	struct nouveau_fence_chan *fctx = container_of(work, struct
> nouveau_fence_chan,
>  						       uevent_work);
>  	unsigned long flags;
> -	int drop = 0;
>  
>  	spin_lock_irqsave(&fctx->lock, flags);
>  	if (!list_empty(&fctx->pending)) {
> @@ -161,11 +156,8 @@ nouveau_fence_uevent_work(struct work_struct
> *work)
>  
>  		fence = list_entry(fctx->pending.next,
> typeof(*fence), head);
>  		chan = rcu_dereference_protected(fence->channel,
> lockdep_is_held(&fctx->lock));
> -		if (nouveau_fence_update(chan, fctx))
> -			drop = 1;
> +		nouveau_fence_update(chan, fctx);
>  	}
> -	if (drop)
> -		nvif_event_block(&fctx->event);
>  
>  	spin_unlock_irqrestore(&fctx->lock, flags);
>  }
> @@ -235,6 +227,19 @@ nouveau_fence_emit(struct nouveau_fence *fence)
>  			       &fctx->lock, fctx->context, ++fctx-
> >sequence);
>  	kref_get(&fctx->fence_ref);
>  
> +	fence->cb.func = nouveau_fence_cleanup_cb;
> +	/* Adding a callback runs into
> __dma_fence_enable_signaling(), which will
> +	 * ultimately run into nouveau_fence_no_signaling(), where a
> WARN_ON
> +	 * would fire because the refcount can be dropped there.
> +	 *
> +	 * Increment the refcount here temporarily to work around
> that.
> +	 */
> +	dma_fence_get(&fence->base);
> +	ret = dma_fence_add_callback(&fence->base, &fence->cb,
> nouveau_fence_cleanup_cb);
> +	dma_fence_put(&fence->base);
> +	if (ret)
> +		return ret;
> +
>  	ret = fctx->emit(fence);
>  	if (!ret) {
>  		dma_fence_get(&fence->base);
> @@ -246,8 +251,7 @@ nouveau_fence_emit(struct nouveau_fence *fence)
>  			return -ENODEV;
>  		}
>  
> -		if (nouveau_fence_update(chan, fctx))
> -			nvif_event_block(&fctx->event);
> +		nouveau_fence_update(chan, fctx);
>  
>  		list_add_tail(&fence->head, &fctx->pending);
>  		spin_unlock_irq(&fctx->lock);
> @@ -270,8 +274,8 @@ nouveau_fence_done(struct nouveau_fence *fence)
>  
>  		spin_lock_irqsave(&fctx->lock, flags);
>  		chan = rcu_dereference_protected(fence->channel,
> lockdep_is_held(&fctx->lock));
> -		if (chan && nouveau_fence_update(chan, fctx))
> -			nvif_event_block(&fctx->event);
> +		if (chan)
> +			nouveau_fence_update(chan, fctx);
>  		spin_unlock_irqrestore(&fctx->lock, flags);
>  	}
>  	return dma_fence_is_signaled(&fence->base);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h
> b/drivers/gpu/drm/nouveau/nouveau_fence.h
> index 8bc065acfe35..e6b2df7fdc42 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
> @@ -10,6 +10,7 @@ struct nouveau_bo;
>  
>  struct nouveau_fence {
>  	struct dma_fence base;
> +	struct dma_fence_cb cb;
>  
>  	struct list_head head;
>  
Re: [PATCH v2] drm/nouveau: Prevent signalled fences in pending list
Posted by Danilo Krummrich 1 day ago
On Thu, Apr 03, 2025 at 12:17:29PM +0200, Philipp Stanner wrote:
> On Thu, 2025-04-03 at 12:13 +0200, Philipp Stanner wrote:
> > -static int
> > -nouveau_fence_signal(struct nouveau_fence *fence)
> > +static void
> > +nouveau_fence_cleanup_cb(struct dma_fence *dfence, struct
> > dma_fence_cb *cb)
> >  {
> > -	int drop = 0;
> > +	struct nouveau_fence_chan *fctx;
> > +	struct nouveau_fence *fence;
> > +
> > +	fence = container_of(dfence, struct nouveau_fence, base);
> > +	fctx = nouveau_fctx(fence);
> >  
> > -	dma_fence_signal_locked(&fence->base);
> >  	list_del(&fence->head);
> >  	rcu_assign_pointer(fence->channel, NULL);
> >  
> >  	if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags))
> > {
> > -		struct nouveau_fence_chan *fctx =
> > nouveau_fctx(fence);
> > -
> >  		if (!--fctx->notify_ref)
> > -			drop = 1;
> > +			nvif_event_block(&fctx->event);
> >  	}
> >  
> >  	dma_fence_put(&fence->base);
> 
> What I realized while coding this v2 is that we might want to think
> about whether we really want the dma_fence_put() in the fence callback?
> 
> It should work fine, since it's exactly identical to the previous
> code's behavior – but effectively it means that the driver's reference
> will be dropped whenever it signals that fence.

Not quite, it's the reference of the fence context's pending list.

When the fence is emitted, dma_fence_init() is called, which initializes the
reference count to 1. Subsequently, another reference is taken, when the fence
is added to the pending list. Once the fence is signaled and hence removed from
the pending list, we can (and have to) drop this reference.
Re: [PATCH v2] drm/nouveau: Prevent signalled fences in pending list
Posted by Christian König 22 hours ago
Am 03.04.25 um 12:25 schrieb Danilo Krummrich:
> On Thu, Apr 03, 2025 at 12:17:29PM +0200, Philipp Stanner wrote:
>> On Thu, 2025-04-03 at 12:13 +0200, Philipp Stanner wrote:
>>> -static int
>>> -nouveau_fence_signal(struct nouveau_fence *fence)
>>> +static void
>>> +nouveau_fence_cleanup_cb(struct dma_fence *dfence, struct
>>> dma_fence_cb *cb)
>>>  {
>>> -	int drop = 0;
>>> +	struct nouveau_fence_chan *fctx;
>>> +	struct nouveau_fence *fence;
>>> +
>>> +	fence = container_of(dfence, struct nouveau_fence, base);
>>> +	fctx = nouveau_fctx(fence);
>>>  
>>> -	dma_fence_signal_locked(&fence->base);
>>>  	list_del(&fence->head);
>>>  	rcu_assign_pointer(fence->channel, NULL);
>>>  
>>>  	if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags))
>>> {
>>> -		struct nouveau_fence_chan *fctx =
>>> nouveau_fctx(fence);
>>> -
>>>  		if (!--fctx->notify_ref)
>>> -			drop = 1;
>>> +			nvif_event_block(&fctx->event);
>>>  	}
>>>  
>>>  	dma_fence_put(&fence->base);
>> What I realized while coding this v2 is that we might want to think
>> about whether we really want the dma_fence_put() in the fence callback?
>>
>> It should work fine, since it's exactly identical to the previous
>> code's behavior – but effectively it means that the driver's reference
>> will be dropped whenever it signals that fence.
> Not quite, it's the reference of the fence context's pending list.
>
> When the fence is emitted, dma_fence_init() is called, which initializes the
> reference count to 1. Subsequently, another reference is taken, when the fence
> is added to the pending list. Once the fence is signaled and hence removed from
> the pending list, we can (and have to) drop this reference.

The general idea is that the caller must hold the reference until the signaling is completed.

So for signaling from the interrupt handler it means that you need to call dma_fence_put() for the list reference *after* you called dma_fence_signal_locked().

For signaling from the .enable_signaling or .signaled callback you need to remove the fence from the linked list and call dma_fence_put() *before* you return (because the caller is holding the potential last reference).

That's why I'm pretty sure that the approach with installing the callback won't work. As far as I know no other DMA fence implementation is doing that.

Regards,
Christian.
Re: [PATCH v2] drm/nouveau: Prevent signalled fences in pending list
Posted by Danilo Krummrich 22 hours ago
On Thu, Apr 03, 2025 at 02:22:41PM +0200, Christian König wrote:
> Am 03.04.25 um 12:25 schrieb Danilo Krummrich:
> > On Thu, Apr 03, 2025 at 12:17:29PM +0200, Philipp Stanner wrote:
> >> On Thu, 2025-04-03 at 12:13 +0200, Philipp Stanner wrote:
> >>> -static int
> >>> -nouveau_fence_signal(struct nouveau_fence *fence)
> >>> +static void
> >>> +nouveau_fence_cleanup_cb(struct dma_fence *dfence, struct
> >>> dma_fence_cb *cb)
> >>>  {
> >>> -	int drop = 0;
> >>> +	struct nouveau_fence_chan *fctx;
> >>> +	struct nouveau_fence *fence;
> >>> +
> >>> +	fence = container_of(dfence, struct nouveau_fence, base);
> >>> +	fctx = nouveau_fctx(fence);
> >>>  
> >>> -	dma_fence_signal_locked(&fence->base);
> >>>  	list_del(&fence->head);
> >>>  	rcu_assign_pointer(fence->channel, NULL);
> >>>  
> >>>  	if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags))
> >>> {
> >>> -		struct nouveau_fence_chan *fctx =
> >>> nouveau_fctx(fence);
> >>> -
> >>>  		if (!--fctx->notify_ref)
> >>> -			drop = 1;
> >>> +			nvif_event_block(&fctx->event);
> >>>  	}
> >>>  
> >>>  	dma_fence_put(&fence->base);
> >> What I realized while coding this v2 is that we might want to think
> >> about whether we really want the dma_fence_put() in the fence callback?
> >>
> >> It should work fine, since it's exactly identical to the previous
> >> code's behavior – but effectively it means that the driver's reference
> >> will be dropped whenever it signals that fence.
> > Not quite, it's the reference of the fence context's pending list.
> >
> > When the fence is emitted, dma_fence_init() is called, which initializes the
> > reference count to 1. Subsequently, another reference is taken, when the fence
> > is added to the pending list. Once the fence is signaled and hence removed from
> > the pending list, we can (and have to) drop this reference.
> 
> The general idea is that the caller must hold the reference until the signaling is completed.
> 
> So for signaling from the interrupt handler it means that you need to call dma_fence_put() for the list reference *after* you called dma_fence_signal_locked().
> 
> For signaling from the .enable_signaling or .signaled callback you need to remove the fence from the linked list and call dma_fence_put() *before* you return (because the caller is holding the potential last reference).
> 
> That's why I'm pretty sure that the approach with installing the callback won't work. As far as I know no other DMA fence implementation is doing that.

I think it works as long as no one calls dma_fence_singnal(), but only
dma_fence_signal_locked() on this fence (which is what nouveau does). For
dma_fence_signal_locked() it doesn't seem to matter if the last reference is
dropped from a callback. There also can't be other callbacks that suffer from
this, because they'd need to have their own reference.

But either way, as mentioned in my other reply, I agree that we should avoid the
callback approach in favor of your proposal, since it has its own footgun.