[PATCH v2 10/15] virtio_scsi: fix DMA cacheline issues for events

Michael S. Tsirkin posted 15 patches 1 month ago
[PATCH v2 10/15] virtio_scsi: fix DMA cacheline issues for events
Posted by Michael S. Tsirkin 1 month ago
Current struct virtio_scsi_event_node layout has two problems:

The event (DMA_FROM_DEVICE) and work (CPU-written via
INIT_WORK/queue_work) fields share a cacheline.
On non-cache-coherent platforms, CPU writes to work can
corrupt device-written event data.

If ARCH_DMA_MINALIGN is large enough, the 8 events in event_list share
cachelines, triggering CONFIG_DMA_API_DEBUG warnings.

Fix the corruption by moving event buffers to a separate array and
aligning using __dma_from_device_group_begin()/end().

Suppress the (now spurious) DMA debug warnings using
virtqueue_add_inbuf_cache_clean().

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/scsi/virtio_scsi.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 96a69edddbe5..6ff53fc8adb0 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -29,6 +29,7 @@
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsi_devinfo.h>
 #include <linux/seqlock.h>
+#include <linux/dma-mapping.h>
 
 #include "sd.h"
 
@@ -61,7 +62,7 @@ struct virtio_scsi_cmd {
 
 struct virtio_scsi_event_node {
 	struct virtio_scsi *vscsi;
-	struct virtio_scsi_event event;
+	struct virtio_scsi_event *event;
 	struct work_struct work;
 };
 
@@ -89,6 +90,11 @@ struct virtio_scsi {
 
 	struct virtio_scsi_vq ctrl_vq;
 	struct virtio_scsi_vq event_vq;
+
+	__dma_from_device_group_begin();
+	struct virtio_scsi_event events[VIRTIO_SCSI_EVENT_LEN];
+	__dma_from_device_group_end();
+
 	struct virtio_scsi_vq req_vqs[];
 };
 
@@ -237,12 +243,12 @@ static int virtscsi_kick_event(struct virtio_scsi *vscsi,
 	unsigned long flags;
 
 	INIT_WORK(&event_node->work, virtscsi_handle_event);
-	sg_init_one(&sg, &event_node->event, sizeof(struct virtio_scsi_event));
+	sg_init_one(&sg, event_node->event, sizeof(struct virtio_scsi_event));
 
 	spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags);
 
-	err = virtqueue_add_inbuf(vscsi->event_vq.vq, &sg, 1, event_node,
-				  GFP_ATOMIC);
+	err = virtqueue_add_inbuf_cache_clean(vscsi->event_vq.vq, &sg, 1, event_node,
+					      GFP_ATOMIC);
 	if (!err)
 		virtqueue_kick(vscsi->event_vq.vq);
 
@@ -257,6 +263,7 @@ static int virtscsi_kick_event_all(struct virtio_scsi *vscsi)
 
 	for (i = 0; i < VIRTIO_SCSI_EVENT_LEN; i++) {
 		vscsi->event_list[i].vscsi = vscsi;
+		vscsi->event_list[i].event = &vscsi->events[i];
 		virtscsi_kick_event(vscsi, &vscsi->event_list[i]);
 	}
 
@@ -380,7 +387,7 @@ static void virtscsi_handle_event(struct work_struct *work)
 	struct virtio_scsi_event_node *event_node =
 		container_of(work, struct virtio_scsi_event_node, work);
 	struct virtio_scsi *vscsi = event_node->vscsi;
-	struct virtio_scsi_event *event = &event_node->event;
+	struct virtio_scsi_event *event = event_node->event;
 
 	if (event->event &
 	    cpu_to_virtio32(vscsi->vdev, VIRTIO_SCSI_T_EVENTS_MISSED)) {
-- 
MST
Re: [PATCH v2 10/15] virtio_scsi: fix DMA cacheline issues for events
Posted by Stefan Hajnoczi 1 month ago
On Mon, Jan 05, 2026 at 03:23:29AM -0500, Michael S. Tsirkin wrote:
> @@ -61,7 +62,7 @@ struct virtio_scsi_cmd {
>  
>  struct virtio_scsi_event_node {
>  	struct virtio_scsi *vscsi;
> -	struct virtio_scsi_event event;
> +	struct virtio_scsi_event *event;
>  	struct work_struct work;
>  };
>  
> @@ -89,6 +90,11 @@ struct virtio_scsi {
>  
>  	struct virtio_scsi_vq ctrl_vq;
>  	struct virtio_scsi_vq event_vq;
> +
> +	__dma_from_device_group_begin();
> +	struct virtio_scsi_event events[VIRTIO_SCSI_EVENT_LEN];
> +	__dma_from_device_group_end();

If the device emits two events in rapid succession, could the CPU see
stale data for the second event because it already holds the cache line
for reading the first event?

In other words, it's not obvious to me that the DMA warnings are indeed
spurious and should be silenced here.

It seems safer and simpler to align and pad the struct virtio_scsi_event
field in struct virtio_scsi_event_node rather than packing these structs
into a single array here they might share cache lines.

Stefan
Re: [PATCH v2 10/15] virtio_scsi: fix DMA cacheline issues for events
Posted by Michael S. Tsirkin 1 month ago
On Mon, Jan 05, 2026 at 01:19:39PM -0500, Stefan Hajnoczi wrote:
> On Mon, Jan 05, 2026 at 03:23:29AM -0500, Michael S. Tsirkin wrote:
> > @@ -61,7 +62,7 @@ struct virtio_scsi_cmd {
> >  
> >  struct virtio_scsi_event_node {
> >  	struct virtio_scsi *vscsi;
> > -	struct virtio_scsi_event event;
> > +	struct virtio_scsi_event *event;
> >  	struct work_struct work;
> >  };
> >  
> > @@ -89,6 +90,11 @@ struct virtio_scsi {
> >  
> >  	struct virtio_scsi_vq ctrl_vq;
> >  	struct virtio_scsi_vq event_vq;
> > +
> > +	__dma_from_device_group_begin();
> > +	struct virtio_scsi_event events[VIRTIO_SCSI_EVENT_LEN];
> > +	__dma_from_device_group_end();
> 
> If the device emits two events in rapid succession, could the CPU see
> stale data for the second event because it already holds the cache line
> for reading the first event?
> 
> In other words, it's not obvious to me that the DMA warnings are indeed
> spurious and should be silenced here.
> 
> It seems safer and simpler to align and pad the struct virtio_scsi_event
> field in struct virtio_scsi_event_node rather than packing these structs
> into a single array here they might share cache lines.
> 
> Stefan



To add to what I wrote, that's a lot of overhead: 8 * 128 - about 1K on
some platforms, and these happen to be low end ones.

-- 
MST
Re: [PATCH v2 10/15] virtio_scsi: fix DMA cacheline issues for events
Posted by Michael S. Tsirkin 1 month ago
On Mon, Jan 05, 2026 at 01:19:39PM -0500, Stefan Hajnoczi wrote:
> On Mon, Jan 05, 2026 at 03:23:29AM -0500, Michael S. Tsirkin wrote:
> > @@ -61,7 +62,7 @@ struct virtio_scsi_cmd {
> >  
> >  struct virtio_scsi_event_node {
> >  	struct virtio_scsi *vscsi;
> > -	struct virtio_scsi_event event;
> > +	struct virtio_scsi_event *event;
> >  	struct work_struct work;
> >  };
> >  
> > @@ -89,6 +90,11 @@ struct virtio_scsi {
> >  
> >  	struct virtio_scsi_vq ctrl_vq;
> >  	struct virtio_scsi_vq event_vq;
> > +
> > +	__dma_from_device_group_begin();
> > +	struct virtio_scsi_event events[VIRTIO_SCSI_EVENT_LEN];
> > +	__dma_from_device_group_end();
> 
> If the device emits two events in rapid succession, could the CPU see
> stale data for the second event because it already holds the cache line
> for reading the first event?

No because virtio does unmap and syncs the cache line.

In other words, CPU reads cause no issues.

The issues are exclusively around CPU writes dirtying the
cache and writeback overwriting DMA data.

> In other words, it's not obvious to me that the DMA warnings are indeed
> spurious and should be silenced here.
> 
> It seems safer and simpler to align and pad the struct virtio_scsi_event
> field in struct virtio_scsi_event_node rather than packing these structs
> into a single array here they might share cache lines.
> 
> Stefan
Re: [PATCH v2 10/15] virtio_scsi: fix DMA cacheline issues for events
Posted by Stefan Hajnoczi 1 month ago
On Tue, Jan 06, 2026 at 09:50:00AM -0500, Michael S. Tsirkin wrote:
> On Mon, Jan 05, 2026 at 01:19:39PM -0500, Stefan Hajnoczi wrote:
> > On Mon, Jan 05, 2026 at 03:23:29AM -0500, Michael S. Tsirkin wrote:
> > > @@ -61,7 +62,7 @@ struct virtio_scsi_cmd {
> > >  
> > >  struct virtio_scsi_event_node {
> > >  	struct virtio_scsi *vscsi;
> > > -	struct virtio_scsi_event event;
> > > +	struct virtio_scsi_event *event;
> > >  	struct work_struct work;
> > >  };
> > >  
> > > @@ -89,6 +90,11 @@ struct virtio_scsi {
> > >  
> > >  	struct virtio_scsi_vq ctrl_vq;
> > >  	struct virtio_scsi_vq event_vq;
> > > +
> > > +	__dma_from_device_group_begin();
> > > +	struct virtio_scsi_event events[VIRTIO_SCSI_EVENT_LEN];
> > > +	__dma_from_device_group_end();
> > 
> > If the device emits two events in rapid succession, could the CPU see
> > stale data for the second event because it already holds the cache line
> > for reading the first event?
> 
> No because virtio does unmap and syncs the cache line.
> 
> In other words, CPU reads cause no issues.
> 
> The issues are exclusively around CPU writes dirtying the
> cache and writeback overwriting DMA data.

I see. In that case I'm happy with the virtio-scsi change:

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>