[PATCH] virtio_ring: Add READ_ONCE annotations for device-writable fields
Posted by Johannes Thumshirn 1 week, 2 days ago
From: Alexander Graf <graf@amazon.com>

KCSAN reports data races when accessing virtio ring fields that are
concurrently written by the device (host). These are legitimate
concurrent accesses where the CPU reads fields that the device updates
via DMA-like mechanisms.

Add accessor functions that use READ_ONCE() to properly annotate these
device-writable fields and prevent compiler optimizations that could
break the code. This also serves as documentation showing which fields
are shared with the device.

The affected fields are:
- Split ring: used->idx, used->ring[].id, used->ring[].len
- Packed ring: desc[].flags, desc[].id, desc[].len

Reported-by: Kernel Concurrency Sanitizer (KCSAN)
Signed-off-by: Alexander Graf <graf@amazon.com>
[jth: Add READ_ONCE in virtqueue_kick_prepare_split ]
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 drivers/virtio/virtio_ring.c | 88 ++++++++++++++++++++++++++++++------
 1 file changed, 73 insertions(+), 15 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index ddab68959671..74957c83e138 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -222,6 +222,63 @@ struct vring_virtqueue {
 #endif
 };
 
+/*
+ * Accessors for device-writable fields in virtio rings.
+ * These fields are concurrently written by the device and read by the driver.
+ * Use READ_ONCE() to prevent compiler optimizations and document the
+ * intentional data race.
+ */
+
+/* Split ring: read device-written fields from used ring */
+static inline u16 vring_used_idx_read(const struct vring_virtqueue *vq)
+{
+	return virtio16_to_cpu(vq->vq.vdev,
+			       READ_ONCE(vq->split.vring.used->idx));
+}
+
+static inline u32 vring_used_id_read(const struct vring_virtqueue *vq,
+				     u16 idx)
+{
+	return virtio32_to_cpu(vq->vq.vdev,
+			       READ_ONCE(vq->split.vring.used->ring[idx].id));
+}
+
+static inline u32 vring_used_len_read(const struct vring_virtqueue *vq,
+				      u16 idx)
+{
+	return virtio32_to_cpu(vq->vq.vdev,
+			       READ_ONCE(vq->split.vring.used->ring[idx].len));
+}
+
+/* Packed ring: read device-written fields from descriptors */
+static inline u16 vring_packed_desc_flags_read(const struct vring_virtqueue *vq,
+					       u16 idx)
+{
+	return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].flags));
+}
+
+static inline u16 vring_packed_desc_id_read(const struct vring_virtqueue *vq,
+					    u16 idx)
+{
+	return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].id));
+}
+
+static inline u32 vring_packed_desc_len_read(const struct vring_virtqueue *vq,
+					     u16 idx)
+{
+	return le32_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].len));
+}
+
+/*
+ * Note: We don't need READ_ONCE for driver->device fields like:
+ * - split.vring.avail->idx (driver writes, device reads)
+ * - packed.vring.desc[].addr (driver writes, device reads)
+ * These are written by the driver and only read by the device, so the
+ * driver can safely access them without READ_ONCE. The device must use
+ * appropriate barriers on its side.
+ */
+
+
 static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
 static void vring_free(struct virtqueue *_vq);
 
@@ -736,9 +793,10 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
 	LAST_ADD_TIME_INVALID(vq);
 
 	if (vq->event) {
-		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
-					vring_avail_event(&vq->split.vring)),
-					      new, old);
+		u16 event = virtio16_to_cpu(_vq->vdev,
+				READ_ONCE(vring_avail_event(&vq->split.vring)));
+
+		needs_kick = vring_need_event(event, new, old);
 	} else {
 		needs_kick = !(vq->split.vring.used->flags &
 					cpu_to_virtio16(_vq->vdev,
@@ -808,8 +866,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
 
 static bool more_used_split(const struct vring_virtqueue *vq)
 {
-	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
-			vq->split.vring.used->idx);
+	return vq->last_used_idx != vring_used_idx_read(vq);
 }
 
 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
@@ -838,10 +895,8 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 	virtio_rmb(vq->weak_barriers);
 
 	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
-	i = virtio32_to_cpu(_vq->vdev,
-			vq->split.vring.used->ring[last_used].id);
-	*len = virtio32_to_cpu(_vq->vdev,
-			vq->split.vring.used->ring[last_used].len);
+	i = vring_used_id_read(vq, last_used);
+	*len = vring_used_len_read(vq, last_used);
 
 	if (unlikely(i >= vq->split.vring.num)) {
 		BAD_RING(vq, "id %u out of range\n", i);
@@ -923,8 +978,7 @@ static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_i
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
-			vq->split.vring.used->idx);
+	return (u16)last_used_idx != vring_used_idx_read(vq);
 }
 
 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
@@ -1701,10 +1755,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
 				       u16 idx, bool used_wrap_counter)
 {
-	bool avail, used;
 	u16 flags;
+	bool avail, used;
 
-	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
+	flags = vring_packed_desc_flags_read(vq, idx);
 	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
 	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
 
@@ -1751,8 +1805,8 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
 	last_used_idx = READ_ONCE(vq->last_used_idx);
 	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
 	last_used = packed_last_used(last_used_idx);
-	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
-	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
+	id = vring_packed_desc_id_read(vq, last_used);
+	*len = vring_packed_desc_len_read(vq, last_used);
 
 	if (unlikely(id >= vq->packed.vring.num)) {
 		BAD_RING(vq, "id %u out of range\n", id);
@@ -1850,6 +1904,10 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
 	bool wrap_counter;
 	u16 used_idx;
 
+	/*
+	 * Note: off_wrap is from virtqueue_enable_cb_prepare_packed() which
+	 * already used READ_ONCE on vq->last_used_idx, so we don't need it again.
+	 */
 	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
 	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
 
-- 
2.52.0
Re: [PATCH] virtio_ring: Add READ_ONCE annotations for device-writable fields
Posted by Michael S. Tsirkin 1 week, 2 days ago
On Wed, Jan 28, 2026 at 02:59:46PM +0100, Johannes Thumshirn wrote:
> From: Alexander Graf <graf@amazon.com>
> 
> KCSAN reports data races when accessing virtio ring fields that are
> concurrently written by the device (host). These are legitimate
> concurrent accesses where the CPU reads fields that the device updates
> via DMA-like mechanisms.
> 
> Add accessor functions that use READ_ONCE() to properly annotate these
> device-writable fields and prevent compiler optimizations that could

let's add "in theory" here

> break the code. This also serves as documentation showing which fields
> are shared with the device.
> 
> The affected fields are:
> - Split ring: used->idx, used->ring[].id, used->ring[].len
> - Packed ring: desc[].flags, desc[].id, desc[].len

I would add here: using WRITE_ONCE for driver writes into fields
isn't done here as it does not currently trigger warnings.
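
E.g. if that ever becomes necessary, the avail index update could be
annotated along these lines (just a sketch, not part of this patch):

	vq->split.avail_idx_shadow++;
	WRITE_ONCE(vq->split.vring.avail->idx,
		   cpu_to_virtio16(_vq->vdev, vq->split.avail_idx_shadow));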


> 
> Reported-by: Kernel Concurrency Sanitizer (KCSAN)
> Signed-off-by: Alexander Graf <graf@amazon.com>
> [jth: Add READ_ONCE in virtqueue_kick_prepare_split ]
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

Looks good to me. Yet something to improve:

> ---
>  drivers/virtio/virtio_ring.c | 88 ++++++++++++++++++++++++++++++------
>  1 file changed, 73 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index ddab68959671..74957c83e138 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -222,6 +222,63 @@ struct vring_virtqueue {
>  #endif
>  };
>  
> +/*
> + * Accessors for device-writable fields in virtio rings.
> + * These fields are concurrently written by the device and read by the driver.
> + * Use READ_ONCE() to prevent compiler optimizations and document the
> + * intentional data race.

... and prevent KCSAN warnings

> + */
> +
> +/* Split ring: read device-written fields from used ring */
> +static inline u16 vring_used_idx_read(const struct vring_virtqueue *vq)

I'd prefer a consistent prefix. Maybe:

vring_read_used_idx

and so on.
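
i.e. something like:

	static inline u16 vring_read_used_idx(const struct vring_virtqueue *vq)
	{
		return virtio16_to_cpu(vq->vq.vdev,
				       READ_ONCE(vq->split.vring.used->idx));
	}

with vring_read_used_id(), vring_read_used_len(),
vring_read_packed_desc_flags() etc. following the same pattern.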




> +{
> +	return virtio16_to_cpu(vq->vq.vdev,
> +			       READ_ONCE(vq->split.vring.used->idx));
> +}
> +
> +static inline u32 vring_used_id_read(const struct vring_virtqueue *vq,
> +				     u16 idx)
> +{
> +	return virtio32_to_cpu(vq->vq.vdev,
> +			       READ_ONCE(vq->split.vring.used->ring[idx].id));
> +}
> +
> +static inline u32 vring_used_len_read(const struct vring_virtqueue *vq,
> +				      u16 idx)
> +{
> +	return virtio32_to_cpu(vq->vq.vdev,
> +			       READ_ONCE(vq->split.vring.used->ring[idx].len));
> +}
> +
> +/* Packed ring: read device-written fields from descriptors */
> +static inline u16 vring_packed_desc_flags_read(const struct vring_virtqueue *vq,
> +					       u16 idx)
> +{
> +	return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].flags));
> +}
> +
> +static inline u16 vring_packed_desc_id_read(const struct vring_virtqueue *vq,
> +					    u16 idx)
> +{
> +	return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].id));
> +}
> +
> +static inline u32 vring_packed_desc_len_read(const struct vring_virtqueue *vq,
> +					     u16 idx)
> +{
> +	return le32_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].len));
> +}
> +
> +/*
> + * Note: We don't need READ_ONCE for driver->device fields like:
> + * - split.vring.avail->idx (driver writes, device reads)
> + * - packed.vring.desc[].addr (driver writes, device reads)
> + * These are written by the driver and only read by the device, so the
> + * driver can safely access them

read them, really.

> without READ_ONCE.

... and without triggering KCSAN warnings.

> The device must use
> + * appropriate barriers on its side.
> + */
> +
> +

extra empty line here.

>  static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
>  static void vring_free(struct virtqueue *_vq);
>  
> @@ -736,9 +793,10 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
>  	LAST_ADD_TIME_INVALID(vq);
>  
>  	if (vq->event) {
> -		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
> -					vring_avail_event(&vq->split.vring)),
> -					      new, old);
> +		u16 event = virtio16_to_cpu(_vq->vdev,
> +				READ_ONCE(vring_avail_event(&vq->split.vring)));

why not wrap this one, too?
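
i.e. give it the same treatment as the used ring fields, something like
(name is just a suggestion):

	static inline u16 vring_read_avail_event(const struct vring_virtqueue *vq)
	{
		return virtio16_to_cpu(vq->vq.vdev,
				       READ_ONCE(vring_avail_event(&vq->split.vring)));
	}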

> +
> +		needs_kick = vring_need_event(event, new, old);
>  	} else {
>  		needs_kick = !(vq->split.vring.used->flags &
>  					cpu_to_virtio16(_vq->vdev,
> @@ -808,8 +866,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
>  
>  static bool more_used_split(const struct vring_virtqueue *vq)
>  {
> -	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
> -			vq->split.vring.used->idx);
> +	return vq->last_used_idx != vring_used_idx_read(vq);
>  }
>  
>  static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> @@ -838,10 +895,8 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>  	virtio_rmb(vq->weak_barriers);
>  
>  	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> -	i = virtio32_to_cpu(_vq->vdev,
> -			vq->split.vring.used->ring[last_used].id);
> -	*len = virtio32_to_cpu(_vq->vdev,
> -			vq->split.vring.used->ring[last_used].len);
> +	i = vring_used_id_read(vq, last_used);
> +	*len = vring_used_len_read(vq, last_used);
>  
>  	if (unlikely(i >= vq->split.vring.num)) {
>  		BAD_RING(vq, "id %u out of range\n", i);
> @@ -923,8 +978,7 @@ static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_i
>  {
>  	struct vring_virtqueue *vq = to_vvq(_vq);
>  
> -	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
> -			vq->split.vring.used->idx);
> +	return (u16)last_used_idx != vring_used_idx_read(vq);
>  }
>  
>  static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
> @@ -1701,10 +1755,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
>  static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
>  				       u16 idx, bool used_wrap_counter)
>  {
> -	bool avail, used;
>  	u16 flags;
> +	bool avail, used;
>  
> -	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
> +	flags = vring_packed_desc_flags_read(vq, idx);
>  	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
>  	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
>  
> @@ -1751,8 +1805,8 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
>  	last_used_idx = READ_ONCE(vq->last_used_idx);
>  	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
>  	last_used = packed_last_used(last_used_idx);
> -	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
> -	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
> +	id = vring_packed_desc_id_read(vq, last_used);
> +	*len = vring_packed_desc_len_read(vq, last_used);
>  
>  	if (unlikely(id >= vq->packed.vring.num)) {
>  		BAD_RING(vq, "id %u out of range\n", id);
> @@ -1850,6 +1904,10 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
>  	bool wrap_counter;
>  	u16 used_idx;
>  
> +	/*
> +	 * Note: off_wrap is from virtqueue_enable_cb_prepare_packed() which
> +	 * already used READ_ONCE on vq->last_used_idx, so we don't need it again.

we don't need what again? off_wrap is a local variable.

> +	 */
>  	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
>  	used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
>  
> -- 
> 2.52.0
Re: [PATCH] virtio_ring: Add READ_ONCE annotations for device-writable fields
Posted by Alexander Graf 1 week, 2 days ago
On 28.01.26 14:59, Johannes Thumshirn wrote:
> From: Alexander Graf <graf@amazon.com>
>
> KCSAN reports data races when accessing virtio ring fields that are
> concurrently written by the device (host). These are legitimate
> concurrent accesses where the CPU reads fields that the device updates
> via DMA-like mechanisms.
>
> Add accessor functions that use READ_ONCE() to properly annotate these
> device-writable fields and prevent compiler optimizations that could
> break the code. This also serves as documentation showing which fields
> are shared with the device.
>
> The affected fields are:
> - Split ring: used->idx, used->ring[].id, used->ring[].len
> - Packed ring: desc[].flags, desc[].id, desc[].len
>
> Reported-by: Kernel Concurrency Sanitizer (KCSAN)
> Signed-off-by: Alexander Graf <graf@amazon.com>


Thanks for persistently trying to fix these KCSAN warnings! :)

This patch was an initial AI generated stab at seeing whether READ_ONCE 
would work and how to make it pretty. It was not meant to go to the 
mailing list as is. Some comments on what we would need to improve to 
bring it to a mergeable state.


Given this is not a subsystem-contributor relationship, I also think it 
would be Co-developed-by instead of signed-off-by :).
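
FWIW submitting-patches wants every Co-developed-by: to be immediately
followed by the co-developer's own Signed-off-by:, so presumably the tag
block would end up looking something like:

	Co-developed-by: Alexander Graf <graf@amazon.com>
	Signed-off-by: Alexander Graf <graf@amazon.com>
	[jth: Add READ_ONCE in virtqueue_kick_prepare_split]
	Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>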

> [jth: Add READ_ONCE in virtqueue_kick_prepare_split ]
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>   drivers/virtio/virtio_ring.c | 88 ++++++++++++++++++++++++++++++------
>   1 file changed, 73 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index ddab68959671..74957c83e138 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -222,6 +222,63 @@ struct vring_virtqueue {
>   #endif
>   };
>
> +/*
> + * Accessors for device-writable fields in virtio rings.
> + * These fields are concurrently written by the device and read by the driver.
> + * Use READ_ONCE() to prevent compiler optimizations and document the
> + * intentional data race.


Should mention that this is necessary for KCSAN.


> + */
> +
> +/* Split ring: read device-written fields from used ring */


Useless comment


> +static inline u16 vring_used_idx_read(const struct vring_virtqueue *vq)


Just do a complete sed s/_read// on this patch. Nobody needs these _read 
suffixes.


> +{
> +       return virtio16_to_cpu(vq->vq.vdev,
> +                              READ_ONCE(vq->split.vring.used->idx));
> +}
> +
> +static inline u32 vring_used_id_read(const struct vring_virtqueue *vq,
> +                                    u16 idx)
> +{
> +       return virtio32_to_cpu(vq->vq.vdev,
> +                              READ_ONCE(vq->split.vring.used->ring[idx].id));
> +}
> +
> +static inline u32 vring_used_len_read(const struct vring_virtqueue *vq,
> +                                     u16 idx)
> +{
> +       return virtio32_to_cpu(vq->vq.vdev,
> +                              READ_ONCE(vq->split.vring.used->ring[idx].len));
> +}
> +
> +/* Packed ring: read device-written fields from descriptors */


Useless comment


> +static inline u16 vring_packed_desc_flags_read(const struct vring_virtqueue *vq,
> +                                              u16 idx)
> +{
> +       return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].flags));
> +}
> +
> +static inline u16 vring_packed_desc_id_read(const struct vring_virtqueue *vq,
> +                                           u16 idx)
> +{
> +       return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].id));
> +}
> +
> +static inline u32 vring_packed_desc_len_read(const struct vring_virtqueue *vq,
> +                                            u16 idx)
> +{
> +       return le32_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].len));
> +}
> +
> +/*
> + * Note: We don't need READ_ONCE for driver->device fields like:
> + * - split.vring.avail->idx (driver writes, device reads)
> + * - packed.vring.desc[].addr (driver writes, device reads)
> + * These are written by the driver and only read by the device, so the
> + * driver can safely access them without READ_ONCE. The device must use
> + * appropriate barriers on its side.
> + */


Useless comment really. If you think it's worthwhile to mention the 
above, put it into the patch description.


> +
> +
>   static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
>   static void vring_free(struct virtqueue *_vq);
>
> @@ -736,9 +793,10 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
>          LAST_ADD_TIME_INVALID(vq);
>
>          if (vq->event) {
> -               needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
> -                                       vring_avail_event(&vq->split.vring)),
> -                                             new, old);
> +               u16 event = virtio16_to_cpu(_vq->vdev,
> +                               READ_ONCE(vring_avail_event(&vq->split.vring)));
> +
> +               needs_kick = vring_need_event(event, new, old);
>          } else {
>                  needs_kick = !(vq->split.vring.used->flags &
>                                          cpu_to_virtio16(_vq->vdev,
> @@ -808,8 +866,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
>
>   static bool more_used_split(const struct vring_virtqueue *vq)
>   {
> -       return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
> -                       vq->split.vring.used->idx);
> +       return vq->last_used_idx != vring_used_idx_read(vq);
>   }
>
>   static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> @@ -838,10 +895,8 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>          virtio_rmb(vq->weak_barriers);
>
>          last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> -       i = virtio32_to_cpu(_vq->vdev,
> -                       vq->split.vring.used->ring[last_used].id);
> -       *len = virtio32_to_cpu(_vq->vdev,
> -                       vq->split.vring.used->ring[last_used].len);
> +       i = vring_used_id_read(vq, last_used);
> +       *len = vring_used_len_read(vq, last_used);
>
>          if (unlikely(i >= vq->split.vring.num)) {
>                  BAD_RING(vq, "id %u out of range\n", i);
> @@ -923,8 +978,7 @@ static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_i
>   {
>          struct vring_virtqueue *vq = to_vvq(_vq);
>
> -       return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
> -                       vq->split.vring.used->idx);
> +       return (u16)last_used_idx != vring_used_idx_read(vq);
>   }
>
>   static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
> @@ -1701,10 +1755,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
>   static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
>                                         u16 idx, bool used_wrap_counter)
>   {
> -       bool avail, used;
>          u16 flags;
> +       bool avail, used;
>
> -       flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
> +       flags = vring_packed_desc_flags_read(vq, idx);
>          avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
>          used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
>
> @@ -1751,8 +1805,8 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
>          last_used_idx = READ_ONCE(vq->last_used_idx);
>          used_wrap_counter = packed_used_wrap_counter(last_used_idx);
>          last_used = packed_last_used(last_used_idx);
> -       id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
> -       *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
> +       id = vring_packed_desc_id_read(vq, last_used);
> +       *len = vring_packed_desc_len_read(vq, last_used);
>
>          if (unlikely(id >= vq->packed.vring.num)) {
>                  BAD_RING(vq, "id %u out of range\n", id);
> @@ -1850,6 +1904,10 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
>          bool wrap_counter;
>          u16 used_idx;
>
> +       /*
> +        * Note: off_wrap is from virtqueue_enable_cb_prepare_packed() which
> +        * already used READ_ONCE on vq->last_used_idx, so we don't need it again.
> +        */


On its own in this code base 5 years from now, this comment will be 
super confusing. Because nobody has context what this note is about. I'd 
say just remove it.


Alex


>          wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
>          used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
>
> --
> 2.52.0
>



Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christof Hellmis, Andreas Stieger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597
Re: [PATCH] virtio_ring: Add READ_ONCE annotations for device-writable fields
Posted by Michael S. Tsirkin 1 week, 2 days ago
On Wed, Jan 28, 2026 at 03:47:27PM +0100, Alexander Graf wrote:
> 
> On 28.01.26 14:59, Johannes Thumshirn wrote:
> > From: Alexander Graf <graf@amazon.com>
> > 
> > KCSAN reports data races when accessing virtio ring fields that are
> > concurrently written by the device (host). These are legitimate
> > concurrent accesses where the CPU reads fields that the device updates
> > via DMA-like mechanisms.
> > 
> > Add accessor functions that use READ_ONCE() to properly annotate these
> > device-writable fields and prevent compiler optimizations that could
> > break the code. This also serves as documentation showing which fields
> > are shared with the device.
> > 
> > The affected fields are:
> > - Split ring: used->idx, used->ring[].id, used->ring[].len
> > - Packed ring: desc[].flags, desc[].id, desc[].len
> > 
> > Reported-by: Kernel Concurrency Sanitizer (KCSAN)
> > Signed-off-by: Alexander Graf <graf@amazon.com>
> 
> 
> Thanks for persistently trying to fix these KCSAN warnings! :)
> 
> This patch was an initial AI generated stab at seeing whether READ_ONCE
> would work and how to make it pretty. It was not meant to go to the mailing
> list as is. Some comments on what we would need to improve to bring it to a
> mergeable state.


According to latest Docs, use of AI should be documented.
Just a statement to this end is probably going to be enough.

> 
> Given this is not a subsystem-contributor relationship, I also think it
> would be Co-developed-by instead of signed-off-by :).
> 
> > [jth: Add READ_ONCE in virtqueue_kick_prepare_split ]
> > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> > ---
> >   drivers/virtio/virtio_ring.c | 88 ++++++++++++++++++++++++++++++------
> >   1 file changed, 73 insertions(+), 15 deletions(-)
> > 
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index ddab68959671..74957c83e138 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -222,6 +222,63 @@ struct vring_virtqueue {
> >   #endif
> >   };
> > 
> > +/*
> > + * Accessors for device-writable fields in virtio rings.
> > + * These fields are concurrently written by the device and read by the driver.
> > + * Use READ_ONCE() to prevent compiler optimizations and document the
> > + * intentional data race.
> 
> 
> Should mention that this is necessary for KCSAN.
> 
> 
> > + */
> > +
> > +/* Split ring: read device-written fields from used ring */
> 
> 
> Useless comment
> 
> 
> > +static inline u16 vring_used_idx_read(const struct vring_virtqueue *vq)
> 
> 
> Just do a complete sed s/_read// on this patch. Nobody needs these _read
> suffixes.

That's fine, too.

> 
> > +{
> > +       return virtio16_to_cpu(vq->vq.vdev,
> > +                              READ_ONCE(vq->split.vring.used->idx));
> > +}
> > +
> > +static inline u32 vring_used_id_read(const struct vring_virtqueue *vq,
> > +                                    u16 idx)
> > +{
> > +       return virtio32_to_cpu(vq->vq.vdev,
> > +                              READ_ONCE(vq->split.vring.used->ring[idx].id));
> > +}
> > +
> > +static inline u32 vring_used_len_read(const struct vring_virtqueue *vq,
> > +                                     u16 idx)
> > +{
> > +       return virtio32_to_cpu(vq->vq.vdev,
> > +                              READ_ONCE(vq->split.vring.used->ring[idx].len));
> > +}
> > +
> > +/* Packed ring: read device-written fields from descriptors */
> 
> 
> Useless comment
> 
> 
> > +static inline u16 vring_packed_desc_flags_read(const struct vring_virtqueue *vq,
> > +                                              u16 idx)
> > +{
> > +       return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].flags));
> > +}
> > +
> > +static inline u16 vring_packed_desc_id_read(const struct vring_virtqueue *vq,
> > +                                           u16 idx)
> > +{
> > +       return le16_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].id));
> > +}
> > +
> > +static inline u32 vring_packed_desc_len_read(const struct vring_virtqueue *vq,
> > +                                            u16 idx)
> > +{
> > +       return le32_to_cpu(READ_ONCE(vq->packed.vring.desc[idx].len));
> > +}
> > +
> > +/*
> > + * Note: We don't need READ_ONCE for driver->device fields like:
> > + * - split.vring.avail->idx (driver writes, device reads)
> > + * - packed.vring.desc[].addr (driver writes, device reads)
> > + * These are written by the driver and only read by the device, so the
> > + * driver can safely access them without READ_ONCE. The device must use
> > + * appropriate barriers on its side.
> > + */
> 
> 
> Useless comment really. If you think it's worthwhile to mention the above,
> put it into the patch description.
> 
> 
> > +
> > +
> >   static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
> >   static void vring_free(struct virtqueue *_vq);
> > 
> > @@ -736,9 +793,10 @@ static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
> >          LAST_ADD_TIME_INVALID(vq);
> > 
> >          if (vq->event) {
> > -               needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
> > -                                       vring_avail_event(&vq->split.vring)),
> > -                                             new, old);
> > +               u16 event = virtio16_to_cpu(_vq->vdev,
> > +                               READ_ONCE(vring_avail_event(&vq->split.vring)));
> > +
> > +               needs_kick = vring_need_event(event, new, old);
> >          } else {
> >                  needs_kick = !(vq->split.vring.used->flags &
> >                                          cpu_to_virtio16(_vq->vdev,
> > @@ -808,8 +866,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> > 
> >   static bool more_used_split(const struct vring_virtqueue *vq)
> >   {
> > -       return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
> > -                       vq->split.vring.used->idx);
> > +       return vq->last_used_idx != vring_used_idx_read(vq);
> >   }
> > 
> >   static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > @@ -838,10 +895,8 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> >          virtio_rmb(vq->weak_barriers);
> > 
> >          last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
> > -       i = virtio32_to_cpu(_vq->vdev,
> > -                       vq->split.vring.used->ring[last_used].id);
> > -       *len = virtio32_to_cpu(_vq->vdev,
> > -                       vq->split.vring.used->ring[last_used].len);
> > +       i = vring_used_id_read(vq, last_used);
> > +       *len = vring_used_len_read(vq, last_used);
> > 
> >          if (unlikely(i >= vq->split.vring.num)) {
> >                  BAD_RING(vq, "id %u out of range\n", i);
> > @@ -923,8 +978,7 @@ static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_i
> >   {
> >          struct vring_virtqueue *vq = to_vvq(_vq);
> > 
> > -       return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
> > -                       vq->split.vring.used->idx);
> > +       return (u16)last_used_idx != vring_used_idx_read(vq);
> >   }
> > 
> >   static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
> > @@ -1701,10 +1755,10 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
> >   static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
> >                                         u16 idx, bool used_wrap_counter)
> >   {
> > -       bool avail, used;
> >          u16 flags;
> > +       bool avail, used;
> > 
> > -       flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
> > +       flags = vring_packed_desc_flags_read(vq, idx);
> >          avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
> >          used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
> > 
> > @@ -1751,8 +1805,8 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> >          last_used_idx = READ_ONCE(vq->last_used_idx);
> >          used_wrap_counter = packed_used_wrap_counter(last_used_idx);
> >          last_used = packed_last_used(last_used_idx);
> > -       id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
> > -       *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
> > +       id = vring_packed_desc_id_read(vq, last_used);
> > +       *len = vring_packed_desc_len_read(vq, last_used);
> > 
> >          if (unlikely(id >= vq->packed.vring.num)) {
> >                  BAD_RING(vq, "id %u out of range\n", id);
> > @@ -1850,6 +1904,10 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
> >          bool wrap_counter;
> >          u16 used_idx;
> > 
> > +       /*
> > +        * Note: off_wrap is from virtqueue_enable_cb_prepare_packed() which
> > +        * already used READ_ONCE on vq->last_used_idx, so we don't need it again.
> > +        */
> 
> 
> On its own in this code base 5 years from now, this comment will be super
> confusing. Because nobody has context what this note is about. I'd say just
> remove it.
> 
> 
> Alex
> 
> 
> >          wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
> >          used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
> > 
> > --
> > 2.52.0
> > 
> 
> 
> 