When a structure contains a buffer that DMA writes to alongside fields
that the CPU writes to, cache line sharing between the DMA buffer and
the CPU-written fields can cause data corruption on non-cache-coherent
platforms: a writeback of the shared line can overwrite DMA-written
data, and an invalidation around the transfer can discard CPU writes.
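For illustration, a made-up layout that exhibits the problem (the
struct and field names are hypothetical, not from this series):

struct bad_layout {
	spinlock_t lock;	/* CPU-written */
	u32 stats;		/* CPU-written */
	char rx_buf[16];	/* device DMAs into this */
};

Nothing here keeps stats and rx_buf in separate cache lines, so any
line they share is exposed to both hazards above.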
Add __dma_from_device_aligned_begin/__dma_from_device_aligned_end
annotations to ensure proper alignment to prevent this:
struct my_device {
	spinlock_t lock1;
	__dma_from_device_aligned_begin char dma_buffer1[16];
	char dma_buffer2[16];
	__dma_from_device_aligned_end spinlock_t lock2;
};
When the DMA buffer is the last field in the structure,
__dma_from_device_aligned_begin alone is enough: the compiler pads the
structure's size up to its (now raised) alignment, so the tail of the
buffer cannot share a cache line with whatever follows the structure:
struct my_device {
	spinlock_t lock;
	struct mutex mlock;
	__dma_from_device_aligned_begin char dma_buffer1[16];
	char dma_buffer2[16];
};
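A quick way to convince yourself of the tail case (a sketch, assuming
the architecture defines ARCH_DMA_MINALIGN; with the empty fallback
annotation these asserts do not apply):

/* __aligned() on a member raises the alignment of the whole struct,
 * and C pads a struct's size to a multiple of its alignment, so no
 * later object can share the last buffer's tail cache line.
 */
static_assert(__alignof__(struct my_device) >= ARCH_DMA_MINALIGN);
static_assert(sizeof(struct my_device) % ARCH_DMA_MINALIGN == 0);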
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/linux/dma-mapping.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index aa36a0d1d9df..47b7de3786a1 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -703,6 +703,16 @@ static inline int dma_get_cache_alignment(void)
 }
 #endif
 
+#ifdef ARCH_HAS_DMA_MINALIGN
+#define ____dma_from_device_aligned __aligned(ARCH_DMA_MINALIGN)
+#else
+#define ____dma_from_device_aligned
+#endif
+/* Apply to the 1st field of the DMA buffer */
+#define __dma_from_device_aligned_begin ____dma_from_device_aligned
+/* Apply to the 1st field beyond the DMA buffer */
+#define __dma_from_device_aligned_end ____dma_from_device_aligned
+
 static inline void *dmam_alloc_coherent(struct device *dev, size_t size,
 					 dma_addr_t *dma_handle, gfp_t gfp)
 {
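Not part of the patch, but as a usage sketch: a buffer annotated this
way would typically be stream-mapped with the existing DMA API
(my_device_map_rx() and the rx_dma field are hypothetical):

/* Map dma_buffer1 for a device-to-memory transfer.  Because the
 * buffer starts on an ARCH_DMA_MINALIGN boundary (where the arch
 * defines one), the cache maintenance done by dma_map_single() and
 * dma_unmap_single() cannot touch the CPU-written fields around it.
 */
static int my_device_map_rx(struct device *dev, struct my_device *md)
{
	dma_addr_t addr;

	addr = dma_map_single(dev, md->dma_buffer1, sizeof(md->dma_buffer1),
			      DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, addr))
		return -ENOMEM;
	md->rx_dma = addr;	/* hypothetical field for the handle */
	return 0;
}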
--
MST
On Tue, 30 Dec 2025 05:15:46 -0500
"Michael S. Tsirkin" <mst@redhat.com> wrote:
> Add __dma_from_device_aligned_begin/__dma_from_device_aligned_end
> annotations to ensure proper alignment to prevent this:
> [...]
This works, but it's a bit hard to read. Can we reuse the
__cacheline_group_{begin, end}() macros from <linux/cache.h>?
Something like this:
#define __dma_from_device_group_begin(GROUP) \
	__cacheline_group_begin(GROUP) \
	____dma_from_device_aligned
#define __dma_from_device_group_end(GROUP) \
	__cacheline_group_end(GROUP) \
	____dma_from_device_aligned
And used like this (the "rxbuf" group id was chosen arbitrarily):
struct my_device {
	spinlock_t lock1;
	__dma_from_device_group_begin(rxbuf);
	char dma_buffer1[16];
	char dma_buffer2[16];
	__dma_from_device_group_end(rxbuf);
	spinlock_t lock2;
};
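For reference, in kernels that have them, <linux/cache.h> defines the
group markers as zero-size __u8 arrays, so the rxbuf example should
expand roughly like this (assuming ARCH_HAS_DMA_MINALIGN):

struct my_device {
	spinlock_t lock1;
	__u8 __cacheline_group_begin__rxbuf[0]
		__aligned(ARCH_DMA_MINALIGN);
	char dma_buffer1[16];
	char dma_buffer2[16];
	__u8 __cacheline_group_end__rxbuf[0]
		__aligned(ARCH_DMA_MINALIGN);
	spinlock_t lock2;
};

The aligned begin marker pushes dma_buffer1 to an ARCH_DMA_MINALIGN
boundary; the aligned end marker pads after dma_buffer2 so that lock2
starts on the next boundary.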
Petr T
On Wed, Dec 31, 2025 at 03:01:59PM +0100, Petr Tesarik wrote:
> This works, but it's a bit hard to read. Can we reuse the
> __cacheline_group_{begin, end}() macros from <linux/cache.h>?
> Something like this:
> [...]
>
> Petr T
Made this change, and pushed out to my tree.
I'll post the new version in a couple of days, if no other issues
surface.
On Wed, 31 Dec 2025 15:48:26 -0500
"Michael S. Tsirkin" <mst@redhat.com> wrote:
> Made this change, and pushed out to my tree.
>
> I'll post the new version in a couple of days, if no other issues
> surface.
FTR, except for my (non-critical) suggestions for PATCH 5/13, the
updated series looks good to me.
Thank you!
Petr T
On Wed, Dec 31, 2025 at 03:01:59PM +0100, Petr Tesarik wrote:
> This works, but it's a bit hard to read. Can we reuse the
> __cacheline_group_{begin, end}() macros from <linux/cache.h>?
> Something like this:
>
> #define __dma_from_device_group_begin(GROUP) \
> 	__cacheline_group_begin(GROUP) \
> 	____dma_from_device_aligned
> #define __dma_from_device_group_end(GROUP) \
> 	__cacheline_group_end(GROUP) \
> 	____dma_from_device_aligned
>
> And used like this (the "rxbuf" group id was chosen arbitrarily):
>
> struct my_device {
> 	spinlock_t lock1;
> 	__dma_from_device_group_begin(rxbuf);
> 	char dma_buffer1[16];
> 	char dma_buffer2[16];
> 	__dma_from_device_group_end(rxbuf);
> 	spinlock_t lock2;
> };
>
> Petr T
Oh, that's a clever idea!
Will do! And GROUP is optional if there's only one group in a structure.