This will add usb_alloc_noncoherent() and usb_free_noncoherent()
functions to support alloc and free buffer in a dma-noncoherent way.
To explicit manage the memory ownership for the kernel and device,
this will also add usb_dma_noncoherent_sync_for_cpu/device() functions
and call it at proper time. The management requires the user save
sg_table returned by usb_alloc_noncoherent() to urb->sgt.
Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
---
drivers/usb/core/hcd.c | 30 ++++++++++++++++
drivers/usb/core/usb.c | 80 ++++++++++++++++++++++++++++++++++++++++++
include/linux/usb.h | 9 +++++
3 files changed, 119 insertions(+)
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index c22de97432a0..5fa00d32afb8 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1496,6 +1496,34 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
}
EXPORT_SYMBOL_GPL(usb_hcd_map_urb_for_dma);
+static void usb_dma_noncoherent_sync_for_cpu(struct usb_hcd *hcd,
+ struct urb *urb)
+{
+ enum dma_data_direction dir;
+
+ if (!urb->sgt)
+ return;
+
+ dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ invalidate_kernel_vmap_range(urb->transfer_buffer,
+ urb->transfer_buffer_length);
+ dma_sync_sgtable_for_cpu(hcd->self.sysdev, urb->sgt, dir);
+}
+
+static void usb_dma_noncoherent_sync_for_device(struct usb_hcd *hcd,
+ struct urb *urb)
+{
+ enum dma_data_direction dir;
+
+ if (!urb->sgt)
+ return;
+
+ dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ flush_kernel_vmap_range(urb->transfer_buffer,
+ urb->transfer_buffer_length);
+ dma_sync_sgtable_for_device(hcd->self.sysdev, urb->sgt, dir);
+}
+
/*-------------------------------------------------------------------------*/
/* may be called in any context with a valid urb->dev usecount
@@ -1516,6 +1544,7 @@ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags)
atomic_inc(&urb->use_count);
atomic_inc(&urb->dev->urbnum);
usbmon_urb_submit(&hcd->self, urb);
+ usb_dma_noncoherent_sync_for_device(hcd, urb);
/* NOTE requirements on root-hub callers (usbfs and the hub
* driver, for now): URBs' urb->transfer_buffer must be
@@ -1632,6 +1661,7 @@ static void __usb_hcd_giveback_urb(struct urb *urb)
status = -EREMOTEIO;
unmap_urb_for_dma(hcd, urb);
+ usb_dma_noncoherent_sync_for_cpu(hcd, urb);
usbmon_urb_complete(&hcd->self, urb, status);
usb_anchor_suspend_wakeups(anchor);
usb_unanchor_urb(urb);
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 118fa4c93a79..b78e61f38d0b 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -1030,6 +1030,86 @@ void usb_free_coherent(struct usb_device *dev, size_t size, void *addr,
}
EXPORT_SYMBOL_GPL(usb_free_coherent);
+/**
+ * usb_alloc_noncoherent - allocate dma-noncoherent buffer for URB_NO_xxx_DMA_MAP
+ * @dev: device the buffer will be used with
+ * @size: requested buffer size
+ * @mem_flags: affect whether allocation may block
+ * @dma: used to return DMA address of buffer
+ * @dir: dma transfer direction
+ * @table: used to return sg_table of allocated memory
+ *
+ * Return: Either null (indicating no buffer could be allocated), or the
+ * cpu-space pointer to a buffer that may be used to perform DMA to the
+ * specified device. Such cpu-space buffers are returned along with the DMA
+ * address (through the pointer provided).
+ *
+ * To explicit manage the memory ownership for the kernel vs the device by
+ * usb core, the user needs save sg_table to urb->sgt. Then usb core will
+ * do dma sync for cpu and device properly.
+ *
+ * When the buffer is no longer used, free it with usb_free_noncoherent().
+ */
+void *usb_alloc_noncoherent(struct usb_device *dev, size_t size,
+ gfp_t mem_flags, dma_addr_t *dma,
+ enum dma_data_direction dir,
+ struct sg_table **table)
+{
+ struct device *dmadev;
+ struct sg_table *sgt;
+ void *buffer;
+
+ if (!dev || !dev->bus)
+ return NULL;
+
+ dmadev = bus_to_hcd(dev->bus)->self.sysdev;
+
+ sgt = dma_alloc_noncontiguous(dmadev, size, dir, mem_flags, 0);
+ if (!sgt)
+ return NULL;
+
+ buffer = dma_vmap_noncontiguous(dmadev, size, sgt);
+ if (!buffer) {
+ dma_free_noncontiguous(dmadev, size, sgt, dir);
+ return NULL;
+ }
+
+ *table = sgt;
+ *dma = sg_dma_address(sgt->sgl);
+
+ return buffer;
+}
+EXPORT_SYMBOL_GPL(usb_alloc_noncoherent);
+
+/**
+ * usb_free_noncoherent - free memory allocated with usb_alloc_noncoherent()
+ * @dev: device the buffer was used with
+ * @size: requested buffer size
+ * @addr: CPU address of buffer
+ * @dir: dma transfer direction
+ * @table: describe the allocated and DMA mapped memory,
+ *
+ * This reclaims an I/O buffer, letting it be reused. The memory must have
+ * been allocated using usb_alloc_noncoherent(), and the parameters must match
+ * those provided in that allocation request.
+ */
+void usb_free_noncoherent(struct usb_device *dev, size_t size,
+ void *addr, enum dma_data_direction dir,
+ struct sg_table *table)
+{
+ struct device *dmadev;
+
+ if (!dev || !dev->bus)
+ return;
+ if (!addr)
+ return;
+
+ dmadev = bus_to_hcd(dev->bus)->self.sysdev;
+ dma_vunmap_noncontiguous(dmadev, addr);
+ dma_free_noncontiguous(dmadev, size, table, dir);
+}
+EXPORT_SYMBOL_GPL(usb_free_noncoherent);
+
/*
* Notifications of device and interface registration
*/
diff --git a/include/linux/usb.h b/include/linux/usb.h
index e8662843e68c..fee0e3cfad4e 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1619,6 +1619,7 @@ struct urb {
void *transfer_buffer; /* (in) associated data buffer */
dma_addr_t transfer_dma; /* (in) dma addr for transfer_buffer */
struct scatterlist *sg; /* (in) scatter gather buffer list */
+ struct sg_table *sgt; /* (in) scatter gather table for noncoherent buffer */
int num_mapped_sgs; /* (internal) mapped sg entries */
int num_sgs; /* (in) number of entries in the sg list */
u32 transfer_buffer_length; /* (in) data buffer length */
@@ -1824,6 +1825,14 @@ void *usb_alloc_coherent(struct usb_device *dev, size_t size,
void usb_free_coherent(struct usb_device *dev, size_t size,
void *addr, dma_addr_t dma);
+enum dma_data_direction;
+
+void *usb_alloc_noncoherent(struct usb_device *dev, size_t size,
+ gfp_t mem_flags, dma_addr_t *dma, enum dma_data_direction dir,
+ struct sg_table **table);
+void usb_free_noncoherent(struct usb_device *dev, size_t size, void *addr,
+ enum dma_data_direction dir, struct sg_table *table);
+
/*-------------------------------------------------------------------*
* SYNCHRONOUS CALL SUPPORT *
*-------------------------------------------------------------------*/
--
2.34.1
On Fri, Jun 27, 2025 at 06:19:37PM +0800, Xu Yang wrote: > This will add usb_alloc_noncoherent() and usb_free_noncoherent() > functions to support alloc and free buffer in a dma-noncoherent way. > > To explicit manage the memory ownership for the kernel and device, > this will also add usb_dma_noncoherent_sync_for_cpu/device() functions > and call it at proper time. The management requires the user save > sg_table returned by usb_alloc_noncoherent() to urb->sgt. > > Signed-off-by: Xu Yang <xu.yang_2@nxp.com> > --- > drivers/usb/core/hcd.c | 30 ++++++++++++++++ > drivers/usb/core/usb.c | 80 ++++++++++++++++++++++++++++++++++++++++++ > include/linux/usb.h | 9 +++++ > 3 files changed, 119 insertions(+) > > diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c > index c22de97432a0..5fa00d32afb8 100644 > --- a/drivers/usb/core/hcd.c > +++ b/drivers/usb/core/hcd.c > @@ -1496,6 +1496,34 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, > } > EXPORT_SYMBOL_GPL(usb_hcd_map_urb_for_dma); > > +static void usb_dma_noncoherent_sync_for_cpu(struct usb_hcd *hcd, > + struct urb *urb) > +{ > + enum dma_data_direction dir; > + > + if (!urb->sgt) > + return; > + > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; Are the following operations really necessary if the direction is OUT? There are no bidirectional URBs, and an OUT transfer never modifies the contents of the transfer buffer so the buffer contents will be the same after the URB completes as they were when the URB was submitted. > + invalidate_kernel_vmap_range(urb->transfer_buffer, > + urb->transfer_buffer_length); > + dma_sync_sgtable_for_cpu(hcd->self.sysdev, urb->sgt, dir); > +} This entire routine should be inserted at the appropriate place in usb_hcd_unmap_urb_for_dma() instead of being standalone. > +static void usb_dma_noncoherent_sync_for_device(struct usb_hcd *hcd, > + struct urb *urb) > +{ > + enum dma_data_direction dir; > + > + if (!urb->sgt) > + return; > + > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; > + flush_kernel_vmap_range(urb->transfer_buffer, > + urb->transfer_buffer_length); > + dma_sync_sgtable_for_device(hcd->self.sysdev, urb->sgt, dir); > +} Likewise, this code belongs inside usb_hcd_map_urb_for_dma(). Also, the material that this routine replaces in the uvc and stk1160 drivers do not call flush_kernel_vmap_range(). Why did you add that here? Was this omission a bug in those drivers? Alan Stern
Hi Alan, On Fri, Jun 27, 2025 at 10:23:36AM -0400, Alan Stern wrote: > On Fri, Jun 27, 2025 at 06:19:37PM +0800, Xu Yang wrote: > > This will add usb_alloc_noncoherent() and usb_free_noncoherent() > > functions to support alloc and free buffer in a dma-noncoherent way. > > > > To explicit manage the memory ownership for the kernel and device, > > this will also add usb_dma_noncoherent_sync_for_cpu/device() functions > > and call it at proper time. The management requires the user save > > sg_table returned by usb_alloc_noncoherent() to urb->sgt. > > > > Signed-off-by: Xu Yang <xu.yang_2@nxp.com> > > --- > > drivers/usb/core/hcd.c | 30 ++++++++++++++++ > > drivers/usb/core/usb.c | 80 ++++++++++++++++++++++++++++++++++++++++++ > > include/linux/usb.h | 9 +++++ > > 3 files changed, 119 insertions(+) > > > > diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c > > index c22de97432a0..5fa00d32afb8 100644 > > --- a/drivers/usb/core/hcd.c > > +++ b/drivers/usb/core/hcd.c > > @@ -1496,6 +1496,34 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, > > } > > EXPORT_SYMBOL_GPL(usb_hcd_map_urb_for_dma); > > > > +static void usb_dma_noncoherent_sync_for_cpu(struct usb_hcd *hcd, > > + struct urb *urb) > > +{ > > + enum dma_data_direction dir; > > + > > + if (!urb->sgt) > > + return; > > + > > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; > > Are the following operations really necessary if the direction is OUT? > There are no bidirectional URBs, and an OUT transfer never modifies the > contents of the transfer buffer so the buffer contents will be the same > after the URB completes as they were when the URB was submitted. According to Laurent's reply email, it may be needed for some archs. > > > + invalidate_kernel_vmap_range(urb->transfer_buffer, > > + urb->transfer_buffer_length); > > + dma_sync_sgtable_for_cpu(hcd->self.sysdev, urb->sgt, dir); > > +} > > This entire routine should be inserted at the appropriate place in > usb_hcd_unmap_urb_for_dma() instead of being standalone. Okay. > > > +static void usb_dma_noncoherent_sync_for_device(struct usb_hcd *hcd, > > + struct urb *urb) > > +{ > > + enum dma_data_direction dir; > > + > > + if (!urb->sgt) > > + return; > > + > > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; > > + flush_kernel_vmap_range(urb->transfer_buffer, > > + urb->transfer_buffer_length); > > + dma_sync_sgtable_for_device(hcd->self.sysdev, urb->sgt, dir); > > +} > > Likewise, this code belongs inside usb_hcd_map_urb_for_dma(). Okay. > > Also, the material that this routine replaces in the uvc and stk1160 > drivers do not call flush_kernel_vmap_range(). Why did you add that > here? Was this omission a bug in those drivers? According to dma-api.rst: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/tree/Documentation/core-api/dma-api.rst?h=linux-6.15.y#n664 "Once a non-contiguous allocation is mapped using this function, the flush_kernel_vmap_range() and invalidate_kernel_vmap_range() APIs must be used to manage the coherency between the kernel mapping, the device and user space mappings (if any)." Possibly the uvc and stk1160 missed calling it, but since they won't be the only user of the USB core, so we'd better call these APIs. Thanks, Xu Yang > > Alan Stern
On Mon, Jun 30, 2025 at 04:18:51PM +0800, Xu Yang wrote: > > Also, the material that this routine replaces in the uvc and stk1160 > > drivers do not call flush_kernel_vmap_range(). Why did you add that > > here? Was this omission a bug in those drivers? > > According to dma-api.rst: > https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/tree/Documentation/core-api/dma-api.rst?h=linux-6.15.y#n664 > > "Once a non-contiguous allocation is mapped using this function, the > flush_kernel_vmap_range() and invalidate_kernel_vmap_range() APIs must > be used to manage the coherency between the kernel mapping, the device > and user space mappings (if any)." > > Possibly the uvc and stk1160 missed calling it, but since they won't be > the only user of the USB core, so we'd better call these APIs. Documentation/core-api/cachetbl.rst says: ``void flush_kernel_vmap_range(void *vaddr, int size)`` flushes the kernel cache for a given virtual address range in the vmap area. This is to make sure that any data the kernel modified in the vmap range is made visible to the physical page. The design is to make this area safe to perform I/O on. Note that this API does *not* also flush the offset map alias of the area. ``void invalidate_kernel_vmap_range(void *vaddr, int size) invalidates`` the cache for a given virtual address range in the vmap area which prevents the processor from making the cache stale by speculatively reading data while the I/O was occurring to the physical pages. This is only necessary for data reads into the vmap area. So invalidate_kernel_vmap_range() is not needed for data writes, that is, for OUT transfers. And ironically, flush_kernel_vmap_range() _is_ needed (but only for OUT transfers). On the other hand, Christoph may think these call should be included regardless. Let's see what he recommends. Christoph? (Actually, I would expect dma_sync_sgtable_for_cpu() and dma_sync_sgtable_for_device() to handle all of this for us automatically, but never mind...) Alan Stern
Hi Alan, On Mon, Jun 30, 2025 at 10:16:30AM -0400, Alan Stern wrote: > On Mon, Jun 30, 2025 at 04:18:51PM +0800, Xu Yang wrote: > > > Also, the material that this routine replaces in the uvc and stk1160 > > > drivers do not call flush_kernel_vmap_range(). Why did you add that > > > here? Was this omission a bug in those drivers? > > > > According to dma-api.rst: > > https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/tree/Documentation/core-api/dma-api.rst?h=linux-6.15.y#n664 > > > > "Once a non-contiguous allocation is mapped using this function, the > > flush_kernel_vmap_range() and invalidate_kernel_vmap_range() APIs must > > be used to manage the coherency between the kernel mapping, the device > > and user space mappings (if any)." > > > > Possibly the uvc and stk1160 missed calling it, but since they won't be > > the only user of the USB core, so we'd better call these APIs. > > Documentation/core-api/cachetbl.rst says: > > ``void flush_kernel_vmap_range(void *vaddr, int size)`` > > flushes the kernel cache for a given virtual address range in > the vmap area. This is to make sure that any data the kernel > modified in the vmap range is made visible to the physical > page. The design is to make this area safe to perform I/O on. > Note that this API does *not* also flush the offset map alias > of the area. > > ``void invalidate_kernel_vmap_range(void *vaddr, int size) invalidates`` > > the cache for a given virtual address range in the vmap area > which prevents the processor from making the cache stale by > speculatively reading data while the I/O was occurring to the > physical pages. This is only necessary for data reads into the > vmap area. > > So invalidate_kernel_vmap_range() is not needed for data writes, that > is, for OUT transfers. And ironically, flush_kernel_vmap_range() _is_ > needed (but only for OUT transfers). flush_kernel_vmap_range() for OUT transfers and invalidate_kernel_vmap_range() for IN transfers make sense to me. > On the other hand, Christoph may think these call should be included > regardless. Let's see what he recommends. Christoph? > > (Actually, I would expect dma_sync_sgtable_for_cpu() and > dma_sync_sgtable_for_device() to handle all of this for us > automatically, but never mind...) -- Regards, Laurent Pinchart
Hi Alan and Laurent, On Mon, Jun 30, 2025 at 08:38:51PM +0300, Laurent Pinchart wrote: > Hi Alan, > > On Mon, Jun 30, 2025 at 10:16:30AM -0400, Alan Stern wrote: > > On Mon, Jun 30, 2025 at 04:18:51PM +0800, Xu Yang wrote: > > > > Also, the material that this routine replaces in the uvc and stk1160 > > > > drivers do not call flush_kernel_vmap_range(). Why did you add that > > > > here? Was this omission a bug in those drivers? > > > > > > According to dma-api.rst: > > > https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/tree/Documentation/core-api/dma-api.rst?h=linux-6.15.y#n664 > > > > > > "Once a non-contiguous allocation is mapped using this function, the > > > flush_kernel_vmap_range() and invalidate_kernel_vmap_range() APIs must > > > be used to manage the coherency between the kernel mapping, the device > > > and user space mappings (if any)." > > > > > > Possibly the uvc and stk1160 missed calling it, but since they won't be > > > the only user of the USB core, so we'd better call these APIs. > > > > Documentation/core-api/cachetbl.rst says: > > > > ``void flush_kernel_vmap_range(void *vaddr, int size)`` > > > > flushes the kernel cache for a given virtual address range in > > the vmap area. This is to make sure that any data the kernel > > modified in the vmap range is made visible to the physical > > page. The design is to make this area safe to perform I/O on. > > Note that this API does *not* also flush the offset map alias > > of the area. > > > > ``void invalidate_kernel_vmap_range(void *vaddr, int size) invalidates`` > > > > the cache for a given virtual address range in the vmap area > > which prevents the processor from making the cache stale by > > speculatively reading data while the I/O was occurring to the > > physical pages. This is only necessary for data reads into the > > vmap area. > > > > So invalidate_kernel_vmap_range() is not needed for data writes, that > > is, for OUT transfers. And ironically, flush_kernel_vmap_range() _is_ > > needed (but only for OUT transfers). > > flush_kernel_vmap_range() for OUT transfers and > invalidate_kernel_vmap_range() for IN transfers make sense to me. Many thanks for your suggestions! I'll do it in next version. Thanks, Xu Yang > > > On the other hand, Christoph may think these call should be included > > regardless. Let's see what he recommends. Christoph? > > > > (Actually, I would expect dma_sync_sgtable_for_cpu() and > > dma_sync_sgtable_for_device() to handle all of this for us > > automatically, but never mind...) > > -- > Regards, > > Laurent Pinchart
On Fri, Jun 27, 2025 at 10:23:36AM -0400, Alan Stern wrote: > On Fri, Jun 27, 2025 at 06:19:37PM +0800, Xu Yang wrote: > > This will add usb_alloc_noncoherent() and usb_free_noncoherent() > > functions to support alloc and free buffer in a dma-noncoherent way. > > > > To explicit manage the memory ownership for the kernel and device, > > this will also add usb_dma_noncoherent_sync_for_cpu/device() functions > > and call it at proper time. The management requires the user save > > sg_table returned by usb_alloc_noncoherent() to urb->sgt. > > > > Signed-off-by: Xu Yang <xu.yang_2@nxp.com> > > --- > > drivers/usb/core/hcd.c | 30 ++++++++++++++++ > > drivers/usb/core/usb.c | 80 ++++++++++++++++++++++++++++++++++++++++++ > > include/linux/usb.h | 9 +++++ > > 3 files changed, 119 insertions(+) > > > > diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c > > index c22de97432a0..5fa00d32afb8 100644 > > --- a/drivers/usb/core/hcd.c > > +++ b/drivers/usb/core/hcd.c > > @@ -1496,6 +1496,34 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, > > } > > EXPORT_SYMBOL_GPL(usb_hcd_map_urb_for_dma); > > > > +static void usb_dma_noncoherent_sync_for_cpu(struct usb_hcd *hcd, > > + struct urb *urb) > > +{ > > + enum dma_data_direction dir; > > + > > + if (!urb->sgt) > > + return; > > + > > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; > > Are the following operations really necessary if the direction is OUT? > There are no bidirectional URBs, and an OUT transfer never modifies the > contents of the transfer buffer so the buffer contents will be the same > after the URB completes as they were when the URB was submitted. The arch part of dma_sync_sgtable_for_cpu(DMA_TO_DEVICE) is a no-op on all architectures but microblaze, mips, parisc and powerpc (at least in some configurations of those architectures). The IOMMU DMA mapping backend calls into the arch-specific code, and also handles swiotlb, which is a no-op for DMA_TO_DEVICE. There's also some IOMMU-related arch-specific handling for sparc. I think dma_sync_sgtable_for_cpu() should be called for the DMA_TO_DEVICE direction, to ensure proper operation in those uncommon but real cases where platforms need to perform some operation. It has a non-zero cost on other platforms, as the CPU will need to go through a few function calls to end up in no-ops and then go back up the call stack. invalidate_kernel_vmap_range() may not be needed. I don't recall why it was added. The call was introduced in commit 20e1dbf2bbe2431072571000ed31dfef09359c08 Author: Ricardo Ribalda <ribalda@chromium.org> Date: Sat Mar 13 00:55:20 2021 +0100 media: uvcvideo: Use dma_alloc_noncontiguous API Ricardo, do we need to invalidate the vmap range in the DMA_TO_DEVICE case ? > > + invalidate_kernel_vmap_range(urb->transfer_buffer, > > + urb->transfer_buffer_length); > > + dma_sync_sgtable_for_cpu(hcd->self.sysdev, urb->sgt, dir); In the DMA_FROM_DEVICE case, shouldn't the vmap range should be invalidated after calling dma_sync_sgtable_for_cpu() ? Otherwise I think speculative reads coming between invalidation and dma sync could result in data corruption. > > +} > > This entire routine should be inserted at the appropriate place in > usb_hcd_unmap_urb_for_dma() instead of being standalone. > > > +static void usb_dma_noncoherent_sync_for_device(struct usb_hcd *hcd, > > + struct urb *urb) > > +{ > > + enum dma_data_direction dir; > > + > > + if (!urb->sgt) > > + return; > > + > > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; > > + flush_kernel_vmap_range(urb->transfer_buffer, > > + urb->transfer_buffer_length); > > + dma_sync_sgtable_for_device(hcd->self.sysdev, urb->sgt, dir); > > +} > > Likewise, this code belongs inside usb_hcd_map_urb_for_dma(). > > Also, the material that this routine replaces in the uvc and stk1160 > drivers do not call flush_kernel_vmap_range(). Why did you add that > here? Was this omission a bug in those drivers? > > Alan Stern -- Regards, Laurent Pinchart
Hi Laurent, On Mon, Jun 30, 2025 at 02:39:24AM +0300, Laurent Pinchart wrote: > On Fri, Jun 27, 2025 at 10:23:36AM -0400, Alan Stern wrote: > > On Fri, Jun 27, 2025 at 06:19:37PM +0800, Xu Yang wrote: > > > This will add usb_alloc_noncoherent() and usb_free_noncoherent() > > > functions to support alloc and free buffer in a dma-noncoherent way. > > > > > > To explicit manage the memory ownership for the kernel and device, > > > this will also add usb_dma_noncoherent_sync_for_cpu/device() functions > > > and call it at proper time. The management requires the user save > > > sg_table returned by usb_alloc_noncoherent() to urb->sgt. > > > > > > Signed-off-by: Xu Yang <xu.yang_2@nxp.com> > > > --- > > > drivers/usb/core/hcd.c | 30 ++++++++++++++++ > > > drivers/usb/core/usb.c | 80 ++++++++++++++++++++++++++++++++++++++++++ > > > include/linux/usb.h | 9 +++++ > > > 3 files changed, 119 insertions(+) > > > > > > diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c > > > index c22de97432a0..5fa00d32afb8 100644 > > > --- a/drivers/usb/core/hcd.c > > > +++ b/drivers/usb/core/hcd.c > > > @@ -1496,6 +1496,34 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, > > > } > > > EXPORT_SYMBOL_GPL(usb_hcd_map_urb_for_dma); > > > > > > +static void usb_dma_noncoherent_sync_for_cpu(struct usb_hcd *hcd, > > > + struct urb *urb) > > > +{ > > > + enum dma_data_direction dir; > > > + > > > + if (!urb->sgt) > > > + return; > > > + > > > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; > > > > Are the following operations really necessary if the direction is OUT? > > There are no bidirectional URBs, and an OUT transfer never modifies the > > contents of the transfer buffer so the buffer contents will be the same > > after the URB completes as they were when the URB was submitted. > > The arch part of dma_sync_sgtable_for_cpu(DMA_TO_DEVICE) is a no-op on > all architectures but microblaze, mips, parisc and powerpc (at least in > some configurations of those architectures). > > The IOMMU DMA mapping backend calls into the arch-specific code, and > also handles swiotlb, which is a no-op for DMA_TO_DEVICE. There's also > some IOMMU-related arch-specific handling for sparc. > > I think dma_sync_sgtable_for_cpu() should be called for the > DMA_TO_DEVICE direction, to ensure proper operation in those uncommon > but real cases where platforms need to perform some operation. It has a > non-zero cost on other platforms, as the CPU will need to go through a > few function calls to end up in no-ops and then go back up the call > stack. > > invalidate_kernel_vmap_range() may not be needed. I don't recall why it > was added. The call was introduced in > > commit 20e1dbf2bbe2431072571000ed31dfef09359c08 > Author: Ricardo Ribalda <ribalda@chromium.org> > Date: Sat Mar 13 00:55:20 2021 +0100 > > media: uvcvideo: Use dma_alloc_noncontiguous API > > Ricardo, do we need to invalidate the vmap range in the DMA_TO_DEVICE > case ? > > > > + invalidate_kernel_vmap_range(urb->transfer_buffer, > > > + urb->transfer_buffer_length); > > > + dma_sync_sgtable_for_cpu(hcd->self.sysdev, urb->sgt, dir); > > In the DMA_FROM_DEVICE case, shouldn't the vmap range should be > invalidated after calling dma_sync_sgtable_for_cpu() ? Otherwise I think > speculative reads coming between invalidation and dma sync could result > in data corruption. Your concern sounds resonable. But I see some drivers also call invalidate_kernel_vmap_range() before dma_sync_sgtable_for_cpu(). I'm not sure what's the correct order :( https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/sound/core/memalloc.c?h=linux-6.15.y#n600 https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/drivers/media/common/videobuf2/videobuf2-dma-contig.c?h=linux-6.15.y#n157 Thanks, Xu Yang > > > > +} > > > > This entire routine should be inserted at the appropriate place in > > usb_hcd_unmap_urb_for_dma() instead of being standalone. > > > > > +static void usb_dma_noncoherent_sync_for_device(struct usb_hcd *hcd, > > > + struct urb *urb) > > > +{ > > > + enum dma_data_direction dir; > > > + > > > + if (!urb->sgt) > > > + return; > > > + > > > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; > > > + flush_kernel_vmap_range(urb->transfer_buffer, > > > + urb->transfer_buffer_length); > > > + dma_sync_sgtable_for_device(hcd->self.sysdev, urb->sgt, dir); > > > +} > > > > Likewise, this code belongs inside usb_hcd_map_urb_for_dma(). > > > > Also, the material that this routine replaces in the uvc and stk1160 > > drivers do not call flush_kernel_vmap_range(). Why did you add that > > here? Was this omission a bug in those drivers? > > > > Alan Stern > > -- > Regards, > > Laurent Pinchart
On Mon, 30 Jun 2025 at 01:39, Laurent Pinchart <laurent.pinchart@ideasonboard.com> wrote: > > On Fri, Jun 27, 2025 at 10:23:36AM -0400, Alan Stern wrote: > > On Fri, Jun 27, 2025 at 06:19:37PM +0800, Xu Yang wrote: > > > This will add usb_alloc_noncoherent() and usb_free_noncoherent() > > > functions to support alloc and free buffer in a dma-noncoherent way. > > > > > > To explicit manage the memory ownership for the kernel and device, > > > this will also add usb_dma_noncoherent_sync_for_cpu/device() functions > > > and call it at proper time. The management requires the user save > > > sg_table returned by usb_alloc_noncoherent() to urb->sgt. > > > > > > Signed-off-by: Xu Yang <xu.yang_2@nxp.com> > > > --- > > > drivers/usb/core/hcd.c | 30 ++++++++++++++++ > > > drivers/usb/core/usb.c | 80 ++++++++++++++++++++++++++++++++++++++++++ > > > include/linux/usb.h | 9 +++++ > > > 3 files changed, 119 insertions(+) > > > > > > diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c > > > index c22de97432a0..5fa00d32afb8 100644 > > > --- a/drivers/usb/core/hcd.c > > > +++ b/drivers/usb/core/hcd.c > > > @@ -1496,6 +1496,34 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, > > > } > > > EXPORT_SYMBOL_GPL(usb_hcd_map_urb_for_dma); > > > > > > +static void usb_dma_noncoherent_sync_for_cpu(struct usb_hcd *hcd, > > > + struct urb *urb) > > > +{ > > > + enum dma_data_direction dir; > > > + > > > + if (!urb->sgt) > > > + return; > > > + > > > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; > > > > Are the following operations really necessary if the direction is OUT? > > There are no bidirectional URBs, and an OUT transfer never modifies the > > contents of the transfer buffer so the buffer contents will be the same > > after the URB completes as they were when the URB was submitted. > > The arch part of dma_sync_sgtable_for_cpu(DMA_TO_DEVICE) is a no-op on > all architectures but microblaze, mips, parisc and powerpc (at least in > some configurations of those architectures). > > The IOMMU DMA mapping backend calls into the arch-specific code, and > also handles swiotlb, which is a no-op for DMA_TO_DEVICE. There's also > some IOMMU-related arch-specific handling for sparc. > > I think dma_sync_sgtable_for_cpu() should be called for the > DMA_TO_DEVICE direction, to ensure proper operation in those uncommon > but real cases where platforms need to perform some operation. It has a > non-zero cost on other platforms, as the CPU will need to go through a > few function calls to end up in no-ops and then go back up the call > stack. > > invalidate_kernel_vmap_range() may not be needed. I don't recall why it > was added. The call was introduced in > > commit 20e1dbf2bbe2431072571000ed31dfef09359c08 > Author: Ricardo Ribalda <ribalda@chromium.org> > Date: Sat Mar 13 00:55:20 2021 +0100 > > media: uvcvideo: Use dma_alloc_noncontiguous API > > Ricardo, do we need to invalidate the vmap range in the DMA_TO_DEVICE > case ? That change came from Christoph https://lore.kernel.org/linux-media/20210128150955.GA30563@lst.de/ """ Given that we vmap the addresses this also needs flush_kernel_vmap_range / invalidate_kernel_vmap_range calls for VIVT architectures. """ > > > > + invalidate_kernel_vmap_range(urb->transfer_buffer, > > > + urb->transfer_buffer_length); > > > + dma_sync_sgtable_for_cpu(hcd->self.sysdev, urb->sgt, dir); > > In the DMA_FROM_DEVICE case, shouldn't the vmap range should be > invalidated after calling dma_sync_sgtable_for_cpu() ? Otherwise I think > speculative reads coming between invalidation and dma sync could result > in data corruption. > > > > +} > > > > This entire routine should be inserted at the appropriate place in > > usb_hcd_unmap_urb_for_dma() instead of being standalone. > > > > > +static void usb_dma_noncoherent_sync_for_device(struct usb_hcd *hcd, > > > + struct urb *urb) > > > +{ > > > + enum dma_data_direction dir; > > > + > > > + if (!urb->sgt) > > > + return; > > > + > > > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; > > > + flush_kernel_vmap_range(urb->transfer_buffer, > > > + urb->transfer_buffer_length); > > > + dma_sync_sgtable_for_device(hcd->self.sysdev, urb->sgt, dir); > > > +} > > > > Likewise, this code belongs inside usb_hcd_map_urb_for_dma(). > > > > Also, the material that this routine replaces in the uvc and stk1160 > > drivers do not call flush_kernel_vmap_range(). Why did you add that > > here? Was this omission a bug in those drivers? > > > > Alan Stern > > -- > Regards, > > Laurent Pinchart -- Ricardo Ribalda
On Mon, Jun 30, 2025 at 08:48:23AM +0200, Ricardo Ribalda wrote: > On Mon, 30 Jun 2025 at 01:39, Laurent Pinchart wrote: > > On Fri, Jun 27, 2025 at 10:23:36AM -0400, Alan Stern wrote: > > > On Fri, Jun 27, 2025 at 06:19:37PM +0800, Xu Yang wrote: > > > > This will add usb_alloc_noncoherent() and usb_free_noncoherent() > > > > functions to support alloc and free buffer in a dma-noncoherent way. > > > > > > > > To explicit manage the memory ownership for the kernel and device, > > > > this will also add usb_dma_noncoherent_sync_for_cpu/device() functions > > > > and call it at proper time. The management requires the user save > > > > sg_table returned by usb_alloc_noncoherent() to urb->sgt. > > > > > > > > Signed-off-by: Xu Yang <xu.yang_2@nxp.com> > > > > --- > > > > drivers/usb/core/hcd.c | 30 ++++++++++++++++ > > > > drivers/usb/core/usb.c | 80 ++++++++++++++++++++++++++++++++++++++++++ > > > > include/linux/usb.h | 9 +++++ > > > > 3 files changed, 119 insertions(+) > > > > > > > > diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c > > > > index c22de97432a0..5fa00d32afb8 100644 > > > > --- a/drivers/usb/core/hcd.c > > > > +++ b/drivers/usb/core/hcd.c > > > > @@ -1496,6 +1496,34 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, > > > > } > > > > EXPORT_SYMBOL_GPL(usb_hcd_map_urb_for_dma); > > > > > > > > +static void usb_dma_noncoherent_sync_for_cpu(struct usb_hcd *hcd, > > > > + struct urb *urb) > > > > +{ > > > > + enum dma_data_direction dir; > > > > + > > > > + if (!urb->sgt) > > > > + return; > > > > + > > > > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; > > > > > > Are the following operations really necessary if the direction is OUT? > > > There are no bidirectional URBs, and an OUT transfer never modifies the > > > contents of the transfer buffer so the buffer contents will be the same > > > after the URB completes as they were when the URB was submitted. > > > > The arch part of dma_sync_sgtable_for_cpu(DMA_TO_DEVICE) is a no-op on > > all architectures but microblaze, mips, parisc and powerpc (at least in > > some configurations of those architectures). > > > > The IOMMU DMA mapping backend calls into the arch-specific code, and > > also handles swiotlb, which is a no-op for DMA_TO_DEVICE. There's also > > some IOMMU-related arch-specific handling for sparc. > > > > I think dma_sync_sgtable_for_cpu() should be called for the > > DMA_TO_DEVICE direction, to ensure proper operation in those uncommon > > but real cases where platforms need to perform some operation. It has a > > non-zero cost on other platforms, as the CPU will need to go through a > > few function calls to end up in no-ops and then go back up the call > > stack. > > > > invalidate_kernel_vmap_range() may not be needed. I don't recall why it > > was added. The call was introduced in > > > > commit 20e1dbf2bbe2431072571000ed31dfef09359c08 > > Author: Ricardo Ribalda <ribalda@chromium.org> > > Date: Sat Mar 13 00:55:20 2021 +0100 > > > > media: uvcvideo: Use dma_alloc_noncontiguous API > > > > Ricardo, do we need to invalidate the vmap range in the DMA_TO_DEVICE > > case ? > > That change came from Christoph > https://lore.kernel.org/linux-media/20210128150955.GA30563@lst.de/ > > """ > > Given that we vmap the addresses this also needs > flush_kernel_vmap_range / invalidate_kernel_vmap_range calls for > VIVT architectures. > > """ Thank you, I looked for such a discussion in the list archive yesterday but somehow missed it. Christoph, you mentioned Right now we don't have a proper state machine for the *_kernel_vmap_range, but we should probably add one once usage of this grows. Has there been any progress on that front ? > > > > + invalidate_kernel_vmap_range(urb->transfer_buffer, > > > > + urb->transfer_buffer_length); > > > > + dma_sync_sgtable_for_cpu(hcd->self.sysdev, urb->sgt, dir); > > > > In the DMA_FROM_DEVICE case, shouldn't the vmap range should be > > invalidated after calling dma_sync_sgtable_for_cpu() ? Otherwise I think > > speculative reads coming between invalidation and dma sync could result > > in data corruption. > > > > > > +} > > > > > > This entire routine should be inserted at the appropriate place in > > > usb_hcd_unmap_urb_for_dma() instead of being standalone. > > > > > > > +static void usb_dma_noncoherent_sync_for_device(struct usb_hcd *hcd, > > > > + struct urb *urb) > > > > +{ > > > > + enum dma_data_direction dir; > > > > + > > > > + if (!urb->sgt) > > > > + return; > > > > + > > > > + dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; > > > > + flush_kernel_vmap_range(urb->transfer_buffer, > > > > + urb->transfer_buffer_length); > > > > + dma_sync_sgtable_for_device(hcd->self.sysdev, urb->sgt, dir); > > > > +} > > > > > > Likewise, this code belongs inside usb_hcd_map_urb_for_dma(). > > > > > > Also, the material that this routine replaces in the uvc and stk1160 > > > drivers do not call flush_kernel_vmap_range(). Why did you add that > > > here? Was this omission a bug in those drivers? > > > > > > Alan Stern -- Regards, Laurent Pinchart
On Mon, Jun 30, 2025 at 11:23:13AM +0300, Laurent Pinchart wrote: > Christoph, you mentioned > > Right now we don't have a proper state machine for the > *_kernel_vmap_range, but we should probably add one once usage of this > grows. > > Has there been any progress on that front ? None that I'm aware off anyway.
On Fri, Jun 27, 2025 at 06:19:37PM +0800, Xu Yang wrote: > This will add usb_alloc_noncoherent() and usb_free_noncoherent() > functions to support alloc and free buffer in a dma-noncoherent way. > > To explicit manage the memory ownership for the kernel and device, > this will also add usb_dma_noncoherent_sync_for_cpu/device() functions > and call it at proper time. The management requires the user save > sg_table returned by usb_alloc_noncoherent() to urb->sgt. ... > +/** > + * usb_alloc_noncoherent - allocate dma-noncoherent buffer for URB_NO_xxx_DMA_MAP > + * @dev: device the buffer will be used with > + * @size: requested buffer size > + * @mem_flags: affect whether allocation may block > + * @dma: used to return DMA address of buffer > + * @dir: dma transfer direction > + * @table: used to return sg_table of allocated memory > + * > + * Return: Either null (indicating no buffer could be allocated), or the > + * cpu-space pointer to a buffer that may be used to perform DMA to the > + * specified device. Such cpu-space buffers are returned along with the DMA > + * address (through the pointer provided). Return section should be last in the kernel-doc (this requirement is documented). > + * To explicit manage the memory ownership for the kernel vs the device by > + * usb core, the user needs save sg_table to urb->sgt. Then usb core will > + * do dma sync for cpu and device properly. > + * > + * When the buffer is no longer used, free it with usb_free_noncoherent(). Here and everywhere else in your series, pay respect to acronyms by using them as acronyms: dma --> DMA cpu --> CPU usb --> USB and so on... > + */ > +void *usb_alloc_noncoherent(struct usb_device *dev, size_t size, > + gfp_t mem_flags, dma_addr_t *dma, > + enum dma_data_direction dir, > + struct sg_table **table) > +{ > + struct device *dmadev; > + struct sg_table *sgt; > + void *buffer; > + > + if (!dev || !dev->bus) When !dev case is possible? > + return NULL; > + > + dmadev = bus_to_hcd(dev->bus)->self.sysdev; > + > + sgt = dma_alloc_noncontiguous(dmadev, size, dir, mem_flags, 0); > + if (!sgt) > + return NULL; > + > + buffer = dma_vmap_noncontiguous(dmadev, size, sgt); > + if (!buffer) { > + dma_free_noncontiguous(dmadev, size, sgt, dir); > + return NULL; > + } > + > + *table = sgt; > + *dma = sg_dma_address(sgt->sgl); > + > + return buffer; > +} ... > +void usb_free_noncoherent(struct usb_device *dev, size_t size, > + void *addr, enum dma_data_direction dir, > + struct sg_table *table) > +{ > + struct device *dmadev; > + > + if (!dev || !dev->bus) Ditto. > + return; > + if (!addr) > + return; > + > + dmadev = bus_to_hcd(dev->bus)->self.sysdev; > + dma_vunmap_noncontiguous(dmadev, addr); > + dma_free_noncontiguous(dmadev, size, table, dir); > +} -- With Best Regards, Andy Shevchenko
Hi Andy, Thanks for your comments! On Fri, Jun 27, 2025 at 02:22:59PM +0300, Andy Shevchenko wrote: > On Fri, Jun 27, 2025 at 06:19:37PM +0800, Xu Yang wrote: > > This will add usb_alloc_noncoherent() and usb_free_noncoherent() > > functions to support alloc and free buffer in a dma-noncoherent way. > > > > To explicit manage the memory ownership for the kernel and device, > > this will also add usb_dma_noncoherent_sync_for_cpu/device() functions > > and call it at proper time. The management requires the user save > > sg_table returned by usb_alloc_noncoherent() to urb->sgt. > > ... > > > +/** > > + * usb_alloc_noncoherent - allocate dma-noncoherent buffer for URB_NO_xxx_DMA_MAP > > + * @dev: device the buffer will be used with > > + * @size: requested buffer size > > + * @mem_flags: affect whether allocation may block > > + * @dma: used to return DMA address of buffer > > + * @dir: dma transfer direction > > + * @table: used to return sg_table of allocated memory > > + * > > + * Return: Either null (indicating no buffer could be allocated), or the > > + * cpu-space pointer to a buffer that may be used to perform DMA to the > > + * specified device. Such cpu-space buffers are returned along with the DMA > > + * address (through the pointer provided). > > Return section should be last in the kernel-doc (this requirement is > documented). Okay. I'll improve it. > > > + * To explicit manage the memory ownership for the kernel vs the device by > > + * usb core, the user needs save sg_table to urb->sgt. Then usb core will > > + * do dma sync for cpu and device properly. > > + * > > + * When the buffer is no longer used, free it with usb_free_noncoherent(). > > Here and everywhere else in your series, pay respect to acronyms by using them > as acronyms: > > dma --> DMA > cpu --> CPU > usb --> USB > > and so on... Okay. > > > > + */ > > +void *usb_alloc_noncoherent(struct usb_device *dev, size_t size, > > + gfp_t mem_flags, dma_addr_t *dma, > > + enum dma_data_direction dir, > > + struct sg_table **table) > > +{ > > + struct device *dmadev; > > + struct sg_table *sgt; > > + void *buffer; > > + > > + if (!dev || !dev->bus) > > When !dev case is possible? Not exactly sure, but it depends on the user. This sanity test is also carried over from usb_alloc_coherent() to avoid any surprise. > > > + return NULL; > > + > > + dmadev = bus_to_hcd(dev->bus)->self.sysdev; > > + > > + sgt = dma_alloc_noncontiguous(dmadev, size, dir, mem_flags, 0); > > + if (!sgt) > > + return NULL; > > + > > + buffer = dma_vmap_noncontiguous(dmadev, size, sgt); > > + if (!buffer) { > > + dma_free_noncontiguous(dmadev, size, sgt, dir); > > + return NULL; > > + } > > + > > + *table = sgt; > > + *dma = sg_dma_address(sgt->sgl); > > + > > + return buffer; > > +} > > ... > > > +void usb_free_noncoherent(struct usb_device *dev, size_t size, > > + void *addr, enum dma_data_direction dir, > > + struct sg_table *table) > > +{ > > + struct device *dmadev; > > + > > + if (!dev || !dev->bus) > > Ditto. > > > + return; > > + if (!addr) > > + return; > > + > > + dmadev = bus_to_hcd(dev->bus)->self.sysdev; > > + dma_vunmap_noncontiguous(dmadev, addr); > > + dma_free_noncontiguous(dmadev, size, table, dir); > > +} Thanks, Xu Yang > > -- > With Best Regards, > Andy Shevchenko > >
© 2016 - 2025 Red Hat, Inc.