[RFC PATCH v3 13/35] NTB: ntb_transport: Introduce get_dma_dev() helper

Koichiro Den posted 35 patches 1 month, 3 weeks ago
[RFC PATCH v3 13/35] NTB: ntb_transport: Introduce get_dma_dev() helper
Posted by Koichiro Den 1 month, 3 weeks ago
When ntb_transport is used on top of an endpoint function (EPF) NTB
implementation, DMA mappings should be associated with the underlying
PCIe controller device rather than the virtual NTB PCI function. This
matters for IOMMU configuration and DMA mask validation.

Add a small helper, get_dma_dev(), that returns the appropriate struct
device for DMA mapping, i.e. &pdev->dev for a regular NTB host bridge
and the EPC parent device for EPF-based NTB endpoints. Use it in the
places where we set up DMA mappings or log DMA-related errors.

Signed-off-by: Koichiro Den <den@valinux.co.jp>
---
 drivers/ntb/ntb_transport.c | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index bac842177b55..78d0469edbcc 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -63,6 +63,7 @@
 #include <linux/mutex.h>
 #include "linux/ntb.h"
 #include "linux/ntb_transport.h"
+#include <linux/pci-epc.h>
 
 #define NTB_TRANSPORT_VERSION	4
 #define NTB_TRANSPORT_VER	"4"
@@ -259,6 +260,26 @@ struct ntb_payload_header {
 	unsigned int flags;
 };
 
+/*
+ * Return the struct device that should be used for DMA mapping.
+ *
+ * Defaults to &ndev->pdev->dev. If ntb_get_private_data() returns a
+ * non-NULL pointer, it is treated as a struct pci_epc (assumption: only
+ * EPF-backed NTB drivers set it - TODO confirm) and the EPC's parent
+ * device is returned instead of the virtual NTB PCI function.
+ */
+static struct device *get_dma_dev(struct ntb_dev *ndev)
+{
+	struct device *dev = &ndev->pdev->dev;
+	struct pci_epc *epc;
+
+	epc = (struct pci_epc *)ntb_get_private_data(ndev);
+	if (epc)
+		dev = epc->dev.parent;
+
+	return dev;
+}
+
 enum {
 	VERSION = 0,
 	QP_LINKS,
@@ -771,13 +792,13 @@ static void ntb_transport_msi_desc_changed(void *data)
 static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
 {
 	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
-	struct pci_dev *pdev = nt->ndev->pdev;
+	struct device *dev = get_dma_dev(nt->ndev);
 
 	if (!mw->virt_addr)
 		return;
 
 	ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
-	dma_free_coherent(&pdev->dev, mw->alloc_size,
+	dma_free_coherent(dev, mw->alloc_size,
 			  mw->alloc_addr, mw->dma_addr);
 	mw->xlat_size = 0;
 	mw->buff_size = 0;
@@ -847,7 +868,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
 		      resource_size_t size)
 {
 	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
-	struct pci_dev *pdev = nt->ndev->pdev;
+	struct device *dev = get_dma_dev(nt->ndev);
 	size_t xlat_size, buff_size;
 	resource_size_t xlat_align;
 	resource_size_t xlat_align_size;
@@ -877,12 +898,12 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
 	mw->buff_size = buff_size;
 	mw->alloc_size = buff_size;
 
-	rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+	rc = ntb_alloc_mw_buffer(mw, dev, xlat_align);
 	if (rc) {
 		mw->alloc_size *= 2;
-		rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+		rc = ntb_alloc_mw_buffer(mw, dev, xlat_align);
 		if (rc) {
-			dev_err(&pdev->dev,
+			dev_err(dev,
 				"Unable to alloc aligned MW buff\n");
 			mw->xlat_size = 0;
 			mw->buff_size = 0;
@@ -895,7 +916,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
 	rc = ntb_mw_set_trans(nt->ndev, PIDX, num_mw, mw->dma_addr,
 			      mw->xlat_size, offset);
 	if (rc) {
-		dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw);
+		dev_err(dev, "Unable to set mw%d translation", num_mw);
 		ntb_free_mw(nt, num_mw);
 		return -EIO;
 	}
-- 
2.51.0
Re: [RFC PATCH v3 13/35] NTB: ntb_transport: Introduce get_dma_dev() helper
Posted by Frank Li 1 month, 3 weeks ago
On Thu, Dec 18, 2025 at 12:15:47AM +0900, Koichiro Den wrote:
> When ntb_transport is used on top of an endpoint function (EPF) NTB
> implementation, DMA mappings should be associated with the underlying
> PCIe controller device rather than the virtual NTB PCI function. This
> matters for IOMMU configuration and DMA mask validation.
>
> Add a small helper, get_dma_dev(), that returns the appropriate struct
> device for DMA mapping, i.e. &pdev->dev for a regular NTB host bridge
> and the EPC parent device for EPF-based NTB endpoints. Use it in the
> places where we set up DMA mappings or log DMA-related errors.
>
> Signed-off-by: Koichiro Den <den@valinux.co.jp>
> ---
>  drivers/ntb/ntb_transport.c | 35 ++++++++++++++++++++++++++++-------
>  1 file changed, 28 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
> index bac842177b55..78d0469edbcc 100644
> --- a/drivers/ntb/ntb_transport.c
> +++ b/drivers/ntb/ntb_transport.c
> @@ -63,6 +63,7 @@
>  #include <linux/mutex.h>
>  #include "linux/ntb.h"
>  #include "linux/ntb_transport.h"
> +#include <linux/pci-epc.h>
>
>  #define NTB_TRANSPORT_VERSION	4
>  #define NTB_TRANSPORT_VER	"4"
> @@ -259,6 +260,26 @@ struct ntb_payload_header {
>  	unsigned int flags;
>  };
>
> +/*
> + * Return the device that should be used for DMA mapping.
> + *
> + * On RC, this is simply &pdev->dev.
> + * On EPF-backed NTB endpoints, use the EPC parent device so that
> + * DMA capabilities and IOMMU configuration are taken from the
> + * controller rather than the virtual NTB PCI function.
> + */
> +static struct device *get_dma_dev(struct ntb_dev *ndev)
> +{
> +	struct device *dev = &ndev->pdev->dev;
> +	struct pci_epc *epc;
> +
> +	epc = (struct pci_epc *)ntb_get_private_data(ndev);
> +	if (epc)
> +		dev = epc->dev.parent;
> +
> +	return dev;
> +}
> +

I think it would be better to add a .get_dma_dev() callback directly, so
that the vNTB EPF driver provides the implementation. This file is common to
all NTB transports and should not include a lower-level NTB driver's
specific implementation.

Frank

>  enum {
>  	VERSION = 0,
>  	QP_LINKS,
> @@ -771,13 +792,13 @@ static void ntb_transport_msi_desc_changed(void *data)
>  static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
>  {
>  	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
> -	struct pci_dev *pdev = nt->ndev->pdev;
> +	struct device *dev = get_dma_dev(nt->ndev);
>
>  	if (!mw->virt_addr)
>  		return;
>
>  	ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
> -	dma_free_coherent(&pdev->dev, mw->alloc_size,
> +	dma_free_coherent(dev, mw->alloc_size,
>  			  mw->alloc_addr, mw->dma_addr);
>  	mw->xlat_size = 0;
>  	mw->buff_size = 0;
> @@ -847,7 +868,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
>  		      resource_size_t size)
>  {
>  	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
> -	struct pci_dev *pdev = nt->ndev->pdev;
> +	struct device *dev = get_dma_dev(nt->ndev);
>  	size_t xlat_size, buff_size;
>  	resource_size_t xlat_align;
>  	resource_size_t xlat_align_size;
> @@ -877,12 +898,12 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
>  	mw->buff_size = buff_size;
>  	mw->alloc_size = buff_size;
>
> -	rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
> +	rc = ntb_alloc_mw_buffer(mw, dev, xlat_align);
>  	if (rc) {
>  		mw->alloc_size *= 2;
> -		rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
> +		rc = ntb_alloc_mw_buffer(mw, dev, xlat_align);
>  		if (rc) {
> -			dev_err(&pdev->dev,
> +			dev_err(dev,
>  				"Unable to alloc aligned MW buff\n");
>  			mw->xlat_size = 0;
>  			mw->buff_size = 0;
> @@ -895,7 +916,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
>  	rc = ntb_mw_set_trans(nt->ndev, PIDX, num_mw, mw->dma_addr,
>  			      mw->xlat_size, offset);
>  	if (rc) {
> -		dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw);
> +		dev_err(dev, "Unable to set mw%d translation", num_mw);
>  		ntb_free_mw(nt, num_mw);
>  		return -EIO;
>  	}
> --
> 2.51.0
>
Re: [RFC PATCH v3 13/35] NTB: ntb_transport: Introduce get_dma_dev() helper
Posted by Koichiro Den 1 month, 3 weeks ago
On Fri, Dec 19, 2025 at 09:31:11AM -0500, Frank Li wrote:
> On Thu, Dec 18, 2025 at 12:15:47AM +0900, Koichiro Den wrote:
> > When ntb_transport is used on top of an endpoint function (EPF) NTB
> > implementation, DMA mappings should be associated with the underlying
> > PCIe controller device rather than the virtual NTB PCI function. This
> > matters for IOMMU configuration and DMA mask validation.
> >
> > Add a small helper, get_dma_dev(), that returns the appropriate struct
> > device for DMA mapping, i.e. &pdev->dev for a regular NTB host bridge
> > and the EPC parent device for EPF-based NTB endpoints. Use it in the
> > places where we set up DMA mappings or log DMA-related errors.
> >
> > Signed-off-by: Koichiro Den <den@valinux.co.jp>
> > ---
> >  drivers/ntb/ntb_transport.c | 35 ++++++++++++++++++++++++++++-------
> >  1 file changed, 28 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
> > index bac842177b55..78d0469edbcc 100644
> > --- a/drivers/ntb/ntb_transport.c
> > +++ b/drivers/ntb/ntb_transport.c
> > @@ -63,6 +63,7 @@
> >  #include <linux/mutex.h>
> >  #include "linux/ntb.h"
> >  #include "linux/ntb_transport.h"
> > +#include <linux/pci-epc.h>
> >
> >  #define NTB_TRANSPORT_VERSION	4
> >  #define NTB_TRANSPORT_VER	"4"
> > @@ -259,6 +260,26 @@ struct ntb_payload_header {
> >  	unsigned int flags;
> >  };
> >
> > +/*
> > + * Return the device that should be used for DMA mapping.
> > + *
> > + * On RC, this is simply &pdev->dev.
> > + * On EPF-backed NTB endpoints, use the EPC parent device so that
> > + * DMA capabilities and IOMMU configuration are taken from the
> > + * controller rather than the virtual NTB PCI function.
> > + */
> > +static struct device *get_dma_dev(struct ntb_dev *ndev)
> > +{
> > +	struct device *dev = &ndev->pdev->dev;
> > +	struct pci_epc *epc;
> > +
> > +	epc = (struct pci_epc *)ntb_get_private_data(ndev);
> > +	if (epc)
> > +		dev = epc->dev.parent;
> > +
> > +	return dev;
> > +}
> > +
> 
> I think it would be better to add a .get_dma_dev() callback directly, so
> that the vNTB EPF driver provides the implementation. This file is common to
> all NTB transports and should not include a lower-level NTB driver's
> specific implementation.

That makes sense, thanks for pointing that out.

Koichiro

> 
> Frank
> 
> >  enum {
> >  	VERSION = 0,
> >  	QP_LINKS,
> > @@ -771,13 +792,13 @@ static void ntb_transport_msi_desc_changed(void *data)
> >  static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
> >  {
> >  	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
> > -	struct pci_dev *pdev = nt->ndev->pdev;
> > +	struct device *dev = get_dma_dev(nt->ndev);
> >
> >  	if (!mw->virt_addr)
> >  		return;
> >
> >  	ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
> > -	dma_free_coherent(&pdev->dev, mw->alloc_size,
> > +	dma_free_coherent(dev, mw->alloc_size,
> >  			  mw->alloc_addr, mw->dma_addr);
> >  	mw->xlat_size = 0;
> >  	mw->buff_size = 0;
> > @@ -847,7 +868,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
> >  		      resource_size_t size)
> >  {
> >  	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
> > -	struct pci_dev *pdev = nt->ndev->pdev;
> > +	struct device *dev = get_dma_dev(nt->ndev);
> >  	size_t xlat_size, buff_size;
> >  	resource_size_t xlat_align;
> >  	resource_size_t xlat_align_size;
> > @@ -877,12 +898,12 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
> >  	mw->buff_size = buff_size;
> >  	mw->alloc_size = buff_size;
> >
> > -	rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
> > +	rc = ntb_alloc_mw_buffer(mw, dev, xlat_align);
> >  	if (rc) {
> >  		mw->alloc_size *= 2;
> > -		rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
> > +		rc = ntb_alloc_mw_buffer(mw, dev, xlat_align);
> >  		if (rc) {
> > -			dev_err(&pdev->dev,
> > +			dev_err(dev,
> >  				"Unable to alloc aligned MW buff\n");
> >  			mw->xlat_size = 0;
> >  			mw->buff_size = 0;
> > @@ -895,7 +916,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
> >  	rc = ntb_mw_set_trans(nt->ndev, PIDX, num_mw, mw->dma_addr,
> >  			      mw->xlat_size, offset);
> >  	if (rc) {
> > -		dev_err(&pdev->dev, "Unable to set mw%d translation", num_mw);
> > +		dev_err(dev, "Unable to set mw%d translation", num_mw);
> >  		ntb_free_mw(nt, num_mw);
> >  		return -EIO;
> >  	}
> > --
> > 2.51.0
> >