From: Manoj Panicker <manoj.panicker2@amd.com>
Implement TPH support in Broadcom BNXT device driver. The driver uses TPH
functions to retrieve and configure the device's Steering Tags when its
interrupt affinity is being changed. With appropriate firmware, we see
sustancial memory bandwidth savings and other benefits using real network
benchmarks.
Co-developed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Co-developed-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Manoj Panicker <manoj.panicker2@amd.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
---
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 85 +++++++++++++++++++++++
drivers/net/ethernet/broadcom/bnxt/bnxt.h | 7 ++
net/core/netdev_rx_queue.c | 1 +
3 files changed, 93 insertions(+)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6e422e24750a..ea0bd25d1efb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -55,6 +55,8 @@
#include <net/page_pool/helpers.h>
#include <linux/align.h>
#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
+#include <linux/pci-tph.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
@@ -10865,6 +10867,63 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
return 0;
}
+static void __bnxt_irq_affinity_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct bnxt_rx_ring_info *rxr;
+ struct bnxt_irq *irq;
+ u16 tag;
+ int err;
+
+ irq = container_of(notify, struct bnxt_irq, affinity_notify);
+ cpumask_copy(irq->cpu_mask, mask);
+
+ if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM,
+ cpumask_first(irq->cpu_mask), &tag))
+ return;
+
+ if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag))
+ return;
+
+ if (netif_running(irq->bp->dev)) {
+ rxr = &irq->bp->rx_ring[irq->ring_nr];
+ rtnl_lock();
+ err = netdev_rx_queue_restart(irq->bp->dev, irq->ring_nr);
+ if (err)
+ netdev_err(irq->bp->dev,
+ "rx queue restart failed: err=%d\n", err);
+ rtnl_unlock();
+ }
+}
+
+static void __bnxt_irq_affinity_release(struct kref __always_unused *ref)
+{
+}
+
+static void bnxt_release_irq_notifier(struct bnxt_irq *irq)
+{
+ irq_set_affinity_notifier(irq->vector, NULL);
+}
+
+static void bnxt_register_irq_notifier(struct bnxt *bp, struct bnxt_irq *irq)
+{
+ struct irq_affinity_notify *notify;
+
+ /* Nothing to do if TPH is not enabled */
+ if (!bp->tph_mode)
+ return;
+
+ irq->bp = bp;
+
+ /* Register IRQ affinity notifier */
+ notify = &irq->affinity_notify;
+ notify->irq = irq->vector;
+ notify->notify = __bnxt_irq_affinity_notify;
+ notify->release = __bnxt_irq_affinity_release;
+
+ irq_set_affinity_notifier(irq->vector, notify);
+}
+
static void bnxt_free_irq(struct bnxt *bp)
{
struct bnxt_irq *irq;
@@ -10887,11 +10946,18 @@ static void bnxt_free_irq(struct bnxt *bp)
free_cpumask_var(irq->cpu_mask);
irq->have_cpumask = 0;
}
+
+ bnxt_release_irq_notifier(irq);
+
free_irq(irq->vector, bp->bnapi[i]);
}
irq->requested = 0;
}
+
+ /* Disable TPH support */
+ pcie_disable_tph(bp->pdev);
+ bp->tph_mode = 0;
}
static int bnxt_request_irq(struct bnxt *bp)
@@ -10911,6 +10977,12 @@ static int bnxt_request_irq(struct bnxt *bp)
#ifdef CONFIG_RFS_ACCEL
rmap = bp->dev->rx_cpu_rmap;
#endif
+
+ /* Enable TPH support as part of IRQ request */
+ rc = pcie_enable_tph(bp->pdev, PCI_TPH_ST_IV_MODE);
+ if (!rc)
+ bp->tph_mode = PCI_TPH_ST_IV_MODE;
+
for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
int map_idx = bnxt_cp_num_to_irq_num(bp, i);
struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
@@ -10934,8 +11006,11 @@ static int bnxt_request_irq(struct bnxt *bp)
if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) {
int numa_node = dev_to_node(&bp->pdev->dev);
+ u16 tag;
irq->have_cpumask = 1;
+ irq->msix_nr = map_idx;
+ irq->ring_nr = i;
cpumask_set_cpu(cpumask_local_spread(i, numa_node),
irq->cpu_mask);
rc = irq_set_affinity_hint(irq->vector, irq->cpu_mask);
@@ -10945,6 +11020,16 @@ static int bnxt_request_irq(struct bnxt *bp)
irq->vector);
break;
}
+
+ bnxt_register_irq_notifier(bp, irq);
+
+ /* Init ST table entry */
+ if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM,
+ cpumask_first(irq->cpu_mask),
+ &tag))
+ continue;
+
+ pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag);
}
}
return rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 69231e85140b..641d25646367 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1227,6 +1227,11 @@ struct bnxt_irq {
u8 have_cpumask:1;
char name[IFNAMSIZ + BNXT_IRQ_NAME_EXTRA];
cpumask_var_t cpu_mask;
+
+ struct bnxt *bp;
+ int msix_nr;
+ int ring_nr;
+ struct irq_affinity_notify affinity_notify;
};
#define HWRM_RING_ALLOC_TX 0x1
@@ -2183,6 +2188,8 @@ struct bnxt {
struct net_device *dev;
struct pci_dev *pdev;
+ u8 tph_mode;
+
atomic_t intr_sem;
u32 flags;
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index e217a5838c87..10e95d7b6892 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -79,3 +79,4 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
return err;
}
+EXPORT_SYMBOL_GPL(netdev_rx_queue_restart);
--
2.45.1
Hi Wei, kernel test robot noticed the following build warnings: [auto build test WARNING on linus/master] [also build test WARNING on next-20240920] [cannot apply to pci/next pci/for-linus v6.11] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Wei-Huang/PCI-Add-TLP-Processing-Hints-TPH-support/20240917-045345 base: linus/master patch link: https://lore.kernel.org/r/20240916205103.3882081-5-wei.huang2%40amd.com patch subject: [PATCH V5 4/5] bnxt_en: Add TPH support in BNXT driver config: i386-allmodconfig (https://download.01.org/0day-ci/archive/20240920/202409201831.ToruGbMs-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240920/202409201831.ToruGbMs-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202409201831.ToruGbMs-lkp@intel.com/ All warnings (new ones prefixed by >>): drivers/net/ethernet/broadcom/bnxt/bnxt.c: In function '__bnxt_irq_affinity_notify': >> drivers/net/ethernet/broadcom/bnxt/bnxt.c:10873:35: warning: variable 'rxr' set but not used [-Wunused-but-set-variable] 10873 | struct bnxt_rx_ring_info *rxr; | ^~~ vim +/rxr +10873 drivers/net/ethernet/broadcom/bnxt/bnxt.c 10869 10870 static void __bnxt_irq_affinity_notify(struct irq_affinity_notify *notify, 10871 const cpumask_t *mask) 10872 { 10873 struct bnxt_rx_ring_info *rxr; 10874 struct bnxt_irq *irq; 10875 u16 tag; 10876 int err; 10877 10878 irq = container_of(notify, struct bnxt_irq, affinity_notify); 10879 cpumask_copy(irq->cpu_mask, mask); 10880 10881 if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM, 10882 cpumask_first(irq->cpu_mask), &tag)) 10883 return; 10884 10885 if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag)) 10886 return; 10887 10888 if (netif_running(irq->bp->dev)) { 10889 rxr = &irq->bp->rx_ring[irq->ring_nr]; 10890 rtnl_lock(); 10891 err = netdev_rx_queue_restart(irq->bp->dev, irq->ring_nr); 10892 if (err) 10893 netdev_err(irq->bp->dev, 10894 "rx queue restart failed: err=%d\n", err); 10895 rtnl_unlock(); 10896 } 10897 } 10898 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Mon, Sep 16, 2024 at 03:51:02PM -0500, Wei Huang wrote: > From: Manoj Panicker <manoj.panicker2@amd.com> > > Implement TPH support in Broadcom BNXT device driver. The driver uses TPH > functions to retrieve and configure the device's Steering Tags when its > interrupt affinity is being changed. With appropriate firmware, we see > sustancial memory bandwidth savings and other benefits using real network > benchmarks. > > Co-developed-by: Somnath Kotur <somnath.kotur@broadcom.com> > Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com> > Co-developed-by: Wei Huang <wei.huang2@amd.com> > Signed-off-by: Wei Huang <wei.huang2@amd.com> > Signed-off-by: Manoj Panicker <manoj.panicker2@amd.com> > Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com> > Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com> > --- > drivers/net/ethernet/broadcom/bnxt/bnxt.c | 85 +++++++++++++++++++++++ > drivers/net/ethernet/broadcom/bnxt/bnxt.h | 7 ++ > net/core/netdev_rx_queue.c | 1 + > 3 files changed, 93 insertions(+) > > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c ... > @@ -10865,6 +10867,63 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) > return 0; > } > > +static void __bnxt_irq_affinity_notify(struct irq_affinity_notify *notify, > + const cpumask_t *mask) > +{ > + struct bnxt_rx_ring_info *rxr; Hi Wei Huang, A minor nit from my side: rxr is set but otherwise unused in this function. Flagged by x86_64 W=1 builds with gcc-14 and clang-18. > + struct bnxt_irq *irq; > + u16 tag; > + int err; > + > + irq = container_of(notify, struct bnxt_irq, affinity_notify); > + cpumask_copy(irq->cpu_mask, mask); > + > + if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM, > + cpumask_first(irq->cpu_mask), &tag)) > + return; > + > + if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag)) > + return; > + > + if (netif_running(irq->bp->dev)) { > + rxr = &irq->bp->rx_ring[irq->ring_nr]; > + rtnl_lock(); > + err = netdev_rx_queue_restart(irq->bp->dev, irq->ring_nr); > + if (err) > + netdev_err(irq->bp->dev, > + "rx queue restart failed: err=%d\n", err); > + rtnl_unlock(); > + } > +} ...
Hi Bjorn, This patch can not be compiled directly on pci.git tree because it uses netdev_rx_queue_restart() per Broadcom's suggestion. This function was just merged to netdev last week. How could we resolve this double depedency issue? Can you take the first three TPH patches after review and I will send the rest via netdev? Thanks, -Wei On 9/16/24 3:51 PM, Wei Huang wrote: > From: Manoj Panicker <manoj.panicker2@amd.com> > > Implement TPH support in Broadcom BNXT device driver. The driver uses TPH > functions to retrieve and configure the device's Steering Tags when its > interrupt affinity is being changed. With appropriate firmware, we see > sustancial memory bandwidth savings and other benefits using real network > benchmarks. > > Co-developed-by: Somnath Kotur <somnath.kotur@broadcom.com> > Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com> > Co-developed-by: Wei Huang <wei.huang2@amd.com> > Signed-off-by: Wei Huang <wei.huang2@amd.com> > Signed-off-by: Manoj Panicker <manoj.panicker2@amd.com> > Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com> > Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com> > --- > drivers/net/ethernet/broadcom/bnxt/bnxt.c | 85 +++++++++++++++++++++++ > drivers/net/ethernet/broadcom/bnxt/bnxt.h | 7 ++ > net/core/netdev_rx_queue.c | 1 + > 3 files changed, 93 insertions(+) > > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c > index 6e422e24750a..ea0bd25d1efb 100644 > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c > @@ -55,6 +55,8 @@ > #include <net/page_pool/helpers.h> > #include <linux/align.h> > #include <net/netdev_queues.h> > +#include <net/netdev_rx_queue.h> > +#include <linux/pci-tph.h> > > #include "bnxt_hsi.h" > #include "bnxt.h" > @@ -10865,6 +10867,63 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) > return 0; > } > > +static void __bnxt_irq_affinity_notify(struct irq_affinity_notify *notify, > + const cpumask_t *mask) > +{ > + struct bnxt_rx_ring_info *rxr; > + struct bnxt_irq *irq; > + u16 tag; > + int err; > + > + irq = container_of(notify, struct bnxt_irq, affinity_notify); > + cpumask_copy(irq->cpu_mask, mask); > + > + if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM, > + cpumask_first(irq->cpu_mask), &tag)) > + return; > + > + if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag)) > + return; > + > + if (netif_running(irq->bp->dev)) { > + rxr = &irq->bp->rx_ring[irq->ring_nr]; > + rtnl_lock(); > + err = netdev_rx_queue_restart(irq->bp->dev, irq->ring_nr); > + if (err) > + netdev_err(irq->bp->dev, > + "rx queue restart failed: err=%d\n", err); > + rtnl_unlock(); > + } > +} > + > +static void __bnxt_irq_affinity_release(struct kref __always_unused *ref) > +{ > +} > + > +static void bnxt_release_irq_notifier(struct bnxt_irq *irq) > +{ > + irq_set_affinity_notifier(irq->vector, NULL); > +} > + > +static void bnxt_register_irq_notifier(struct bnxt *bp, struct bnxt_irq *irq) > +{ > + struct irq_affinity_notify *notify; > + > + /* Nothing to do if TPH is not enabled */ > + if (!bp->tph_mode) > + return; > + > + irq->bp = bp; > + > + /* Register IRQ affinity notifier */ > + notify = &irq->affinity_notify; > + notify->irq = irq->vector; > + notify->notify = __bnxt_irq_affinity_notify; > + notify->release = __bnxt_irq_affinity_release; > + > + irq_set_affinity_notifier(irq->vector, notify); > +} > + > static void bnxt_free_irq(struct bnxt *bp) > { > struct bnxt_irq *irq; > @@ -10887,11 +10946,18 @@ static void bnxt_free_irq(struct bnxt *bp) > free_cpumask_var(irq->cpu_mask); > irq->have_cpumask = 0; > } > + > + bnxt_release_irq_notifier(irq); > + > free_irq(irq->vector, bp->bnapi[i]); > } > > irq->requested = 0; > } > + > + /* Disable TPH support */ > + pcie_disable_tph(bp->pdev); > + bp->tph_mode = 0; > } > > static int bnxt_request_irq(struct bnxt *bp) > @@ -10911,6 +10977,12 @@ static int bnxt_request_irq(struct bnxt *bp) > #ifdef CONFIG_RFS_ACCEL > rmap = bp->dev->rx_cpu_rmap; > #endif > + > + /* Enable TPH support as part of IRQ request */ > + rc = pcie_enable_tph(bp->pdev, PCI_TPH_ST_IV_MODE); > + if (!rc) > + bp->tph_mode = PCI_TPH_ST_IV_MODE; > + > for (i = 0, j = 0; i < bp->cp_nr_rings; i++) { > int map_idx = bnxt_cp_num_to_irq_num(bp, i); > struct bnxt_irq *irq = &bp->irq_tbl[map_idx]; > @@ -10934,8 +11006,11 @@ static int bnxt_request_irq(struct bnxt *bp) > > if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) { > int numa_node = dev_to_node(&bp->pdev->dev); > + u16 tag; > > irq->have_cpumask = 1; > + irq->msix_nr = map_idx; > + irq->ring_nr = i; > cpumask_set_cpu(cpumask_local_spread(i, numa_node), > irq->cpu_mask); > rc = irq_set_affinity_hint(irq->vector, irq->cpu_mask); > @@ -10945,6 +11020,16 @@ static int bnxt_request_irq(struct bnxt *bp) > irq->vector); > break; > } > + > + bnxt_register_irq_notifier(bp, irq); > + > + /* Init ST table entry */ > + if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM, > + cpumask_first(irq->cpu_mask), > + &tag)) > + continue; > + > + pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag); > } > } > return rc; > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h > index 69231e85140b..641d25646367 100644 > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h > @@ -1227,6 +1227,11 @@ struct bnxt_irq { > u8 have_cpumask:1; > char name[IFNAMSIZ + BNXT_IRQ_NAME_EXTRA]; > cpumask_var_t cpu_mask; > + > + struct bnxt *bp; > + int msix_nr; > + int ring_nr; > + struct irq_affinity_notify affinity_notify; > }; > > #define HWRM_RING_ALLOC_TX 0x1 > @@ -2183,6 +2188,8 @@ struct bnxt { > struct net_device *dev; > struct pci_dev *pdev; > > + u8 tph_mode; > + > atomic_t intr_sem; > > u32 flags; > diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c > index e217a5838c87..10e95d7b6892 100644 > --- a/net/core/netdev_rx_queue.c > +++ b/net/core/netdev_rx_queue.c > @@ -79,3 +79,4 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) > > return err; > } > +EXPORT_SYMBOL_GPL(netdev_rx_queue_restart);
On Mon, Sep 16, 2024 at 04:25:03PM -0500, Wei Huang wrote: > This patch can not be compiled directly on pci.git tree because it uses > netdev_rx_queue_restart() per Broadcom's suggestion. This function was just > merged to netdev last week. > > How could we resolve this double depedency issue? Can you take the first > three TPH patches after review and I will send the rest via netdev? Just rebase this series on v6.12-rc1 when it is tagged this Sunday and resubmit. pci.git is usually based on the latest rc1 release. Thanks, Lukas
On 9/23/24 2:25 AM, Lukas Wunner wrote: > On Mon, Sep 16, 2024 at 04:25:03PM -0500, Wei Huang wrote: >> This patch can not be compiled directly on pci.git tree because it uses >> netdev_rx_queue_restart() per Broadcom's suggestion. This function was just >> merged to netdev last week. >> >> How could we resolve this double depedency issue? Can you take the first >> three TPH patches after review and I will send the rest via netdev? > > Just rebase this series on v6.12-rc1 when it is tagged this Sunday > and resubmit. pci.git is usually based on the latest rc1 release. Thanks for the advice - will do it as suggested. > > Thanks, > > Lukas
© 2016 - 2024 Red Hat, Inc.