ntb_transport can now scale throughput across multiple queue pairs
when remote eDMA is enabled (use_remote_edma=1). Teach ntb_netdev to
allocate multiple ntb_transport queue pairs and expose them as a
multi-queue net_device, so that each queue pair can be serviced in
parallel by the eDMA engine.

With this patch, up to N queue pairs are created, where N is chosen as
follows:
- By default, N is num_online_cpus(), to give each CPU its own queue.
- If the ntb_num_queues module parameter is non-zero, it overrides the
default and requests that many queues.
- In both cases the requested value is capped by a fixed upper bound
  (NTB_NETDEV_MAX_QUEUES) to avoid unbounded allocations, and by the
  number of queue pairs actually available from ntb_transport (see
  the example below).
If only one queue pair can be created (or ntb_num_queues=1 is set), the
driver effectively falls back to the previous single-queue behaviour.
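
A typical explicit configuration (illustrative values; assumes the
peer side is set up to match) might be:

  modprobe ntb_transport use_remote_edma=1
  modprobe ntb_netdev ntb_num_queues=8
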
Signed-off-by: Koichiro Den <den@valinux.co.jp>
---
drivers/net/ntb_netdev.c | 341 ++++++++++++++++++++++++++++-----------
1 file changed, 243 insertions(+), 98 deletions(-)
diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index fbeae05817e9..7aeca35b46c5 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -53,6 +53,8 @@
#include <linux/pci.h>
#include <linux/ntb.h>
#include <linux/ntb_transport.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
#define NTB_NETDEV_VER "0.7"
@@ -70,26 +72,84 @@ static unsigned int tx_start = 10;
/* Number of descriptors still available before stop upper layer tx */
static unsigned int tx_stop = 5;
+/*
+ * Upper bound on how many queue pairs we will try to create even if
+ * ntb_num_queues or num_online_cpus() is very large. This is an
+ * arbitrary safety cap to avoid unbounded allocations.
+ */
+#define NTB_NETDEV_MAX_QUEUES 64
+
+/*
+ * ntb_num_queues == 0 (default) means:
+ * - use num_online_cpus() as the desired queue count, capped by
+ * NTB_NETDEV_MAX_QUEUES.
+ * ntb_num_queues > 0:
+ * - try to create exactly ntb_num_queues queue pairs (again capped
+ * by NTB_NETDEV_MAX_QUEUES), but fall back to the number of queue
+ * pairs actually available from ntb_transport.
+ */
+static unsigned int ntb_num_queues;
+module_param(ntb_num_queues, uint, 0644);
+MODULE_PARM_DESC(ntb_num_queues,
+ "Number of NTB netdev queue pairs to use (0 = per-CPU)");
+
+struct ntb_netdev;
+
+struct ntb_netdev_queue {
+ struct ntb_netdev *ntdev;
+ struct ntb_transport_qp *qp;
+ struct timer_list tx_timer;
+ u16 qid;
+};
+
struct ntb_netdev {
struct pci_dev *pdev;
struct net_device *ndev;
- struct ntb_transport_qp *qp;
- struct timer_list tx_timer;
+ unsigned int num_queues;
+ struct ntb_netdev_queue *queues;
};
#define NTB_TX_TIMEOUT_MS 1000
#define NTB_RXQ_SIZE 100
+static unsigned int ntb_netdev_default_queues(void)
+{
+ unsigned int n;
+
+ if (ntb_num_queues)
+ n = ntb_num_queues;
+ else
+ n = num_online_cpus();
+
+ if (!n)
+ n = 1;
+
+ if (n > NTB_NETDEV_MAX_QUEUES)
+ n = NTB_NETDEV_MAX_QUEUES;
+
+ return n;
+}
+
static void ntb_netdev_event_handler(void *data, int link_is_up)
{
- struct net_device *ndev = data;
- struct ntb_netdev *dev = netdev_priv(ndev);
+ struct ntb_netdev_queue *q = data;
+ struct ntb_netdev *dev = q->ntdev;
+ struct net_device *ndev = dev->ndev;
+ bool any_up = false;
+ unsigned int i;
- netdev_dbg(ndev, "Event %x, Link %x\n", link_is_up,
- ntb_transport_link_query(dev->qp));
+ netdev_dbg(ndev, "Event %x, Link %x, qp %u\n", link_is_up,
+ ntb_transport_link_query(q->qp), q->qid);
if (link_is_up) {
- if (ntb_transport_link_query(dev->qp))
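+ /* Report carrier as long as at least one queue pair link is up */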
+ for (i = 0; i < dev->num_queues; i++) {
+ if (ntb_transport_link_query(dev->queues[i].qp)) {
+ any_up = true;
+ break;
+ }
+ }
+
+ if (any_up)
netif_carrier_on(ndev);
} else {
netif_carrier_off(ndev);
@@ -99,7 +159,9 @@ static void ntb_netdev_event_handler(void *data, int link_is_up)
static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len)
{
- struct net_device *ndev = qp_data;
+ struct ntb_netdev_queue *q = qp_data;
+ struct ntb_netdev *dev = q->ntdev;
+ struct net_device *ndev = dev->ndev;
struct sk_buff *skb;
int rc;
@@ -135,7 +197,8 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
}
enqueue_again:
- rc = ntb_transport_rx_enqueue(qp, skb, skb->data, ndev->mtu + ETH_HLEN);
+ rc = ntb_transport_rx_enqueue(q->qp, skb, skb->data,
+ ndev->mtu + ETH_HLEN);
if (rc) {
dev_kfree_skb_any(skb);
ndev->stats.rx_errors++;
@@ -143,42 +206,37 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
}
}
-static int __ntb_netdev_maybe_stop_tx(struct net_device *netdev,
- struct ntb_transport_qp *qp, int size)
+static int ntb_netdev_maybe_stop_tx(struct ntb_netdev_queue *q, int size)
{
- struct ntb_netdev *dev = netdev_priv(netdev);
+ struct net_device *ndev = q->ntdev->ndev;
+
+ if (ntb_transport_tx_free_entry(q->qp) >= size)
+ return 0;
+
+ netif_stop_subqueue(ndev, q->qid);
- netif_stop_queue(netdev);
/* Make sure to see the latest value of ntb_transport_tx_free_entry()
* since the queue was last started.
*/
smp_mb();
- if (likely(ntb_transport_tx_free_entry(qp) < size)) {
- mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time));
+ if (likely(ntb_transport_tx_free_entry(q->qp) < size)) {
+ mod_timer(&q->tx_timer, jiffies + usecs_to_jiffies(tx_time));
return -EBUSY;
}
- netif_start_queue(netdev);
- return 0;
-}
-
-static int ntb_netdev_maybe_stop_tx(struct net_device *ndev,
- struct ntb_transport_qp *qp, int size)
-{
- if (netif_queue_stopped(ndev) ||
- (ntb_transport_tx_free_entry(qp) >= size))
- return 0;
+ netif_wake_subqueue(ndev, q->qid);
- return __ntb_netdev_maybe_stop_tx(ndev, qp, size);
+ return 0;
}
static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len)
{
- struct net_device *ndev = qp_data;
+ struct ntb_netdev_queue *q = qp_data;
+ struct ntb_netdev *dev = q->ntdev;
+ struct net_device *ndev = dev->ndev;
struct sk_buff *skb;
- struct ntb_netdev *dev = netdev_priv(ndev);
skb = data;
if (!skb || !ndev)
@@ -194,13 +252,12 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
dev_kfree_skb_any(skb);
- if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) {
+ if (ntb_transport_tx_free_entry(qp) >= tx_start) {
/* Make sure anybody stopping the queue after this sees the new
* value of ntb_transport_tx_free_entry()
*/
smp_mb();
- if (netif_queue_stopped(ndev))
- netif_wake_queue(ndev);
+ netif_wake_subqueue(ndev, q->qid);
}
}
@@ -208,16 +265,26 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
struct net_device *ndev)
{
struct ntb_netdev *dev = netdev_priv(ndev);
+ u16 qid = skb_get_queue_mapping(skb);
+ struct ntb_netdev_queue *q;
int rc;
- ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop);
+ if (unlikely(!dev->num_queues))
+ goto err;
+
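+ /*
+ * Defensive: fold an out-of-range queue mapping back into the
+ * set of queue pairs we actually created.
+ */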
+ if (unlikely(qid >= dev->num_queues))
+ qid = qid % dev->num_queues;
- rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len);
+ q = &dev->queues[qid];
+
+ ntb_netdev_maybe_stop_tx(q, tx_stop);
+
+ rc = ntb_transport_tx_enqueue(q->qp, skb, skb->data, skb->len);
if (rc)
goto err;
/* check for next submit */
- ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop);
+ ntb_netdev_maybe_stop_tx(q, tx_stop);
return NETDEV_TX_OK;
@@ -229,80 +296,103 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
static void ntb_netdev_tx_timer(struct timer_list *t)
{
- struct ntb_netdev *dev = timer_container_of(dev, t, tx_timer);
+ struct ntb_netdev_queue *q = timer_container_of(q, t, tx_timer);
+ struct ntb_netdev *dev = q->ntdev;
struct net_device *ndev = dev->ndev;
- if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) {
- mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time));
+ if (ntb_transport_tx_free_entry(q->qp) < tx_stop) {
+ mod_timer(&q->tx_timer, jiffies + usecs_to_jiffies(tx_time));
} else {
- /* Make sure anybody stopping the queue after this sees the new
+ /*
+ * Make sure anybody stopping the queue after this sees the new
* value of ntb_transport_tx_free_entry()
*/
smp_mb();
- if (netif_queue_stopped(ndev))
- netif_wake_queue(ndev);
+ netif_wake_subqueue(ndev, q->qid);
}
}
static int ntb_netdev_open(struct net_device *ndev)
{
struct ntb_netdev *dev = netdev_priv(ndev);
+ struct ntb_netdev_queue *queue;
struct sk_buff *skb;
- int rc, i, len;
-
- /* Add some empty rx bufs */
- for (i = 0; i < NTB_RXQ_SIZE; i++) {
- skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
- if (!skb) {
- rc = -ENOMEM;
- goto err;
- }
+ int rc = 0, i, len;
+ unsigned int q;
- rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
- ndev->mtu + ETH_HLEN);
- if (rc) {
- dev_kfree_skb(skb);
- goto err;
+ /* Add some empty rx bufs for each queue */
+ for (q = 0; q < dev->num_queues; q++) {
+ queue = &dev->queues[q];
+
+ for (i = 0; i < NTB_RXQ_SIZE; i++) {
+ skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
+ if (!skb) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ rc = ntb_transport_rx_enqueue(queue->qp, skb, skb->data,
+ ndev->mtu + ETH_HLEN);
+ if (rc) {
+ dev_kfree_skb(skb);
+ goto err;
+ }
}
- }
- timer_setup(&dev->tx_timer, ntb_netdev_tx_timer, 0);
+ timer_setup(&queue->tx_timer, ntb_netdev_tx_timer, 0);
+ }
netif_carrier_off(ndev);
- ntb_transport_link_up(dev->qp);
- netif_start_queue(ndev);
+
+ for (q = 0; q < dev->num_queues; q++)
+ ntb_transport_link_up(dev->queues[q].qp);
+
+ netif_tx_start_all_queues(ndev);
return 0;
err:
- while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
- dev_kfree_skb(skb);
+ for (q = 0; q < dev->num_queues; q++) {
+ queue = &dev->queues[q];
+
+ while ((skb = ntb_transport_rx_remove(queue->qp, &len)))
+ dev_kfree_skb(skb);
+ }
return rc;
}
static int ntb_netdev_close(struct net_device *ndev)
{
struct ntb_netdev *dev = netdev_priv(ndev);
+ struct ntb_netdev_queue *queue;
struct sk_buff *skb;
+ unsigned int q;
int len;
- ntb_transport_link_down(dev->qp);
+ netif_tx_stop_all_queues(ndev);
+
+ for (q = 0; q < dev->num_queues; q++) {
+ queue = &dev->queues[q];
- while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
- dev_kfree_skb(skb);
+ ntb_transport_link_down(queue->qp);
- timer_delete_sync(&dev->tx_timer);
+ while ((skb = ntb_transport_rx_remove(queue->qp, &len)))
+ dev_kfree_skb(skb);
+ timer_delete_sync(&queue->tx_timer);
+ }
return 0;
}
static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu)
{
struct ntb_netdev *dev = netdev_priv(ndev);
+ struct ntb_netdev_queue *queue;
struct sk_buff *skb;
- int len, rc;
+ unsigned int q, i;
+ int len, rc = 0;
- if (new_mtu > ntb_transport_max_size(dev->qp) - ETH_HLEN)
+ if (new_mtu > ntb_transport_max_size(dev->queues[0].qp) - ETH_HLEN)
return -EINVAL;
if (!netif_running(ndev)) {
@@ -311,41 +401,54 @@ static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu)
}
/* Bring down the link and dispose of posted rx entries */
- ntb_transport_link_down(dev->qp);
+ for (q = 0; q < dev->num_queues; q++)
+ ntb_transport_link_down(dev->queues[q].qp);
if (ndev->mtu < new_mtu) {
- int i;
-
- for (i = 0; (skb = ntb_transport_rx_remove(dev->qp, &len)); i++)
- dev_kfree_skb(skb);
+ for (q = 0; q < dev->num_queues; q++) {
+ queue = &dev->queues[q];
- for (; i; i--) {
- skb = netdev_alloc_skb(ndev, new_mtu + ETH_HLEN);
- if (!skb) {
- rc = -ENOMEM;
- goto err;
- }
-
- rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
- new_mtu + ETH_HLEN);
- if (rc) {
+ for (i = 0;
+ (skb = ntb_transport_rx_remove(queue->qp, &len));
+ i++)
dev_kfree_skb(skb);
- goto err;
+
+ for (; i; i--) {
+ skb = netdev_alloc_skb(ndev,
+ new_mtu + ETH_HLEN);
+ if (!skb) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ rc = ntb_transport_rx_enqueue(queue->qp, skb,
+ skb->data,
+ new_mtu + ETH_HLEN);
+ if (rc) {
+ dev_kfree_skb(skb);
+ goto err;
+ }
}
}
}
WRITE_ONCE(ndev->mtu, new_mtu);
- ntb_transport_link_up(dev->qp);
+ for (q = 0; q < dev->num_queues; q++)
+ ntb_transport_link_up(dev->queues[q].qp);
return 0;
err:
- ntb_transport_link_down(dev->qp);
+ for (q = 0; q < dev->num_queues; q++) {
+ struct ntb_netdev_queue *queue = &dev->queues[q];
+
+ ntb_transport_link_down(queue->qp);
- while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
- dev_kfree_skb(skb);
+ while ((skb = ntb_transport_rx_remove(queue->qp, &len)))
+ dev_kfree_skb(skb);
+ }
netdev_err(ndev, "Error changing MTU, device inoperable\n");
return rc;
@@ -404,6 +507,7 @@ static int ntb_netdev_probe(struct device *client_dev)
struct net_device *ndev;
struct pci_dev *pdev;
struct ntb_netdev *dev;
+ unsigned int q, desired_queues;
int rc;
ntb = dev_ntb(client_dev->parent);
@@ -411,7 +515,9 @@ static int ntb_netdev_probe(struct device *client_dev)
if (!pdev)
return -ENODEV;
- ndev = alloc_etherdev(sizeof(*dev));
+ desired_queues = ntb_netdev_default_queues();
+
+ ndev = alloc_etherdev_mq(sizeof(*dev), desired_queues);
if (!ndev)
return -ENOMEM;
@@ -420,6 +526,15 @@ static int ntb_netdev_probe(struct device *client_dev)
dev = netdev_priv(ndev);
dev->ndev = ndev;
dev->pdev = pdev;
+ dev->num_queues = 0;
+
+ dev->queues = kcalloc(desired_queues, sizeof(*dev->queues),
+ GFP_KERNEL);
+ if (!dev->queues) {
+ rc = -ENOMEM;
+ goto err_free_netdev;
+ }
+
ndev->features = NETIF_F_HIGHDMA;
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
@@ -436,26 +551,51 @@ static int ntb_netdev_probe(struct device *client_dev)
ndev->min_mtu = 0;
ndev->max_mtu = ETH_MAX_MTU;
- dev->qp = ntb_transport_create_queue(ndev, client_dev,
- &ntb_netdev_handlers);
- if (!dev->qp) {
+ for (q = 0; q < desired_queues; q++) {
+ struct ntb_netdev_queue *queue = &dev->queues[q];
+
+ queue->ntdev = dev;
+ queue->qid = q;
+ queue->qp = ntb_transport_create_queue(queue, client_dev,
+ &ntb_netdev_handlers);
+ if (!queue->qp)
+ break;
+
+ dev->num_queues++;
+ }
+
+ if (!dev->num_queues) {
rc = -EIO;
- goto err;
+ goto err_free_queues;
}
- ndev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN;
+ rc = netif_set_real_num_tx_queues(ndev, dev->num_queues);
+ if (rc)
+ goto err_free_qps;
+
+ rc = netif_set_real_num_rx_queues(ndev, dev->num_queues);
+ if (rc)
+ goto err_free_qps;
+
+ ndev->mtu = ntb_transport_max_size(dev->queues[0].qp) - ETH_HLEN;
rc = register_netdev(ndev);
if (rc)
- goto err1;
+ goto err_free_qps;
dev_set_drvdata(client_dev, ndev);
- dev_info(&pdev->dev, "%s created\n", ndev->name);
+ dev_info(&pdev->dev, "%s created with %u queue pairs\n",
+ ndev->name, dev->num_queues);
return 0;
-err1:
- ntb_transport_free_queue(dev->qp);
-err:
+err_free_qps:
+ for (q = 0; q < dev->num_queues; q++)
+ ntb_transport_free_queue(dev->queues[q].qp);
+
+err_free_queues:
+ kfree(dev->queues);
+
+err_free_netdev:
free_netdev(ndev);
return rc;
}
@@ -464,9 +604,14 @@ static void ntb_netdev_remove(struct device *client_dev)
{
struct net_device *ndev = dev_get_drvdata(client_dev);
struct ntb_netdev *dev = netdev_priv(ndev);
+ unsigned int q;
+
unregister_netdev(ndev);
- ntb_transport_free_queue(dev->qp);
+ for (q = 0; q < dev->num_queues; q++)
+ ntb_transport_free_queue(dev->queues[q].qp);
+
+ kfree(dev->queues);
free_netdev(ndev);
}
--
2.48.1