Link queues to NAPI instances via netdev-genl API so that users can
query this information with netlink. Handle a few cases in the driver:
1. Link/unlink the NAPIs when XDP is enabled/disabled
2. Handle IGC_FLAG_QUEUE_PAIRS enabled and disabled
Example output when IGC_FLAG_QUEUE_PAIRS is enabled:
$ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \
--dump queue-get --json='{"ifindex": 2}'
[{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'},
{'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'rx'},
{'id': 2, 'ifindex': 2, 'napi-id': 8195, 'type': 'rx'},
{'id': 3, 'ifindex': 2, 'napi-id': 8196, 'type': 'rx'},
{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'tx'},
{'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'tx'},
{'id': 2, 'ifindex': 2, 'napi-id': 8195, 'type': 'tx'},
{'id': 3, 'ifindex': 2, 'napi-id': 8196, 'type': 'tx'}]
Since IGC_FLAG_QUEUE_PAIRS is enabled, you'll note that the same NAPI ID
is present for both rx and tx queues at the same index, for example
index 0:
{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'},
{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'tx'},
To test IGC_FLAG_QUEUE_PAIRS disabled, a test system was booted using
the grub command line option "maxcpus=2" to force
igc_set_interrupt_capability to disable IGC_FLAG_QUEUE_PAIRS.
Example output when IGC_FLAG_QUEUE_PAIRS is disabled:
$ lscpu | grep "On-line CPU"
On-line CPU(s) list: 0,2
$ ethtool -l enp86s0 | tail -5
Current hardware settings:
RX: n/a
TX: n/a
Other: 1
Combined: 2
$ cat /proc/interrupts | grep enp
144: [...] enp86s0
145: [...] enp86s0-rx-0
146: [...] enp86s0-rx-1
147: [...] enp86s0-tx-0
148: [...] enp86s0-tx-1
1 "other" IRQ, and 2 IRQs for each of RX and Tx, so we expect netlink to
report 4 IRQs with unique NAPI IDs:
$ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \
--dump napi-get --json='{"ifindex": 2}'
[{'id': 8196, 'ifindex': 2, 'irq': 148},
{'id': 8195, 'ifindex': 2, 'irq': 147},
{'id': 8194, 'ifindex': 2, 'irq': 146},
{'id': 8193, 'ifindex': 2, 'irq': 145}]
Now we examine which queues these NAPIs are associated with, expecting
that since IGC_FLAG_QUEUE_PAIRS is disabled each RX and TX queue will
have its own NAPI instance:
$ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \
--dump queue-get --json='{"ifindex": 2}'
[{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'},
{'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'rx'},
{'id': 0, 'ifindex': 2, 'napi-id': 8195, 'type': 'tx'},
{'id': 1, 'ifindex': 2, 'napi-id': 8196, 'type': 'tx'}]
Signed-off-by: Joe Damato <jdamato@fastly.com>
---
v3:
- Replace igc_unset_queue_napi with igc_set_queue_napi(adapater, i,
NULL), as suggested by Vinicius Costa Gomes
- Simplify implemention of igc_set_queue_napi as suggested by Kurt
Kanzenbach, with a tweak to use ring->queue_index
v2:
- Update commit message to include tests for IGC_FLAG_QUEUE_PAIRS
disabled
- Refactored code to move napi queue mapping and unmapping to helper
functions igc_set_queue_napi and igc_unset_queue_napi
- Adjust the code to handle IGC_FLAG_QUEUE_PAIRS disabled
- Call helpers to map/unmap queues to NAPIs in igc_up, __igc_open,
igc_xdp_enable_pool, and igc_xdp_disable_pool
drivers/net/ethernet/intel/igc/igc.h | 2 ++
drivers/net/ethernet/intel/igc/igc_main.c | 33 ++++++++++++++++++++---
drivers/net/ethernet/intel/igc/igc_xdp.c | 2 ++
3 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index eac0f966e0e4..b8111ad9a9a8 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -337,6 +337,8 @@ struct igc_adapter {
struct igc_led_classdev *leds;
};
+void igc_set_queue_napi(struct igc_adapter *adapter, int q_idx,
+ struct napi_struct *napi);
void igc_up(struct igc_adapter *adapter);
void igc_down(struct igc_adapter *adapter);
int igc_open(struct net_device *netdev);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 7964bbedb16c..783fc8e12ba1 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4948,6 +4948,22 @@ static int igc_sw_init(struct igc_adapter *adapter)
return 0;
}
+void igc_set_queue_napi(struct igc_adapter *adapter, int vector,
+ struct napi_struct *napi)
+{
+ struct igc_q_vector *q_vector = adapter->q_vector[vector];
+
+ if (q_vector->rx.ring)
+ netif_queue_set_napi(adapter->netdev,
+ q_vector->rx.ring->queue_index,
+ NETDEV_QUEUE_TYPE_RX, napi);
+
+ if (q_vector->tx.ring)
+ netif_queue_set_napi(adapter->netdev,
+ q_vector->tx.ring->queue_index,
+ NETDEV_QUEUE_TYPE_TX, napi);
+}
+
/**
* igc_up - Open the interface and prepare it to handle traffic
* @adapter: board private structure
@@ -4955,6 +4971,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
void igc_up(struct igc_adapter *adapter)
{
struct igc_hw *hw = &adapter->hw;
+ struct napi_struct *napi;
int i = 0;
/* hardware has been reset, we need to reload some things */
@@ -4962,8 +4979,11 @@ void igc_up(struct igc_adapter *adapter)
clear_bit(__IGC_DOWN, &adapter->state);
- for (i = 0; i < adapter->num_q_vectors; i++)
- napi_enable(&adapter->q_vector[i]->napi);
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ napi = &adapter->q_vector[i]->napi;
+ napi_enable(napi);
+ igc_set_queue_napi(adapter, i, napi);
+ }
if (adapter->msix_entries)
igc_configure_msix(adapter);
@@ -5192,6 +5212,7 @@ void igc_down(struct igc_adapter *adapter)
for (i = 0; i < adapter->num_q_vectors; i++) {
if (adapter->q_vector[i]) {
napi_synchronize(&adapter->q_vector[i]->napi);
+ igc_set_queue_napi(adapter, i, NULL);
napi_disable(&adapter->q_vector[i]->napi);
}
}
@@ -6021,6 +6042,7 @@ static int __igc_open(struct net_device *netdev, bool resuming)
struct igc_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = adapter->pdev;
struct igc_hw *hw = &adapter->hw;
+ struct napi_struct *napi;
int err = 0;
int i = 0;
@@ -6056,8 +6078,11 @@ static int __igc_open(struct net_device *netdev, bool resuming)
clear_bit(__IGC_DOWN, &adapter->state);
- for (i = 0; i < adapter->num_q_vectors; i++)
- napi_enable(&adapter->q_vector[i]->napi);
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ napi = &adapter->q_vector[i]->napi;
+ napi_enable(napi);
+ igc_set_queue_napi(adapter, i, napi);
+ }
/* Clear any pending interrupts. */
rd32(IGC_ICR);
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c
index e27af72aada8..4da633430b80 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.c
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.c
@@ -84,6 +84,7 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter,
napi_disable(napi);
}
+ igc_set_queue_napi(adapter, queue_id, NULL);
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
@@ -133,6 +134,7 @@ static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
+ igc_set_queue_napi(adapter, queue_id, napi);
if (needs_reset) {
napi_enable(napi);
--
2.25.1
On Fri, Oct 18, 2024 at 05:13:43PM +0000, Joe Damato wrote: > Link queues to NAPI instances via netdev-genl API so that users can > query this information with netlink. Handle a few cases in the driver: > 1. Link/unlink the NAPIs when XDP is enabled/disabled > 2. Handle IGC_FLAG_QUEUE_PAIRS enabled and disabled > > Example output when IGC_FLAG_QUEUE_PAIRS is enabled: > > $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \ > --dump queue-get --json='{"ifindex": 2}' > > [{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'}, > {'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'rx'}, > {'id': 2, 'ifindex': 2, 'napi-id': 8195, 'type': 'rx'}, > {'id': 3, 'ifindex': 2, 'napi-id': 8196, 'type': 'rx'}, > {'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'tx'}, > {'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'tx'}, > {'id': 2, 'ifindex': 2, 'napi-id': 8195, 'type': 'tx'}, > {'id': 3, 'ifindex': 2, 'napi-id': 8196, 'type': 'tx'}] > > Since IGC_FLAG_QUEUE_PAIRS is enabled, you'll note that the same NAPI ID > is present for both rx and tx queues at the same index, for example > index 0: > > {'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'}, > {'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'tx'}, > > To test IGC_FLAG_QUEUE_PAIRS disabled, a test system was booted using > the grub command line option "maxcpus=2" to force > igc_set_interrupt_capability to disable IGC_FLAG_QUEUE_PAIRS. > > Example output when IGC_FLAG_QUEUE_PAIRS is disabled: > > $ lscpu | grep "On-line CPU" > On-line CPU(s) list: 0,2 > > $ ethtool -l enp86s0 | tail -5 > Current hardware settings: > RX: n/a > TX: n/a > Other: 1 > Combined: 2 > > $ cat /proc/interrupts | grep enp > 144: [...] enp86s0 > 145: [...] enp86s0-rx-0 > 146: [...] enp86s0-rx-1 > 147: [...] enp86s0-tx-0 > 148: [...] enp86s0-tx-1 > > 1 "other" IRQ, and 2 IRQs for each of RX and Tx, so we expect netlink to > report 4 IRQs with unique NAPI IDs: > > $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \ > --dump napi-get --json='{"ifindex": 2}' > [{'id': 8196, 'ifindex': 2, 'irq': 148}, > {'id': 8195, 'ifindex': 2, 'irq': 147}, > {'id': 8194, 'ifindex': 2, 'irq': 146}, > {'id': 8193, 'ifindex': 2, 'irq': 145}] > > Now we examine which queues these NAPIs are associated with, expecting > that since IGC_FLAG_QUEUE_PAIRS is disabled each RX and TX queue will > have its own NAPI instance: > > $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \ > --dump queue-get --json='{"ifindex": 2}' > [{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'}, > {'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'rx'}, > {'id': 0, 'ifindex': 2, 'napi-id': 8195, 'type': 'tx'}, > {'id': 1, 'ifindex': 2, 'napi-id': 8196, 'type': 'tx'}] > > Signed-off-by: Joe Damato <jdamato@fastly.com> > --- > v3: > - Replace igc_unset_queue_napi with igc_set_queue_napi(adapater, i, > NULL), as suggested by Vinicius Costa Gomes > - Simplify implemention of igc_set_queue_napi as suggested by Kurt > Kanzenbach, with a tweak to use ring->queue_index > > v2: > - Update commit message to include tests for IGC_FLAG_QUEUE_PAIRS > disabled > - Refactored code to move napi queue mapping and unmapping to helper > functions igc_set_queue_napi and igc_unset_queue_napi > - Adjust the code to handle IGC_FLAG_QUEUE_PAIRS disabled > - Call helpers to map/unmap queues to NAPIs in igc_up, __igc_open, > igc_xdp_enable_pool, and igc_xdp_disable_pool > > drivers/net/ethernet/intel/igc/igc.h | 2 ++ > drivers/net/ethernet/intel/igc/igc_main.c | 33 ++++++++++++++++++++--- > drivers/net/ethernet/intel/igc/igc_xdp.c | 2 ++ > 3 files changed, 33 insertions(+), 4 deletions(-) I took another look at this to make sure that RTNL is held when igc_set_queue_napi is called after the e1000 bug report came in [1], and there may be two locations I've missed: 1. igc_resume, which calls __igc_open 2. igc_io_error_detected, which calls igc_down In both cases, I think the code can be modified to hold rtnl around calls to __igc_open and igc_down. Let me know what you think ? If you agree that I should hold rtnl in both of those cases, what is the best way to proceed: - send a v4, or - wait for this to get merged (since I got the notification it was pulled into intel-next) and send a fixes ? Here's the full analysis I came up with; I tried to be thorough, but it is certainly possible I missed a call site: For the up case: - igc_up: - called from igc_reinit_locked, which is called via: - igc_reset_task (rtnl is held) - igc_set_features (ndo_set_features, which itself has an ASSERT_RTNL) - various places in igc_ethtool (set_priv_flags, nway_reset, ethtool_set_eee) all of which have RTNL held - igc_change_mtu which also has RTNL held - __igc_open - called from igc_resume, which may need an rtnl_lock ? - igc_open - called from igc_io_resume, rtnl is held - called from igc_reinit_queues, only via ethool set_channels, where rtnl is held - ndo_open where rtnl is held For the down case: - igc_down: - called from various ethtool locations (set_ringparam, set_pauseparam, set_link_ksettings) all of which hold rtnl - called from igc_io_error_detected, which may need an rtnl_lock - igc_reinit_locked which is fine, as described above - igc_change_mtu which is fine, as described above - called from __igc_close - called from __igc_shutdown which holds rtnl - called from igc_reinit_queues which is fine as described above - called from igc_close which is ndo_close
On 10/22/2024 1:28 PM, Joe Damato wrote: > I took another look at this to make sure that RTNL is held when > igc_set_queue_napi is called after the e1000 bug report came in [1], > and there may be two locations I've missed: > > 1. igc_resume, which calls __igc_open > 2. igc_io_error_detected, which calls igc_down > > In both cases, I think the code can be modified to hold rtnl around > calls to __igc_open and igc_down. > > Let me know what you think ? > > If you agree that I should hold rtnl in both of those cases, what is > the best way to proceed: > - send a v4, or > - wait for this to get merged (since I got the notification it was > pulled into intel-next) and send a fixes ? > Intel-next uses a stacked set of patches which we then send in batches via PRs as they pass our internal testing. We can drop the v3 and await v4. > Here's the full analysis I came up with; I tried to be thorough, but > it is certainly possible I missed a call site: > > For the up case: > > - igc_up: > - called from igc_reinit_locked, which is called via: > - igc_reset_task (rtnl is held) > - igc_set_features (ndo_set_features, which itself has an ASSERT_RTNL) > - various places in igc_ethtool (set_priv_flags, nway_reset, > ethtool_set_eee) all of which have RTNL held > - igc_change_mtu which also has RTNL held > - __igc_open > - called from igc_resume, which may need an rtnl_lock ? > - igc_open > - called from igc_io_resume, rtnl is held > - called from igc_reinit_queues, only via ethool set_channels, > where rtnl is held > - ndo_open where rtnl is held > > For the down case: > > - igc_down: > - called from various ethtool locations (set_ringparam, > set_pauseparam, set_link_ksettings) all of which hold rtnl > - called from igc_io_error_detected, which may need an rtnl_lock > - igc_reinit_locked which is fine, as described above > - igc_change_mtu which is fine, as described above > - called from __igc_close > - called from __igc_shutdown which holds rtnl > - called from igc_reinit_queues which is fine as described above > - called from igc_close which is ndo_close This analysis looks complete to me.
On Tue, Oct 22, 2024 at 01:53:01PM -0700, Jacob Keller wrote: > > > On 10/22/2024 1:28 PM, Joe Damato wrote: > > I took another look at this to make sure that RTNL is held when > > igc_set_queue_napi is called after the e1000 bug report came in [1], > > and there may be two locations I've missed: > > > > 1. igc_resume, which calls __igc_open > > 2. igc_io_error_detected, which calls igc_down > > > > In both cases, I think the code can be modified to hold rtnl around > > calls to __igc_open and igc_down. > > > > Let me know what you think ? > > > > If you agree that I should hold rtnl in both of those cases, what is > > the best way to proceed: > > - send a v4, or > > - wait for this to get merged (since I got the notification it was > > pulled into intel-next) and send a fixes ? > > > > Intel-next uses a stacked set of patches which we then send in batches > via PRs as they pass our internal testing. > > We can drop the v3 and await v4. OK, thanks for the info. I will prepare, test locally, and send a v4 shortly. > > Here's the full analysis I came up with; I tried to be thorough, but > > it is certainly possible I missed a call site: > > > > For the up case: > > > > - igc_up: > > - called from igc_reinit_locked, which is called via: > > - igc_reset_task (rtnl is held) > > - igc_set_features (ndo_set_features, which itself has an ASSERT_RTNL) > > - various places in igc_ethtool (set_priv_flags, nway_reset, > > ethtool_set_eee) all of which have RTNL held > > - igc_change_mtu which also has RTNL held > > - __igc_open > > - called from igc_resume, which may need an rtnl_lock ? > > - igc_open > > - called from igc_io_resume, rtnl is held > > - called from igc_reinit_queues, only via ethool set_channels, > > where rtnl is held > > - ndo_open where rtnl is held > > > > For the down case: > > > > - igc_down: > > - called from various ethtool locations (set_ringparam, > > set_pauseparam, set_link_ksettings) all of which hold rtnl > > - called from igc_io_error_detected, which may need an rtnl_lock > > - igc_reinit_locked which is fine, as described above > > - igc_change_mtu which is fine, as described above > > - called from __igc_close > > - called from __igc_shutdown which holds rtnl > > - called from igc_reinit_queues which is fine as described above > > - called from igc_close which is ndo_close > > This analysis looks complete to me. Thanks; I'd appreciate your comments on the e1000 RFC I posted, if you have a moment. I'm going to update that thread with more data now that I have analysed igc as there are some parallels: https://lore.kernel.org/netdev/20241022172153.217890-1-jdamato@fastly.com/
Joe Damato <jdamato@fastly.com> writes: > Link queues to NAPI instances via netdev-genl API so that users can > query this information with netlink. Handle a few cases in the driver: > 1. Link/unlink the NAPIs when XDP is enabled/disabled > 2. Handle IGC_FLAG_QUEUE_PAIRS enabled and disabled > > Example output when IGC_FLAG_QUEUE_PAIRS is enabled: > > $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \ > --dump queue-get --json='{"ifindex": 2}' > > [{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'}, > {'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'rx'}, > {'id': 2, 'ifindex': 2, 'napi-id': 8195, 'type': 'rx'}, > {'id': 3, 'ifindex': 2, 'napi-id': 8196, 'type': 'rx'}, > {'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'tx'}, > {'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'tx'}, > {'id': 2, 'ifindex': 2, 'napi-id': 8195, 'type': 'tx'}, > {'id': 3, 'ifindex': 2, 'napi-id': 8196, 'type': 'tx'}] > > Since IGC_FLAG_QUEUE_PAIRS is enabled, you'll note that the same NAPI ID > is present for both rx and tx queues at the same index, for example > index 0: > > {'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'}, > {'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'tx'}, > > To test IGC_FLAG_QUEUE_PAIRS disabled, a test system was booted using > the grub command line option "maxcpus=2" to force > igc_set_interrupt_capability to disable IGC_FLAG_QUEUE_PAIRS. > > Example output when IGC_FLAG_QUEUE_PAIRS is disabled: > > $ lscpu | grep "On-line CPU" > On-line CPU(s) list: 0,2 > > $ ethtool -l enp86s0 | tail -5 > Current hardware settings: > RX: n/a > TX: n/a > Other: 1 > Combined: 2 > > $ cat /proc/interrupts | grep enp > 144: [...] enp86s0 > 145: [...] enp86s0-rx-0 > 146: [...] enp86s0-rx-1 > 147: [...] enp86s0-tx-0 > 148: [...] enp86s0-tx-1 > > 1 "other" IRQ, and 2 IRQs for each of RX and Tx, so we expect netlink to > report 4 IRQs with unique NAPI IDs: > > $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \ > --dump napi-get --json='{"ifindex": 2}' > [{'id': 8196, 'ifindex': 2, 'irq': 148}, > {'id': 8195, 'ifindex': 2, 'irq': 147}, > {'id': 8194, 'ifindex': 2, 'irq': 146}, > {'id': 8193, 'ifindex': 2, 'irq': 145}] > > Now we examine which queues these NAPIs are associated with, expecting > that since IGC_FLAG_QUEUE_PAIRS is disabled each RX and TX queue will > have its own NAPI instance: > > $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \ > --dump queue-get --json='{"ifindex": 2}' > [{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'}, > {'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'rx'}, > {'id': 0, 'ifindex': 2, 'napi-id': 8195, 'type': 'tx'}, > {'id': 1, 'ifindex': 2, 'napi-id': 8196, 'type': 'tx'}] > > Signed-off-by: Joe Damato <jdamato@fastly.com> Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com> Cheers, -- Vinicius
© 2016 - 2024 Red Hat, Inc.