Link queues to NAPI instances via netdev-genl API so that users can
query this information with netlink. Handle a few cases in the driver:
1. Link/unlink the NAPIs when XDP is enabled/disabled
2. Handle IGC_FLAG_QUEUE_PAIRS enabled and disabled
Example output when IGC_FLAG_QUEUE_PAIRS is enabled:
$ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \
--dump queue-get --json='{"ifindex": 2}'
[{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'},
{'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'rx'},
{'id': 2, 'ifindex': 2, 'napi-id': 8195, 'type': 'rx'},
{'id': 3, 'ifindex': 2, 'napi-id': 8196, 'type': 'rx'},
{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'tx'},
{'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'tx'},
{'id': 2, 'ifindex': 2, 'napi-id': 8195, 'type': 'tx'},
{'id': 3, 'ifindex': 2, 'napi-id': 8196, 'type': 'tx'}]
Since IGC_FLAG_QUEUE_PAIRS is enabled, you'll note that the same NAPI ID
is present for both rx and tx queues at the same index, for example
index 0:
{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'},
{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'tx'},
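
This falls directly out of the igc_set_queue_napi() helper in the diff
below: with queue pairs enabled, each q_vector carries both an RX ring
and a TX ring, so both queues are registered against the same NAPI. The
effective calls for q_vector 0 look roughly like this (an illustration
of the helper's effect, not additional code):

    netif_queue_set_napi(netdev, 0, NETDEV_QUEUE_TYPE_RX, &q_vector->napi);
    netif_queue_set_napi(netdev, 0, NETDEV_QUEUE_TYPE_TX, &q_vector->napi);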
To test IGC_FLAG_QUEUE_PAIRS disabled, a test system was booted using
the grub command line option "maxcpus=2" to force
igc_set_interrupt_capability to disable IGC_FLAG_QUEUE_PAIRS.
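
For reference, the pairing decision follows from the number of queues
igc sizes against the online CPU count; a rough sketch (an assumption,
not the driver's verbatim code):

    /* Sketch: igc derives its queue count from the online CPUs, e.g. */
    u32 rss_queues = min_t(u32, IGC_MAX_RX_QUEUES, num_online_cpus());

    /* With maxcpus=2 this yields 2 queues, few enough that
     * igc_set_interrupt_capability() clears IGC_FLAG_QUEUE_PAIRS and
     * allocates separate RX and TX vectors per queue.
     */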
Example output when IGC_FLAG_QUEUE_PAIRS is disabled:
$ lscpu | grep "On-line CPU"
On-line CPU(s) list: 0,2
$ ethtool -l enp86s0 | tail -5
Current hardware settings:
RX: n/a
TX: n/a
Other: 1
Combined: 2
$ cat /proc/interrupts | grep enp
144: [...] enp86s0
145: [...] enp86s0-rx-0
146: [...] enp86s0-rx-1
147: [...] enp86s0-tx-0
148: [...] enp86s0-tx-1
1 "other" IRQ, and 2 IRQs for each of RX and Tx, so we expect netlink to
report 4 IRQs with unique NAPI IDs:
$ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \
--dump napi-get --json='{"ifindex": 2}'
[{'id': 8196, 'ifindex': 2, 'irq': 148},
{'id': 8195, 'ifindex': 2, 'irq': 147},
{'id': 8194, 'ifindex': 2, 'irq': 146},
{'id': 8193, 'ifindex': 2, 'irq': 145}]
Now we examine which queues these NAPIs are associated with, expecting
that since IGC_FLAG_QUEUE_PAIRS is disabled each RX and TX queue will
have its own NAPI instance:
$ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \
--dump queue-get --json='{"ifindex": 2}'
[{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'},
{'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'rx'},
{'id': 0, 'ifindex': 2, 'napi-id': 8195, 'type': 'tx'},
{'id': 1, 'ifindex': 2, 'napi-id': 8196, 'type': 'tx'}]
Signed-off-by: Joe Damato <jdamato@fastly.com>
Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
v4:
- Add rtnl_lock/rtnl_unlock in two paths: igc_resume and
igc_io_error_detected. The code added to the latter is inspired by
a similar implementation in ixgbe's ixgbe_io_error_detected.
v3:
- Replace igc_unset_queue_napi with igc_set_queue_napi(adapter, i,
NULL), as suggested by Vinicius Costa Gomes
- Simplify implementation of igc_set_queue_napi as suggested by Kurt
Kanzenbach, with a tweak to use ring->queue_index
v2:
- Update commit message to include tests for IGC_FLAG_QUEUE_PAIRS
disabled
- Refactored code to move napi queue mapping and unmapping to helper
functions igc_set_queue_napi and igc_unset_queue_napi
- Adjust the code to handle IGC_FLAG_QUEUE_PAIRS disabled
- Call helpers to map/unmap queues to NAPIs in igc_up, __igc_open,
igc_xdp_enable_pool, and igc_xdp_disable_pool
drivers/net/ethernet/intel/igc/igc.h | 2 ++
drivers/net/ethernet/intel/igc/igc_main.c | 41 ++++++++++++++++++++---
drivers/net/ethernet/intel/igc/igc_xdp.c | 2 ++
3 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index eac0f966e0e4..b8111ad9a9a8 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -337,6 +337,8 @@ struct igc_adapter {
struct igc_led_classdev *leds;
};
+void igc_set_queue_napi(struct igc_adapter *adapter, int q_idx,
+ struct napi_struct *napi);
void igc_up(struct igc_adapter *adapter);
void igc_down(struct igc_adapter *adapter);
int igc_open(struct net_device *netdev);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 7964bbedb16c..04aa216ef612 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4948,6 +4948,22 @@ static int igc_sw_init(struct igc_adapter *adapter)
return 0;
}
+void igc_set_queue_napi(struct igc_adapter *adapter, int vector,
+ struct napi_struct *napi)
+{
+ struct igc_q_vector *q_vector = adapter->q_vector[vector];
+
+ if (q_vector->rx.ring)
+ netif_queue_set_napi(adapter->netdev,
+ q_vector->rx.ring->queue_index,
+ NETDEV_QUEUE_TYPE_RX, napi);
+
+ if (q_vector->tx.ring)
+ netif_queue_set_napi(adapter->netdev,
+ q_vector->tx.ring->queue_index,
+ NETDEV_QUEUE_TYPE_TX, napi);
+}
+
/**
* igc_up - Open the interface and prepare it to handle traffic
* @adapter: board private structure
@@ -4955,6 +4971,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
void igc_up(struct igc_adapter *adapter)
{
struct igc_hw *hw = &adapter->hw;
+ struct napi_struct *napi;
int i = 0;
/* hardware has been reset, we need to reload some things */
@@ -4962,8 +4979,11 @@ void igc_up(struct igc_adapter *adapter)
clear_bit(__IGC_DOWN, &adapter->state);
- for (i = 0; i < adapter->num_q_vectors; i++)
- napi_enable(&adapter->q_vector[i]->napi);
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ napi = &adapter->q_vector[i]->napi;
+ napi_enable(napi);
+ igc_set_queue_napi(adapter, i, napi);
+ }
if (adapter->msix_entries)
igc_configure_msix(adapter);
@@ -5192,6 +5212,7 @@ void igc_down(struct igc_adapter *adapter)
for (i = 0; i < adapter->num_q_vectors; i++) {
if (adapter->q_vector[i]) {
napi_synchronize(&adapter->q_vector[i]->napi);
+ igc_set_queue_napi(adapter, i, NULL);
napi_disable(&adapter->q_vector[i]->napi);
}
}
@@ -6021,6 +6042,7 @@ static int __igc_open(struct net_device *netdev, bool resuming)
struct igc_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = adapter->pdev;
struct igc_hw *hw = &adapter->hw;
+ struct napi_struct *napi;
int err = 0;
int i = 0;
@@ -6056,8 +6078,11 @@ static int __igc_open(struct net_device *netdev, bool resuming)
clear_bit(__IGC_DOWN, &adapter->state);
- for (i = 0; i < adapter->num_q_vectors; i++)
- napi_enable(&adapter->q_vector[i]->napi);
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ napi = &adapter->q_vector[i]->napi;
+ napi_enable(napi);
+ igc_set_queue_napi(adapter, i, napi);
+ }
/* Clear any pending interrupts. */
rd32(IGC_ICR);
@@ -7385,7 +7410,9 @@ static int igc_resume(struct device *dev)
wr32(IGC_WUS, ~0);
if (netif_running(netdev)) {
+ rtnl_lock();
err = __igc_open(netdev, true);
+ rtnl_unlock();
if (!err)
netif_device_attach(netdev);
}
@@ -7440,14 +7467,18 @@ static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
struct net_device *netdev = pci_get_drvdata(pdev);
struct igc_adapter *adapter = netdev_priv(netdev);
+ rtnl_lock();
netif_device_detach(netdev);
- if (state == pci_channel_io_perm_failure)
+ if (state == pci_channel_io_perm_failure) {
+ rtnl_unlock();
return PCI_ERS_RESULT_DISCONNECT;
+ }
if (netif_running(netdev))
igc_down(adapter);
pci_disable_device(pdev);
+ rtnl_unlock();
/* Request a slot reset. */
return PCI_ERS_RESULT_NEED_RESET;
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c
index e27af72aada8..4da633430b80 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.c
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.c
@@ -84,6 +84,7 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter,
napi_disable(napi);
}
+ igc_set_queue_napi(adapter, queue_id, NULL);
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
@@ -133,6 +134,7 @@ static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
+ igc_set_queue_napi(adapter, queue_id, napi);
if (needs_reset) {
napi_enable(napi);
--
2.25.1
On 10/23/2024 12:52 AM, Joe Damato wrote:
> [...]
> @@ -7385,7 +7410,9 @@ static int igc_resume(struct device *dev)
> wr32(IGC_WUS, ~0);
>
> if (netif_running(netdev)) {
> + rtnl_lock();
This change will bring back the deadlock issue that was fixed in commit:
6f31d6b: "igc: Refactor runtime power management flow".
> err = __igc_open(netdev, true);
> + rtnl_unlock();
> if (!err)
> netif_device_attach(netdev);
> }
> [...]
Hi Joe,
The current version will cause a regression, reintroducing a possible
deadlock that was fixed previously, due to the addition of rtnl_lock in
igc_resume.
You can refer to the following link:
https://github.com/torvalds/linux/commit/6f31d6b643a32cc126cf86093fca1ea575948bf0#diff-d5b32b873e9902b496280a5f42c246043c8f0691d8b3a6bbd56df99ce8ceb394L7190
On Sun, Oct 27, 2024 at 11:49:33AM +0200, Lifshits, Vitaly wrote:
>
> On 10/23/2024 12:52 AM, Joe Damato wrote:
> > [...]
> > @@ -7385,7 +7410,9 @@ static int igc_resume(struct device *dev)
> > wr32(IGC_WUS, ~0);
> > if (netif_running(netdev)) {
> > + rtnl_lock();
>
> This change will bring back the deadlock issue that was fixed in commit:
> 6f31d6b: "igc: Refactor runtime power management flow".
OK, thanks for letting me know.
I think I better understand what the issue is. It seems that:
- igc_resume can be called with rtnl held via ethtool (which I
didn't know), which calls __igc_open
- __igc_open re-enables NAPIs and re-links queues to NAPI IDs (which
requires rtnl)
so, it seems like the rtnl_lock() I've added to igc_resume is
unnecessary.
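
To spell the deadlock out (a sketch of the call chain as I understand
it; the intermediate steps are my assumption, not something I've
traced):

    /* ethtool operation on a runtime-suspended device:
     *
     * rtnl_lock()                     taken by the ethtool core
     *   -> pm_runtime resume          device has to wake up first
     *     -> igc_runtime_resume()
     *       -> igc_resume()
     *         -> rtnl_lock()          <- added by this patch; blocks
     *                                    forever since rtnl is held
     */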
I suppose I don't know all of the paths where the pm functions can
be called -- are there others where RTNL is _not_ already held?
I looked at e1000e and it seems that driver does not re-enable NAPIs
in its resume path and thus does not suffer from the same issue as
igc.
So my questions are:
1. Are there other contexts where igc_resume is called where
RTNL is not held?
2. If the answer is that RTNL is always held when igc_resume is
called, then I can send a v5 that removes the
rtnl_lock/rtnl_unlock. What do you think?
[...]
>
> Hi Joe,
>
>
> The current version will cause a regression, reintroducing a possible
> deadlock that was fixed previously, due to the addition of rtnl_lock in
> igc_resume.
>
> You can refer to the following link:
>
> https://github.com/torvalds/linux/commit/6f31d6b643a32cc126cf86093fca1ea575948bf0#diff-d5b32b873e9902b496280a5f42c246043c8f0691d8b3a6bbd56df99ce8ceb394L7190
Thanks for the link.
On Mon, Oct 28, 2024 at 08:50:38AM -0700, Joe Damato wrote:
> On Sun, Oct 27, 2024 at 11:49:33AM +0200, Lifshits, Vitaly wrote:
> >
> > On 10/23/2024 12:52 AM, Joe Damato wrote:
> > > [...]
> > > @@ -7385,7 +7410,9 @@ static int igc_resume(struct device *dev)
> > > wr32(IGC_WUS, ~0);
> > > if (netif_running(netdev)) {
> > > + rtnl_lock();
> >
> > This change will bring back the deadlock issue that was fixed in commit:
> > 6f31d6b: "igc: Refactor runtime power management flow".
>
> [...]
>
> So my questions are:
>
> 1. Are there other contexts where igc_resume is called where
> RTNL is not held?
>
> 2. If the answer is that RTNL is always held when igc_resume is
> called, then I can send a v5 that removes the
> rtnl_lock/rtnl_unlock. What do you think?
I see, so it looks like there is:
- resume
- runtime_resume
The bug I am reintroducing is runtime_resume already holding RTNL
before my added call to rtnl_lock.
OK.
Does resume also hold rtnl before the driver's igc_resume is called?
I am asking because I don't know much about how PM works.
If resume does not hold RTNL (but runtime resume does, as the bug
you pointed out shows), it seems like a wrapper can be added to tell
the code whether rtnl should be held or not based on which resume is
happening.
Does anyone know if: resume (not runtime_resume) already holds RTNL?
I'll try to take a look and see, but I am not very familiar with PM.
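
In case it helps the discussion, the shape of the wrapper I have in
mind is below (just a sketch under the assumption that system resume
does not hold RTNL; the full diff I ended up with is at the end of
this thread):

    static int __igc_do_resume(struct device *dev, bool need_rtnl);

    /* system resume: assuming the PM core does not hold RTNL here */
    static int igc_resume(struct device *dev)
    {
            return __igc_do_resume(dev, true);
    }

    /* runtime resume: callers (e.g. ethtool paths) already hold RTNL */
    static int igc_runtime_resume(struct device *dev)
    {
            return __igc_do_resume(dev, false);
    }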
On 10/28/2024 9:00 AM, Joe Damato wrote:
> [...]
>
> Does anyone know if: resume (not runtime_resume) already holds RTNL?
> I'll try to take a look and see, but I am not very familiar with PM.

I believe resume doesn't hold RTNL, as it's part of the core device
code, which is not networking specific. It shouldn't be acquiring RTNL
since that is a network-specific lock.

I believe the code you posted as v5 should resolve this, and it makes
sense to me.

Thanks for digging into this :)

-Jake
On Mon, Oct 28, 2024 at 11:53:55AM -0700, Jacob Keller wrote:
> [...]
>
> I believe the code you posted as v5 should resolve this, and it makes
> sense to me.
>
> Thanks for digging into this :)

No problem; sorry for all the back and forth on this one and I really
appreciate your patience and reviews.

Thanks,
Joe
On Mon, Oct 28, 2024 at 09:00:06AM -0700, Joe Damato wrote:
> On Mon, Oct 28, 2024 at 08:50:38AM -0700, Joe Damato wrote:
> > On Sun, Oct 27, 2024 at 11:49:33AM +0200, Lifshits, Vitaly wrote:
> > >
> > > On 10/23/2024 12:52 AM, Joe Damato wrote:
> > > > [...]
> > > > @@ -7385,7 +7410,9 @@ static int igc_resume(struct device *dev)
> > > > wr32(IGC_WUS, ~0);
> > > > if (netif_running(netdev)) {
> > > > + rtnl_lock();
> > >
> > > This change will bring back the deadlock issue that was fixed in commit:
> > > 6f31d6b: "igc: Refactor runtime power management flow".
> > [...]
>
> [...]
>
> If resume does not hold RTNL (but runtime resume does, as the bug
> you pointed out shows), it seems like a wrapper can be added to tell
> the code whether rtnl should be held or not based on which resume is
> happening.
>
> Does anyone know if: resume (not runtime_resume) already holds RTNL?
> I'll try to take a look and see, but I am not very familiar with PM.
Well, I took a look and I'm probably wrong, but here's my
assessment:
- runtime_resume can happen via ethtool or netlink when rtnl is
held, so rtnl_lock will deadlock as pointed out above
- suspend happens via device_suspend in kernel/power/main.c, so I
think taking rtnl is safe for "regular" suspend. Other drivers
(like bnxt) seem to take rtnl in their "regular" suspend
callbacks.
If the above assessment is correct, I think this change should fix
the issue Vitaly mentioned and I'll submit this as part of v5. It
adds a wrapper to tell igc_resume to either hold rtnl or not
depending on whether it's called from runtime_resume or system resume.
I'll submit this as v5 shortly, and my apologies on my lack of
knowledge of PM; I am happy to perform any sort of testing on my igc
device you folks think would help verify that this is working
properly.
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 04aa216ef612..051a0cdb1143 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -7367,7 +7367,7 @@ static void igc_deliver_wake_packet(struct net_device *netdev)
netif_rx(skb);
}
-static int igc_resume(struct device *dev)
+static int __igc_do_resume(struct device *dev, bool need_rtnl)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct net_device *netdev = pci_get_drvdata(pdev);
@@ -7410,9 +7410,11 @@ static int igc_resume(struct device *dev)
wr32(IGC_WUS, ~0);
if (netif_running(netdev)) {
- rtnl_lock();
+ if (need_rtnl)
+ rtnl_lock();
err = __igc_open(netdev, true);
- rtnl_unlock();
+ if (need_rtnl)
+ rtnl_unlock();
if (!err)
netif_device_attach(netdev);
}
@@ -7420,9 +7422,14 @@ static int igc_resume(struct device *dev)
return err;
}
+static int igc_resume(struct device *dev)
+{
+ return __igc_do_resume(dev, true);
+}
+
static int igc_runtime_resume(struct device *dev)
{
- return igc_resume(dev);
+ return __igc_do_resume(dev, false);
}
static int igc_suspend(struct device *dev)