The functions qcaspi_netdev_open/close are responsible for requesting &
freeing the SPI interrupt, which wasn't the best choice. Currently
it's possible to trigger a double free of the interrupt by calling
qcaspi_netdev_close() after qcaspi_netdev_open() has failed.
So let us split IRQ allocation & enabling, so we can take advantage
of a device managed IRQ and also fix the issue.
Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
---
drivers/net/ethernet/qualcomm/qca_spi.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index b11a998b2456..b2573eea8a7a 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -674,7 +674,6 @@ static int
qcaspi_netdev_open(struct net_device *dev)
{
struct qcaspi *qca = netdev_priv(dev);
- int ret = 0;
if (!qca)
return -EINVAL;
@@ -684,13 +683,7 @@ qcaspi_netdev_open(struct net_device *dev)
qca->sync = QCASPI_SYNC_UNKNOWN;
qcafrm_fsm_init_spi(&qca->frm_handle);
- ret = request_irq(qca->spi_dev->irq, qcaspi_intr_handler, 0,
- dev->name, qca);
- if (ret) {
- netdev_err(dev, "%s: unable to get IRQ %d (irqval=%d).\n",
- QCASPI_DRV_NAME, qca->spi_dev->irq, ret);
- return ret;
- }
+ enable_irq(qca->spi_dev->irq);
/* SPI thread takes care of TX queue */
kthread_unpark(qca->spi_thread);
@@ -707,7 +700,7 @@ qcaspi_netdev_close(struct net_device *dev)
netif_stop_queue(dev);
qcaspi_write_register(qca, SPI_REG_INTR_ENABLE, 0, wr_verify);
- free_irq(qca->spi_dev->irq, qca);
+ disable_irq(qca->spi_dev->irq);
kthread_park(qca->spi_thread);
qcaspi_flush_tx_ring(qca);
@@ -977,6 +970,15 @@ qca_spi_probe(struct spi_device *spi)
spi_set_drvdata(spi, qcaspi_devs);
+ ret = devm_request_irq(&spi->dev, spi->irq, qcaspi_intr_handler,
+ IRQF_NO_AUTOEN, qca->net_dev->name, qca);
+ if (ret) {
+ dev_err(&spi->dev, "Unable to get IRQ %d (irqval=%d).\n",
+ spi->irq, ret);
+ free_netdev(qcaspi_devs);
+ return ret;
+ }
+
ret = of_get_ethdev_address(spi->dev.of_node, qca->net_dev);
if (ret) {
eth_hw_addr_random(qca->net_dev);
--
2.34.1
On Tue, 2023-11-21 at 17:30 +0100, Stefan Wahren wrote:
> The functions qcaspi_netdev_open/close are responsible of request &
> free of the SPI interrupt, which wasn't the best choice. Currently
> it's possible to trigger a double free of the interrupt by calling
> qcaspi_netdev_close() after qcaspi_netdev_open() has failed.
> So let us split IRQ allocation & enabling, so we can take advantage
> of a device managed IRQ and also fix the issue.
>
> Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
> Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
The change makes sense, but the changelog is confusing.
qcaspi_netdev_close() and qcaspi_netdev_open() are invoked only via
ndo_open and ndo_close(), right? So qcaspi_netdev_close() will never be
invoked after a qcaspi_netdev_open() failure - that is, when IFF_UP is not set.
Cheers,
Paolo
Hi Paolo,
Am 23.11.23 um 12:37 schrieb Paolo Abeni:
> On Tue, 2023-11-21 at 17:30 +0100, Stefan Wahren wrote:
>> The functions qcaspi_netdev_open/close are responsible of request &
>> free of the SPI interrupt, which wasn't the best choice. Currently
>> it's possible to trigger a double free of the interrupt by calling
>> qcaspi_netdev_close() after qcaspi_netdev_open() has failed.
>> So let us split IRQ allocation & enabling, so we can take advantage
>> of a device managed IRQ and also fix the issue.
>>
>> Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
>> Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
> The change makes sense, but the changelog is confusing.
>
> qcaspi_netdev_close() and qcaspi_netdev_open() are invoked only via
> ndo_open and ndo_close(), right? So qcaspi_netdev_close() will never be
> invoked qcaspi_netdev_open(), failure - that is when IFF_UP is not set.
Sorry, I forgot to mention an important part. This issue is partly
connected to patch 3.
Please look at qcaspi_set_ringparam(), which also calls ndo_close() and
ndo_open(). If you only apply patch 3, you can trigger this issue by
running the following script, interrupting it via Ctrl+C and starting it again:
#!/bin/bash
while [ true ]; do
ethtool -G eth1 tx 8
ethtool -g eth1
ethtool -G eth1 tx 10
done
[ 75.713471] qcaspi spi1.0 eth1: SPI thread exit
[ 75.721814] qcaspi spi1.0 eth1: SPI thread created
[ 76.795239] qcaspi spi1.0 eth1: SPI thread exit
[ 76.815801] qcaspi spi1.0 eth1: SPI thread created
[ 77.915872] qcaspi spi1.0 eth1: SPI thread exit
[ 77.933982] qcaspi spi1.0 eth1: SPI thread created
[ 79.036024] qcaspi spi1.0 eth1: SPI thread exit
[ 79.055595] qcaspi spi1.0 eth1: SPI thread created
[ 80.076223] qcaspi spi1.0 eth1: SPI thread exit
[ 80.097305] qcaspi spi1.0 eth1: SPI thread created
[ 81.196471] qcaspi spi1.0 eth1: SPI thread exit
[ 81.217351] qcaspi spi1.0 eth1: SPI thread created
[ 82.316592] qcaspi spi1.0 eth1: SPI thread exit
[ 82.336963] qcaspi spi1.0 eth1: SPI thread created
[ 83.436864] qcaspi spi1.0 eth1: SPI thread exit
[ 83.461252] qcaspi spi1.0 eth1: SPI thread created
[ 84.556950] qcaspi spi1.0 eth1: SPI thread exit
[ 84.575897] qcaspi spi1.0 eth1: SPI thread created
[ 85.677105] qcaspi spi1.0 eth1: SPI thread exit
[ 85.695061] qcaspi spi1.0 eth1: SPI thread created
[ 86.717215] qcaspi spi1.0 eth1: SPI thread exit
[ 86.739535] qcaspi spi1.0 eth1: SPI thread created
[ 87.837355] qcaspi spi1.0 eth1: SPI thread exit
<-- Strg + C
[ 87.841072] qcaspi spi1.0 eth1: qcaspi: unable to start kernel thread.
root@tarragon:/srv# ./test_ring_fast.sh
------------[ cut here ]------------
WARNING: CPU: 0 PID: 724 at kernel/irq/manage.c:1887 free_irq+0x23c/0x288
Trying to free already-free IRQ 73
CPU: 0 PID: 724 Comm: ethtool Not tainted
6.1.49-chargebyte-00029-g8c38d497af8a-dirty #108
Hardware name: Freescale i.MX6 Ultralite (Device Tree)
unwind_backtrace from show_stack+0x10/0x14
show_stack from dump_stack_lvl+0x24/0x2c
dump_stack_lvl from __warn+0x74/0xbc
__warn from warn_slowpath_fmt+0xc8/0x120
warn_slowpath_fmt from free_irq+0x23c/0x288
free_irq from qcaspi_netdev_close+0x38/0x5c
qcaspi_netdev_close from qcaspi_set_ringparam+0x48/0x90
qcaspi_set_ringparam from ethnl_set_rings+0x2dc/0x320
ethnl_set_rings from genl_rcv_msg+0x2c4/0x344
genl_rcv_msg from netlink_rcv_skb+0x98/0xfc
netlink_rcv_skb from genl_rcv+0x20/0x34
genl_rcv from netlink_unicast+0x114/0x1a4
netlink_unicast from netlink_sendmsg+0x314/0x340
netlink_sendmsg from sock_sendmsg_nosec+0x14/0x24
sock_sendmsg_nosec from __sys_sendto+0xc4/0xf8
__sys_sendto from ret_fast_syscall+0x0/0x54
Exception stack(0xe115dfa8 to 0xe115dff0)
dfa0: b6ed24dc 0000000c 00000003 005c4238 0000002c
00000000
dfc0: b6ed24dc 0000000c b6f6a5a0 00000122 00472e04 005c41f0 00436b60
005c4190
dfe0: 00000122 bec50b68 b6e5f841 b6dd1ae6
---[ end trace 0000000000000000 ]---
>
> Cheers,
>
> Paolo
>
On Fri, 2023-11-24 at 15:01 +0100, Stefan Wahren wrote:
> Hi Paolo,
>
> Am 23.11.23 um 12:37 schrieb Paolo Abeni:
> > On Tue, 2023-11-21 at 17:30 +0100, Stefan Wahren wrote:
> > > The functions qcaspi_netdev_open/close are responsible of request &
> > > free of the SPI interrupt, which wasn't the best choice. Currently
> > > it's possible to trigger a double free of the interrupt by calling
> > > qcaspi_netdev_close() after qcaspi_netdev_open() has failed.
> > > So let us split IRQ allocation & enabling, so we can take advantage
> > > of a device managed IRQ and also fix the issue.
> > >
> > > Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
> > > Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
> > The change makes sense, but the changelog is confusing.
> >
> > qcaspi_netdev_close() and qcaspi_netdev_open() are invoked only via
> > ndo_open and ndo_close(), right? So qcaspi_netdev_close() will never be
> > invoked qcaspi_netdev_open(), failure - that is when IFF_UP is not set.
> sorry, i missed to mention an important part. This issue is partly
> connected to patch 3.
> Please look at qcaspi_set_ringparam() which also call ndo_close() and
> ndo_open().
Ah, I see it now. IMHO root cause of the problem is there. The ethtool
op should not flip the device state.
A more narrow fix would be to park/unpark the thread inside
set_ringparam() - instead of the whole patch 1 && 2 I suspect.
IMHO the changes in this patch still make sense - as a refactor for net-next.
Cheers,
Paolo
Hi Paolo,
Am 24.11.23 um 16:33 schrieb Paolo Abeni:
> On Fri, 2023-11-24 at 15:01 +0100, Stefan Wahren wrote:
>> Hi Paolo,
>>
>> Am 23.11.23 um 12:37 schrieb Paolo Abeni:
>>> On Tue, 2023-11-21 at 17:30 +0100, Stefan Wahren wrote:
>>>> The functions qcaspi_netdev_open/close are responsible of request &
>>>> free of the SPI interrupt, which wasn't the best choice. Currently
>>>> it's possible to trigger a double free of the interrupt by calling
>>>> qcaspi_netdev_close() after qcaspi_netdev_open() has failed.
>>>> So let us split IRQ allocation & enabling, so we can take advantage
>>>> of a device managed IRQ and also fix the issue.
>>>>
>>>> Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
>>>> Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
>>> The change makes sense, but the changelog is confusing.
>>>
>>> qcaspi_netdev_close() and qcaspi_netdev_open() are invoked only via
>>> ndo_open and ndo_close(), right? So qcaspi_netdev_close() will never be
>>> invoked qcaspi_netdev_open(), failure - that is when IFF_UP is not set.
>> sorry, i missed to mention an important part. This issue is partly
>> connected to patch 3.
>> Please look at qcaspi_set_ringparam() which also call ndo_close() and
>> ndo_open().
> Ah, I see it now. IMHO root cause of the problem is there. The ethtool
> op should not flip the device state.
>
> A more narrow fix would be to park/unpark the thread inside
> set_ringparam() - instead of the whole patch 1 && 2 I suspect.
Before I send a completely new version of this series, could you please
have a look at this replacement for patches 1 & 2:
qca_debug: Prevent crash on TX ring changes
The qca_spi driver stops and restarts the SPI kernel thread
(via ndo_stop & ndo_open) in case of TX ring changes. This is
a big issue because it allows userspace to prevent the restart of
the SPI kernel thread (via signals). A subsequent change of the
TX ring wrongly assumes a valid spi_thread pointer, which results
in a crash.
So prevent this by stopping the network queue and temporarily parking
the SPI thread. Because this could happen during transmission,
we also need to call qcaspi_flush_tx_ring().
Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000")
Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
---
drivers/net/ethernet/qualcomm/qca_debug.c | 17 ++++++++++++-----
drivers/net/ethernet/qualcomm/qca_spi.c | 7 ++++++-
drivers/net/ethernet/qualcomm/qca_spi.h | 2 ++
3 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/qualcomm/qca_debug.c
b/drivers/net/ethernet/qualcomm/qca_debug.c
index f62c39544e08..478ab3ce949d 100644
--- a/drivers/net/ethernet/qualcomm/qca_debug.c
+++ b/drivers/net/ethernet/qualcomm/qca_debug.c
@@ -263,22 +263,29 @@ qcaspi_set_ringparam(struct net_device *dev,
struct ethtool_ringparam *ring,
struct kernel_ethtool_ringparam *kernel_ring,
struct netlink_ext_ack *extack)
{
- const struct net_device_ops *ops = dev->netdev_ops;
struct qcaspi *qca = netdev_priv(dev);
+ bool queue_active = !netif_queue_stopped(dev);
if ((ring->rx_pending) ||
(ring->rx_mini_pending) ||
(ring->rx_jumbo_pending))
return -EINVAL;
- if (netif_running(dev))
- ops->ndo_stop(dev);
+ if (queue_active)
+ netif_stop_queue(dev);
+ if (qca->spi_thread)
+ kthread_park(qca->spi_thread);
+
+ qcaspi_flush_tx_ring(qca);
qca->txr.count = max_t(u32, ring->tx_pending, TX_RING_MIN_LEN);
qca->txr.count = min_t(u16, qca->txr.count, TX_RING_MAX_LEN);
- if (netif_running(dev))
- ops->ndo_open(dev);
+ if (qca->spi_thread)
+ kthread_unpark(qca->spi_thread);
+
+ if (queue_active)
+ netif_wake_queue(dev);
return 0;
}
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c
b/drivers/net/ethernet/qualcomm/qca_spi.c
index d0578530dfbc..2ebe9834a1d3 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -468,7 +468,7 @@ qcaspi_tx_ring_has_space(struct tx_ring *txr)
* call from the qcaspi_spi_thread.
*/
-static void
+void
qcaspi_flush_tx_ring(struct qcaspi *qca)
{
int i;
@@ -581,6 +581,11 @@ qcaspi_spi_thread(void *data)
netdev_info(qca->net_dev, "SPI thread created\n");
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
+ if (kthread_should_park()) {
+ kthread_parkme();
+ continue;
+ }
+
if ((qca->intr_req == qca->intr_svc) &&
!qca->txr.skb[qca->txr.head])
schedule();
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h
b/drivers/net/ethernet/qualcomm/qca_spi.h
index 3067356106f0..95d7306e58e9 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.h
+++ b/drivers/net/ethernet/qualcomm/qca_spi.h
@@ -107,4 +107,6 @@ struct qcaspi {
u16 burst_len;
};
+void qcaspi_flush_tx_ring(struct qcaspi *qca);
+
#endif /* _QCA_SPI_H */
--
2.34.1
>
> IMHO the changes in this still make sense - a refactor for net-next.
>
> Cheers,
>
> Paolo
>
© 2016 - 2025 Red Hat, Inc.