The xgbe_powerdown() and xgbe_powerup() functions use spinlocks
(spin_lock_irqsave) while calling functions that may sleep:
- napi_disable() can sleep waiting for NAPI polling to complete
- flush_workqueue() can sleep waiting for pending work items
This causes a "BUG: scheduling while atomic" error during suspend/resume
cycles on systems using the AMD XGBE Ethernet controller.
The spinlock protection in these functions is unnecessary because:
1. The functions are called from suspend/resume paths which are already
serialized by the PM core
2. The caller parameter was used to differentiate contexts, but the
only current usage is from the driver context (suspend/resume)
3. The power_down flag provides sufficient synchronization
Fix this by:
- Removing the spinlock from xgbe_powerdown() and xgbe_powerup()
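- Simplifying the function signatures by removing the caller parameter,
which is redundant because every caller passes XGMAC_DRIVER_CONTEXT
- Removing the XGMAC_DRIVER_CONTEXT and XGMAC_IOCTL_CONTEXT macros, which
become unused once the caller parameter is gone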
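- Reordering operations in xgbe_powerdown() to disable NAPI before
stopping TX/RX (matching the order used in xgbe_stop())
- Reordering operations in xgbe_powerup() to power up TX/RX before
enabling NAPI, and to re-attach the device and clear power_down last
- Converting xgbe_pci_pm_ops to DEFINE_SIMPLE_DEV_PM_OPS() and dropping
__maybe_unused from the PCI suspend/resume callbacks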
Fixes: c5aa9e3b8156 ("amd-xgbe: Initial AMD 10GbE platform driver")
Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
---
drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 57 +++++++------------
drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 10 ++--
drivers/net/ethernet/amd/xgbe/xgbe-platform.c | 4 +-
drivers/net/ethernet/amd/xgbe/xgbe.h | 8 +--
4 files changed, 29 insertions(+), 50 deletions(-)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 62bb4b8a68e1..d57daf6306e1 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1116,79 +1116,62 @@ static int xgbe_phy_reset(struct xgbe_prv_data *pdata)
return pdata->phy_if.phy_reset(pdata);
}
-int xgbe_powerdown(struct net_device *netdev, unsigned int caller)
+int xgbe_powerdown(struct net_device *netdev)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
- unsigned long flags;
- DBGPR("-->xgbe_powerdown\n");
-
- if (!netif_running(netdev) ||
- (caller == XGMAC_IOCTL_CONTEXT && pdata->power_down)) {
- netdev_alert(netdev, "Device is already powered down\n");
- DBGPR("<--xgbe_powerdown\n");
+ if (!netif_running(netdev)) {
+ netdev_dbg(netdev, "Device is not running, skipping powerdown\n");
return -EINVAL;
}
- spin_lock_irqsave(&pdata->lock, flags);
-
- if (caller == XGMAC_DRIVER_CONTEXT)
- netif_device_detach(netdev);
+ if (pdata->power_down) {
+ netdev_dbg(netdev, "Device is already powered down\n");
+ return -EINVAL;
+ }
+ netif_device_detach(netdev);
netif_tx_stop_all_queues(netdev);
xgbe_stop_timers(pdata);
flush_workqueue(pdata->dev_workqueue);
+ xgbe_napi_disable(pdata, 0);
+
hw_if->powerdown_tx(pdata);
hw_if->powerdown_rx(pdata);
- xgbe_napi_disable(pdata, 0);
-
pdata->power_down = 1;
- spin_unlock_irqrestore(&pdata->lock, flags);
-
- DBGPR("<--xgbe_powerdown\n");
-
return 0;
}
-int xgbe_powerup(struct net_device *netdev, unsigned int caller)
+int xgbe_powerup(struct net_device *netdev)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
- unsigned long flags;
-
- DBGPR("-->xgbe_powerup\n");
- if (!netif_running(netdev) ||
- (caller == XGMAC_IOCTL_CONTEXT && !pdata->power_down)) {
- netdev_alert(netdev, "Device is already powered up\n");
- DBGPR("<--xgbe_powerup\n");
+ if (!netif_running(netdev)) {
+ netdev_dbg(netdev, "Device is not running, skipping powerup\n");
return -EINVAL;
}
- spin_lock_irqsave(&pdata->lock, flags);
-
- pdata->power_down = 0;
-
- xgbe_napi_enable(pdata, 0);
+ if (!pdata->power_down) {
+ netdev_dbg(netdev, "Device is already powered up\n");
+ return -EINVAL;
+ }
hw_if->powerup_tx(pdata);
hw_if->powerup_rx(pdata);
- if (caller == XGMAC_DRIVER_CONTEXT)
- netif_device_attach(netdev);
+ xgbe_napi_enable(pdata, 0);
netif_tx_start_all_queues(netdev);
-
xgbe_start_timers(pdata);
+ netif_device_attach(netdev);
- spin_unlock_irqrestore(&pdata->lock, flags);
-
- DBGPR("<--xgbe_powerup\n");
+ pdata->power_down = 0;
return 0;
}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index e3e1dca9856a..112d7697174c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -352,14 +352,14 @@ static void xgbe_pci_remove(struct pci_dev *pdev)
xgbe_free_pdata(pdata);
}
-static int __maybe_unused xgbe_pci_suspend(struct device *dev)
+static int xgbe_pci_suspend(struct device *dev)
{
struct xgbe_prv_data *pdata = dev_get_drvdata(dev);
struct net_device *netdev = pdata->netdev;
int ret = 0;
if (netif_running(netdev))
- ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT);
+ ret = xgbe_powerdown(netdev);
pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER;
@@ -368,7 +368,7 @@ static int __maybe_unused xgbe_pci_suspend(struct device *dev)
return ret;
}
-static int __maybe_unused xgbe_pci_resume(struct device *dev)
+static int xgbe_pci_resume(struct device *dev)
{
struct xgbe_prv_data *pdata = dev_get_drvdata(dev);
struct net_device *netdev = pdata->netdev;
@@ -380,7 +380,7 @@ static int __maybe_unused xgbe_pci_resume(struct device *dev)
XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
if (netif_running(netdev)) {
- ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT);
+ ret = xgbe_powerup(netdev);
/* Schedule a restart in case the link or phy state changed
* while we were powered down.
@@ -453,7 +453,7 @@ static const struct pci_device_id xgbe_pci_table[] = {
};
MODULE_DEVICE_TABLE(pci, xgbe_pci_table);
-static SIMPLE_DEV_PM_OPS(xgbe_pci_pm_ops, xgbe_pci_suspend, xgbe_pci_resume);
+DEFINE_SIMPLE_DEV_PM_OPS(xgbe_pci_pm_ops, xgbe_pci_suspend, xgbe_pci_resume);
static struct pci_driver xgbe_driver = {
.name = XGBE_DRV_NAME,
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
index 47d53e59ccf6..98b03a3f3a95 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -384,7 +384,7 @@ static int xgbe_platform_suspend(struct device *dev)
DBGPR("-->xgbe_suspend\n");
if (netif_running(netdev))
- ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT);
+ ret = xgbe_powerdown(netdev);
pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER;
@@ -407,7 +407,7 @@ static int xgbe_platform_resume(struct device *dev)
XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
if (netif_running(netdev)) {
- ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT);
+ ret = xgbe_powerup(netdev);
/* Schedule a restart in case the link or phy state changed
* while we were powered down.
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 1269b8ce9249..4e1ab4172abb 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -146,10 +146,6 @@
#define XGBE_MAX_PPS_OUT 4
#define XGBE_MAX_AUX_SNAP 4
-/* Driver PMT macros */
-#define XGMAC_DRIVER_CONTEXT 1
-#define XGMAC_IOCTL_CONTEXT 2
-
#define XGMAC_FIFO_MIN_ALLOC 2048
#define XGMAC_FIFO_UNIT 256
#define XGMAC_FIFO_ALIGN(_x) \
@@ -1289,8 +1285,8 @@ void xgbe_dump_rx_desc(struct xgbe_prv_data *, struct xgbe_ring *,
unsigned int);
void xgbe_print_pkt(struct net_device *, struct sk_buff *, bool);
void xgbe_get_all_hw_features(struct xgbe_prv_data *);
-int xgbe_powerup(struct net_device *, unsigned int);
-int xgbe_powerdown(struct net_device *, unsigned int);
+int xgbe_powerup(struct net_device *netdev);
+int xgbe_powerdown(struct net_device *netdev);
void xgbe_init_rx_coalesce(struct xgbe_prv_data *);
void xgbe_init_tx_coalesce(struct xgbe_prv_data *);
void xgbe_restart_dev(struct xgbe_prv_data *pdata);
--
2.34.1
On Wed, Feb 25, 2026 at 04:30:00PM +0530, Raju Rangoju wrote:
...
> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
...
> @@ -453,7 +453,7 @@ static const struct pci_device_id xgbe_pci_table[] = {
> };
> MODULE_DEVICE_TABLE(pci, xgbe_pci_table);
>
> -static SIMPLE_DEV_PM_OPS(xgbe_pci_pm_ops, xgbe_pci_suspend, xgbe_pci_resume);
> +DEFINE_SIMPLE_DEV_PM_OPS(xgbe_pci_pm_ops, xgbe_pci_suspend, xgbe_pci_resume);
Sorry for not noticing this before sending my previous email.
Sparse flags that xgbe_pci_pm_ops should (still) be static.
>
> static struct pci_driver xgbe_driver = {
> .name = XGBE_DRV_NAME,
...
On 2/26/2026 6:10 PM, Simon Horman wrote:
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
>
>
> On Wed, Feb 25, 2026 at 04:30:00PM +0530, Raju Rangoju wrote:
>
> ...
>
>> diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
>
> ...
>
>> @@ -453,7 +453,7 @@ static const struct pci_device_id xgbe_pci_table[] = {
>> };
>> MODULE_DEVICE_TABLE(pci, xgbe_pci_table);
>>
>> -static SIMPLE_DEV_PM_OPS(xgbe_pci_pm_ops, xgbe_pci_suspend, xgbe_pci_resume);
>> +DEFINE_SIMPLE_DEV_PM_OPS(xgbe_pci_pm_ops, xgbe_pci_suspend, xgbe_pci_resume);
>
> Sorry for not noticing this before sending my previous email.
>
> Sparse flags that xgbe_pci_pm_ops should (still) be static.
Noted. Thank you. Will fix it in the clean-up part.
>
>>
>> static struct pci_driver xgbe_driver = {
>> .name = XGBE_DRV_NAME,
>
> ...
On Wed, Feb 25, 2026 at 04:30:00PM +0530, Raju Rangoju wrote:
> The xgbe_powerdown() and xgbe_powerup() functions use spinlocks
> (spin_lock_irqsave) while calling functions that may sleep:
> - napi_disable() can sleep waiting for NAPI polling to complete
> - flush_workqueue() can sleep waiting for pending work items
>
> This causes a "BUG: scheduling while atomic" error during suspend/resume
> cycles on systems using the AMD XGBE Ethernet controller.
>
> The spinlock protection in these functions is unnecessary because:
> 1. The functions are called from suspend/resume paths which are already
> serialized by the PM core
> 2. The caller parameter was used to differentiate contexts, but the
> only current usage is from the driver context (suspend/resume)
> 3. The power_down flag provides sufficient synchronization
>
> Fix this by:
> - Removing the spinlock from xgbe_powerdown() and xgbe_powerup()
> - Simplifying the function signatures by removing the unused caller
> parameter
> - Removing the unused XGMAC_DRIVER_CONTEXT and XGMAC_IOCTL_CONTEXT macros
> - Reordering operations in xgbe_powerdown() to disable NAPI before
> stopping TX/RX (matching the order used in xgbe_stop())
I don't think that all of these changes are necessary to fix the issue at hand.
If so, please separate the fix(es) from other changes. And submit only
the fixes to net - ideally one patch per fix if there is more than one
discrete fix.
OTOH, enhancements and clean-ups should be submitted to net-next.
If there are dependencies on or conflicts with the fixes, then let
them go into net first. net is merged into net-next each Thursday or Friday.
>
> Fixes: c5aa9e3b8156 ("amd-xgbe: Initial AMD 10GbE platform driver")
> Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
...
--
pw-bot: changes-requested
On 2/26/2026 6:07 PM, Simon Horman wrote:
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
>
>
> On Wed, Feb 25, 2026 at 04:30:00PM +0530, Raju Rangoju wrote:
>> The xgbe_powerdown() and xgbe_powerup() functions use spinlocks
>> (spin_lock_irqsave) while calling functions that may sleep:
>> - napi_disable() can sleep waiting for NAPI polling to complete
>> - flush_workqueue() can sleep waiting for pending work items
>>
>> This causes a "BUG: scheduling while atomic" error during suspend/resume
>> cycles on systems using the AMD XGBE Ethernet controller.
>>
>> The spinlock protection in these functions is unnecessary because:
>> 1. The functions are called from suspend/resume paths which are already
>> serialized by the PM core
>> 2. The caller parameter was used to differentiate contexts, but the
>> only current usage is from the driver context (suspend/resume)
>> 3. The power_down flag provides sufficient synchronization
>>
>> Fix this by:
>> - Removing the spinlock from xgbe_powerdown() and xgbe_powerup()
>> - Simplifying the function signatures by removing the unused caller
>> parameter
>> - Removing the unused XGMAC_DRIVER_CONTEXT and XGMAC_IOCTL_CONTEXT macros
>> - Reordering operations in xgbe_powerdown() to disable NAPI before
>> stopping TX/RX (matching the order used in xgbe_stop())
>
> I don't think that all of these changes are necessary to fix the issue at hand.
> If so, please separate the fix(es) from other changes. And submit only
> the fixes to net - ideally one patch per fix if there is more than one
> discrete fix.
>
> OTOH, enhancements and clean-ups should be submitted to net-next.
Sure, I'll separate the fixes from the clean-ups and submit them.
Since the clean-ups depend on the fixes, I'll let the fixes go in
first.
> If there are dependencies on or conflicts with the fixes, then let
> them go into net first. net is merged into net-next each Thursday or Friday.
>
>>
>> Fixes: c5aa9e3b8156 ("amd-xgbe: Initial AMD 10GbE platform driver")
>> Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
>
> ...
>
> --
> pw-bot: changes-requested
© 2016 - 2026 Red Hat, Inc.