From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
It is not recommended to enable/disable the ASPM states on the back of the
PCI core directly using the LNKCTL register. It will break the PCI core's
knowledge about the device ASPM states. So use the APIs exposed by the PCI
core to enable/disable ASPM states.
Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
Reported-by: Qiang Yu <qiang.yu@oss.qualcomm.com>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
---
drivers/net/wireless/ath/ath.h | 14 ++++++++++++++
drivers/net/wireless/ath/ath12k/pci.c | 10 ++++------
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h
index 34654f710d8a1e63f65a47d4602e2035262a4d9e..ef685123b66bf4f41428fec67c1967f242a9ef27 100644
--- a/drivers/net/wireless/ath/ath.h
+++ b/drivers/net/wireless/ath/ath.h
@@ -21,6 +21,8 @@
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
#include <net/mac80211.h>
/*
@@ -336,4 +338,16 @@ static inline const char *ath_bus_type_to_string(enum ath_bus_type bustype)
return ath_bus_type_strings[bustype];
}
+static inline int ath_pci_aspm_state(u16 lnkctl)
+{
+ int state = 0;
+
+ if (lnkctl & PCI_EXP_LNKCTL_ASPM_L0S)
+ state |= PCIE_LINK_STATE_L0S;
+ if (lnkctl & PCI_EXP_LNKCTL_ASPM_L1)
+ state |= PCIE_LINK_STATE_L1;
+
+ return state;
+}
+
#endif /* ATH_H */
diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c
index 489d546390fcdab8f615cc9184006a958d9f140a..a5e11509e3ab8faad6638ff78ce6a8a5e9c3cbbd 100644
--- a/drivers/net/wireless/ath/ath12k/pci.c
+++ b/drivers/net/wireless/ath/ath12k/pci.c
@@ -16,6 +16,8 @@
#include "mhi.h"
#include "debug.h"
+#include "../ath.h"
+
#define ATH12K_PCI_BAR_NUM 0
#define ATH12K_PCI_DMA_MASK 36
@@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci)
u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1));
/* disable L0s and L1 */
- pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC);
+ pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1);
set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags);
}
@@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci)
{
if (ab_pci->ab->hw_params->supports_aspm &&
test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags))
- pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC,
- ab_pci->link_ctl &
- PCI_EXP_LNKCTL_ASPMC);
+ pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl));
}
static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab)
--
2.45.2
On Wed, 16 Jul 2025, Manivannan Sadhasivam via B4 Relay wrote: > From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > It is not recommended to enable/disable the ASPM states on the back of the > PCI core directly using the LNKCTL register. It will break the PCI core's > knowledge about the device ASPM states. So use the APIs exposed by the PCI > core to enable/disable ASPM states. > > Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 > > Reported-by: Qiang Yu <qiang.yu@oss.qualcomm.com> > Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > --- > drivers/net/wireless/ath/ath.h | 14 ++++++++++++++ > drivers/net/wireless/ath/ath12k/pci.c | 10 ++++------ > 2 files changed, 18 insertions(+), 6 deletions(-) > > diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h > index 34654f710d8a1e63f65a47d4602e2035262a4d9e..ef685123b66bf4f41428fec67c1967f242a9ef27 100644 > --- a/drivers/net/wireless/ath/ath.h > +++ b/drivers/net/wireless/ath/ath.h > @@ -21,6 +21,8 @@ > #include <linux/skbuff.h> > #include <linux/if_ether.h> > #include <linux/spinlock.h> > +#include <linux/pci.h> > +#include <linux/pci_regs.h> > #include <net/mac80211.h> > > /* > @@ -336,4 +338,16 @@ static inline const char *ath_bus_type_to_string(enum ath_bus_type bustype) > return ath_bus_type_strings[bustype]; > } > > +static inline int ath_pci_aspm_state(u16 lnkctl) > +{ > + int state = 0; > + > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) > + state |= PCIE_LINK_STATE_L0S; > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L1) > + state |= PCIE_LINK_STATE_L1; > + > + return state; > +} > + > #endif /* ATH_H */ > diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c > index 489d546390fcdab8f615cc9184006a958d9f140a..a5e11509e3ab8faad6638ff78ce6a8a5e9c3cbbd 100644 > --- a/drivers/net/wireless/ath/ath12k/pci.c > +++ b/drivers/net/wireless/ath/ath12k/pci.c > @@ -16,6 +16,8 @@ > #include "mhi.h" > #include "debug.h" > > +#include "../ath.h" > + > #define ATH12K_PCI_BAR_NUM 0 > #define ATH12K_PCI_DMA_MASK 36 > > @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > /* disable L0s and L1 */ > - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > - PCI_EXP_LNKCTL_ASPMC); > + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); I'd remove to comment too as the code is self-explanatory after this change. > > set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > } > @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > { > if (ab_pci->ab->hw_params->supports_aspm && > test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > - PCI_EXP_LNKCTL_ASPMC, > - ab_pci->link_ctl & > - PCI_EXP_LNKCTL_ASPMC); > + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > } > > static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab) As you now depend on ASPM driver being there, these should also add to Kconfig: depends on PCIEASPM -- i.
On Mon, Jul 21, 2025 at 11:04:10AM GMT, Ilpo Järvinen wrote: > On Wed, 16 Jul 2025, Manivannan Sadhasivam via B4 Relay wrote: > > > From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > > > It is not recommended to enable/disable the ASPM states on the back of the > > PCI core directly using the LNKCTL register. It will break the PCI core's > > knowledge about the device ASPM states. So use the APIs exposed by the PCI > > core to enable/disable ASPM states. > > > > Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 > > > > Reported-by: Qiang Yu <qiang.yu@oss.qualcomm.com> > > Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > --- > > drivers/net/wireless/ath/ath.h | 14 ++++++++++++++ > > drivers/net/wireless/ath/ath12k/pci.c | 10 ++++------ > > 2 files changed, 18 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h > > index 34654f710d8a1e63f65a47d4602e2035262a4d9e..ef685123b66bf4f41428fec67c1967f242a9ef27 100644 > > --- a/drivers/net/wireless/ath/ath.h > > +++ b/drivers/net/wireless/ath/ath.h > > @@ -21,6 +21,8 @@ > > #include <linux/skbuff.h> > > #include <linux/if_ether.h> > > #include <linux/spinlock.h> > > +#include <linux/pci.h> > > +#include <linux/pci_regs.h> > > #include <net/mac80211.h> > > > > /* > > @@ -336,4 +338,16 @@ static inline const char *ath_bus_type_to_string(enum ath_bus_type bustype) > > return ath_bus_type_strings[bustype]; > > } > > > > +static inline int ath_pci_aspm_state(u16 lnkctl) > > +{ > > + int state = 0; > > + > > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) > > + state |= PCIE_LINK_STATE_L0S; > > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L1) > > + state |= PCIE_LINK_STATE_L1; > > + > > + return state; > > +} > > + > > #endif /* ATH_H */ > > diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c > > index 489d546390fcdab8f615cc9184006a958d9f140a..a5e11509e3ab8faad6638ff78ce6a8a5e9c3cbbd 100644 > > --- a/drivers/net/wireless/ath/ath12k/pci.c > > +++ b/drivers/net/wireless/ath/ath12k/pci.c > > @@ -16,6 +16,8 @@ > > #include "mhi.h" > > #include "debug.h" > > > > +#include "../ath.h" > > + > > #define ATH12K_PCI_BAR_NUM 0 > > #define ATH12K_PCI_DMA_MASK 36 > > > > @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > > u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > > > /* disable L0s and L1 */ > > - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > - PCI_EXP_LNKCTL_ASPMC); > > + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > > I'd remove to comment too as the code is self-explanatory after this > change. > Ack > > > > set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > > } > > @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > > { > > if (ab_pci->ab->hw_params->supports_aspm && > > test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > > - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > - PCI_EXP_LNKCTL_ASPMC, > > - ab_pci->link_ctl & > > - PCI_EXP_LNKCTL_ASPMC); > > + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > > } > > > > static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab) > > As you now depend on ASPM driver being there, these should also add to > Kconfig: > > depends on PCIEASPM > I thought about it, but since this driver doesn't necessarily enable ASPM for all the devices it supports, I didn't add the dependency. But looking at it again, I think makes sense to add the dependency since the driver cannot work reliably without disabling ASPM (for the supported devices). - Mani -- மணிவண்ணன் சதாசிவம்
On Mon, 21 Jul 2025, Manivannan Sadhasivam wrote: > On Mon, Jul 21, 2025 at 11:04:10AM GMT, Ilpo Järvinen wrote: > > On Wed, 16 Jul 2025, Manivannan Sadhasivam via B4 Relay wrote: > > > From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > > > > > It is not recommended to enable/disable the ASPM states on the back of the > > > PCI core directly using the LNKCTL register. It will break the PCI core's > > > knowledge about the device ASPM states. So use the APIs exposed by the PCI > > > core to enable/disable ASPM states. > > > > > > Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 > > > > > > Reported-by: Qiang Yu <qiang.yu@oss.qualcomm.com> > > > Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > > --- > > > drivers/net/wireless/ath/ath.h | 14 ++++++++++++++ > > > drivers/net/wireless/ath/ath12k/pci.c | 10 ++++------ > > > 2 files changed, 18 insertions(+), 6 deletions(-) > > > > > > diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h > > > index 34654f710d8a1e63f65a47d4602e2035262a4d9e..ef685123b66bf4f41428fec67c1967f242a9ef27 100644 > > > --- a/drivers/net/wireless/ath/ath.h > > > +++ b/drivers/net/wireless/ath/ath.h > > > @@ -21,6 +21,8 @@ > > > #include <linux/skbuff.h> > > > #include <linux/if_ether.h> > > > #include <linux/spinlock.h> > > > +#include <linux/pci.h> > > > +#include <linux/pci_regs.h> > > > #include <net/mac80211.h> > > > > > > /* > > > @@ -336,4 +338,16 @@ static inline const char *ath_bus_type_to_string(enum ath_bus_type bustype) > > > return ath_bus_type_strings[bustype]; > > > } > > > > > > +static inline int ath_pci_aspm_state(u16 lnkctl) > > > +{ > > > + int state = 0; > > > + > > > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) > > > + state |= PCIE_LINK_STATE_L0S; > > > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L1) > > > + state |= PCIE_LINK_STATE_L1; > > > + > > > + return state; > > > +} > > > + > > > #endif /* ATH_H */ > > > diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c > > > index 489d546390fcdab8f615cc9184006a958d9f140a..a5e11509e3ab8faad6638ff78ce6a8a5e9c3cbbd 100644 > > > --- a/drivers/net/wireless/ath/ath12k/pci.c > > > +++ b/drivers/net/wireless/ath/ath12k/pci.c > > > @@ -16,6 +16,8 @@ > > > #include "mhi.h" > > > #include "debug.h" > > > > > > +#include "../ath.h" > > > + > > > #define ATH12K_PCI_BAR_NUM 0 > > > #define ATH12K_PCI_DMA_MASK 36 > > > > > > @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > > > u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > > > > > /* disable L0s and L1 */ > > > - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > - PCI_EXP_LNKCTL_ASPMC); > > > + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > > > > I'd remove to comment too as the code is self-explanatory after this > > change. > > > > Ack > > > > > > > set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > > > } > > > @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > > > { > > > if (ab_pci->ab->hw_params->supports_aspm && > > > test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > > > - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > - PCI_EXP_LNKCTL_ASPMC, > > > - ab_pci->link_ctl & > > > - PCI_EXP_LNKCTL_ASPMC); > > > + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > > > } > > > > > > static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab) > > > > As you now depend on ASPM driver being there, these should also add to > > Kconfig: > > > > depends on PCIEASPM > > > > I thought about it, but since this driver doesn't necessarily enable ASPM for > all the devices it supports, I didn't add the dependency. But looking at it > again, I think makes sense to add the dependency since the driver cannot work > reliably without disabling ASPM (for the supported devices). PCIEASPM is already default y and if EXPERT so it is not something that is expected to be disabled. You also no longer need to move the ASPM link state defines LKP found out about after adding the depends on. I'm a bit worried this series will regress in the cases where OS doesn't control ASPM so it might be necessary to include something along the lines of the patch below too (the earlier discussion on this is in Link tags): ----- From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> Subject: [PATCH] PCI/ASPM: Always disable ASPM when driver requests it PCI core/ASPM service driver allows controlling ASPM state through pci_disable_link_state() API. It was decided earlier (see the Link below), to not allow ASPM changes when OS does not have control over it but only log a warning about the problem (commit 2add0ec14c25 ("PCI/ASPM: Warn when driver asks to disable ASPM, but we can't do it")). A number of drivers have added workarounds to force ASPM off with own writes into the Link Control Register (some even with comments explaining why PCI core does not disable it under some circumstances). According to the comments, some drivers require ASPM to be off for reliable operation. Having custom ASPM handling in drivers is problematic because the state kept in the ASPM service driver is not updated by the changes made outside the link state management API. As the first step to address this issue, make pci_disable_link_state() to unconditionally disable ASPM so the motivation for drivers to come up with custom ASPM handling code is eliminated. To fully take advantage of the ASPM handling core provides, the drivers that need to quirk ASPM have to be altered depend on PCIEASPM and the custom ASPM code is removed. This is to be done separately. As PCIEASPM is already behind EXPERT, it should be no problem to limit disabling it for configurations that do not require touching ASPM. Make pci_disable_link_state() function comment to comply kerneldoc formatting while changing the description. Link: https://lore.kernel.org/all/CANUX_P3F5YhbZX3WGU-j1AGpbXb_T9Bis2ErhvKkFMtDvzatVQ@mail.gmail.com/ Link: https://lore.kernel.org/all/20230511131441.45704-1-ilpo.jarvinen@linux.intel.com/ Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> --- drivers/pci/pcie/aspm.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 5721ebfdea71..11732031e342 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1382,16 +1382,23 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool locked return -EINVAL; /* * A driver requested that ASPM be disabled on this device, but - * if we don't have permission to manage ASPM (e.g., on ACPI + * if we might not have permission to manage ASPM (e.g., on ACPI * systems we have to observe the FADT ACPI_FADT_NO_ASPM bit and - * the _OSC method), we can't honor that request. Windows has - * a similar mechanism using "PciASPMOptOut", which is also - * ignored in this situation. + * the _OSC method), previously we chose to not honor disable + * request in that case. Windows has a similar mechanism using + * "PciASPMOptOut", which is also ignored in this situation. + * + * Not honoring the requests to disable ASPM, however, led to + * drivers forcing ASPM off on their own. As such changes of ASPM + * state are not tracked by this service driver, the state kept here + * became out of sync. + * + * Therefore, honor ASPM disable requests even when OS does not have + * ASPM control. Plain disable for ASPM is assumed to be slightly + * safer than fully managing it. */ - if (aspm_disabled) { - pci_warn(pdev, "can't disable ASPM; OS doesn't have ASPM control\n"); - return -EPERM; - } + if (aspm_disabled) + pci_warn(pdev, "OS doesn't have ASPM control, disabling ASPM anyway\n"); if (!locked) down_read(&pci_bus_sem); @@ -1418,13 +1425,13 @@ int pci_disable_link_state_locked(struct pci_dev *pdev, int state) EXPORT_SYMBOL(pci_disable_link_state_locked); /** - * pci_disable_link_state - Disable device's link state, so the link will - * never enter specific states. Note that if the BIOS didn't grant ASPM - * control to the OS, this does nothing because we can't touch the LNKCTL - * register. Returns 0 or a negative errno. - * + * pci_disable_link_state - Disable device's link state * @pdev: PCI device * @state: ASPM link state to disable + * + * Disable device's link state so the link will never enter specific states. + * + * Return: 0 or a negative errno */ int pci_disable_link_state(struct pci_dev *pdev, int state) { -- tg: (9f4972a5d481..) aspm/disable-always (depends on: pci/set-default-comment2)
On Mon, Jul 21, 2025 at 01:09:05PM GMT, Ilpo Järvinen wrote: > On Mon, 21 Jul 2025, Manivannan Sadhasivam wrote: > > On Mon, Jul 21, 2025 at 11:04:10AM GMT, Ilpo Järvinen wrote: > > > On Wed, 16 Jul 2025, Manivannan Sadhasivam via B4 Relay wrote: > > > > From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > > > > > > > It is not recommended to enable/disable the ASPM states on the back of the > > > > PCI core directly using the LNKCTL register. It will break the PCI core's > > > > knowledge about the device ASPM states. So use the APIs exposed by the PCI > > > > core to enable/disable ASPM states. > > > > > > > > Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 > > > > > > > > Reported-by: Qiang Yu <qiang.yu@oss.qualcomm.com> > > > > Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > > > --- > > > > drivers/net/wireless/ath/ath.h | 14 ++++++++++++++ > > > > drivers/net/wireless/ath/ath12k/pci.c | 10 ++++------ > > > > 2 files changed, 18 insertions(+), 6 deletions(-) > > > > > > > > diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h > > > > index 34654f710d8a1e63f65a47d4602e2035262a4d9e..ef685123b66bf4f41428fec67c1967f242a9ef27 100644 > > > > --- a/drivers/net/wireless/ath/ath.h > > > > +++ b/drivers/net/wireless/ath/ath.h > > > > @@ -21,6 +21,8 @@ > > > > #include <linux/skbuff.h> > > > > #include <linux/if_ether.h> > > > > #include <linux/spinlock.h> > > > > +#include <linux/pci.h> > > > > +#include <linux/pci_regs.h> > > > > #include <net/mac80211.h> > > > > > > > > /* > > > > @@ -336,4 +338,16 @@ static inline const char *ath_bus_type_to_string(enum ath_bus_type bustype) > > > > return ath_bus_type_strings[bustype]; > > > > } > > > > > > > > +static inline int ath_pci_aspm_state(u16 lnkctl) > > > > +{ > > > > + int state = 0; > > > > + > > > > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) > > > > + state |= PCIE_LINK_STATE_L0S; > > > > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L1) > > > > + state |= PCIE_LINK_STATE_L1; > > > > + > > > > + return state; > > > > +} > > > > + > > > > #endif /* ATH_H */ > > > > diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c > > > > index 489d546390fcdab8f615cc9184006a958d9f140a..a5e11509e3ab8faad6638ff78ce6a8a5e9c3cbbd 100644 > > > > --- a/drivers/net/wireless/ath/ath12k/pci.c > > > > +++ b/drivers/net/wireless/ath/ath12k/pci.c > > > > @@ -16,6 +16,8 @@ > > > > #include "mhi.h" > > > > #include "debug.h" > > > > > > > > +#include "../ath.h" > > > > + > > > > #define ATH12K_PCI_BAR_NUM 0 > > > > #define ATH12K_PCI_DMA_MASK 36 > > > > > > > > @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > > > > u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > > > > > > > /* disable L0s and L1 */ > > > > - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > > - PCI_EXP_LNKCTL_ASPMC); > > > > + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > > > > > > I'd remove to comment too as the code is self-explanatory after this > > > change. > > > > > > > Ack > > > > > > > > > > set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > > > > } > > > > @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > > > > { > > > > if (ab_pci->ab->hw_params->supports_aspm && > > > > test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > > > > - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > > - PCI_EXP_LNKCTL_ASPMC, > > > > - ab_pci->link_ctl & > > > > - PCI_EXP_LNKCTL_ASPMC); > > > > + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > > > > } > > > > > > > > static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab) > > > > > > As you now depend on ASPM driver being there, these should also add to > > > Kconfig: > > > > > > depends on PCIEASPM > > > > > > > I thought about it, but since this driver doesn't necessarily enable ASPM for > > all the devices it supports, I didn't add the dependency. But looking at it > > again, I think makes sense to add the dependency since the driver cannot work > > reliably without disabling ASPM (for the supported devices). > > PCIEASPM is already default y and if EXPERT so it is not something > that is expected to be disabled. > > You also no longer need to move the ASPM link state defines LKP found out > about after adding the depends on. > Yes, it will fix the reported issue, but guarding the definitions feels wrong to me still. Maybe that's something we can worry later. > I'm a bit worried this series will regress in the cases where OS doesn't > control ASPM so it might be necessary to include something along the > lines of the patch below too (the earlier discussion on this is in Link > tags): > atheros drivers didn't have such comment (why they were manually changing the LNKCTL register), but I agree that there is a chance that they could cause issue on platforms where BIOS didn't give ASPM control to the OS. But as a non-ACPI developer, I don't know what does 'ACPI doesn't give permission to manage ASPM' mean exactly. Does ACPI allow to disable ASPM but not enable it? - Mani > ----- > From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> > Subject: [PATCH] PCI/ASPM: Always disable ASPM when driver requests it > > PCI core/ASPM service driver allows controlling ASPM state through > pci_disable_link_state() API. It was decided earlier (see the Link > below), to not allow ASPM changes when OS does not have control over it > but only log a warning about the problem (commit 2add0ec14c25 > ("PCI/ASPM: Warn when driver asks to disable ASPM, but we can't do > it")). > > A number of drivers have added workarounds to force ASPM off with own > writes into the Link Control Register (some even with comments > explaining why PCI core does not disable it under some circumstances). > According to the comments, some drivers require ASPM to be off for > reliable operation. > > Having custom ASPM handling in drivers is problematic because the state > kept in the ASPM service driver is not updated by the changes made > outside the link state management API. > > As the first step to address this issue, make pci_disable_link_state() > to unconditionally disable ASPM so the motivation for drivers to come > up with custom ASPM handling code is eliminated. > > To fully take advantage of the ASPM handling core provides, the drivers > that need to quirk ASPM have to be altered depend on PCIEASPM and the > custom ASPM code is removed. This is to be done separately. As PCIEASPM > is already behind EXPERT, it should be no problem to limit disabling it > for configurations that do not require touching ASPM. > > Make pci_disable_link_state() function comment to comply kerneldoc > formatting while changing the description. > > Link: https://lore.kernel.org/all/CANUX_P3F5YhbZX3WGU-j1AGpbXb_T9Bis2ErhvKkFMtDvzatVQ@mail.gmail.com/ > Link: https://lore.kernel.org/all/20230511131441.45704-1-ilpo.jarvinen@linux.intel.com/ > Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> > > --- > drivers/pci/pcie/aspm.c | 33 ++++++++++++++++++++------------- > 1 file changed, 20 insertions(+), 13 deletions(-) > > diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c > index 5721ebfdea71..11732031e342 100644 > --- a/drivers/pci/pcie/aspm.c > +++ b/drivers/pci/pcie/aspm.c > @@ -1382,16 +1382,23 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool locked > return -EINVAL; > /* > * A driver requested that ASPM be disabled on this device, but > - * if we don't have permission to manage ASPM (e.g., on ACPI > + * if we might not have permission to manage ASPM (e.g., on ACPI > * systems we have to observe the FADT ACPI_FADT_NO_ASPM bit and > - * the _OSC method), we can't honor that request. Windows has > - * a similar mechanism using "PciASPMOptOut", which is also > - * ignored in this situation. > + * the _OSC method), previously we chose to not honor disable > + * request in that case. Windows has a similar mechanism using > + * "PciASPMOptOut", which is also ignored in this situation. > + * > + * Not honoring the requests to disable ASPM, however, led to > + * drivers forcing ASPM off on their own. As such changes of ASPM > + * state are not tracked by this service driver, the state kept here > + * became out of sync. > + * > + * Therefore, honor ASPM disable requests even when OS does not have > + * ASPM control. Plain disable for ASPM is assumed to be slightly > + * safer than fully managing it. > */ > - if (aspm_disabled) { > - pci_warn(pdev, "can't disable ASPM; OS doesn't have ASPM control\n"); > - return -EPERM; > - } > + if (aspm_disabled) > + pci_warn(pdev, "OS doesn't have ASPM control, disabling ASPM anyway\n"); > > if (!locked) > down_read(&pci_bus_sem); > @@ -1418,13 +1425,13 @@ int pci_disable_link_state_locked(struct pci_dev *pdev, int state) > EXPORT_SYMBOL(pci_disable_link_state_locked); > > /** > - * pci_disable_link_state - Disable device's link state, so the link will > - * never enter specific states. Note that if the BIOS didn't grant ASPM > - * control to the OS, this does nothing because we can't touch the LNKCTL > - * register. Returns 0 or a negative errno. > - * > + * pci_disable_link_state - Disable device's link state > * @pdev: PCI device > * @state: ASPM link state to disable > + * > + * Disable device's link state so the link will never enter specific states. > + * > + * Return: 0 or a negative errno > */ > int pci_disable_link_state(struct pci_dev *pdev, int state) > { > > -- > tg: (9f4972a5d481..) aspm/disable-always (depends on: pci/set-default-comment2) -- மணிவண்ணன் சதாசிவம்
On Mon, 21 Jul 2025, Manivannan Sadhasivam wrote: > On Mon, Jul 21, 2025 at 01:09:05PM GMT, Ilpo Järvinen wrote: > > On Mon, 21 Jul 2025, Manivannan Sadhasivam wrote: > > > On Mon, Jul 21, 2025 at 11:04:10AM GMT, Ilpo Järvinen wrote: > > > > On Wed, 16 Jul 2025, Manivannan Sadhasivam via B4 Relay wrote: > > > > > From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > > > > > > > > > It is not recommended to enable/disable the ASPM states on the back of the > > > > > PCI core directly using the LNKCTL register. It will break the PCI core's > > > > > knowledge about the device ASPM states. So use the APIs exposed by the PCI > > > > > core to enable/disable ASPM states. > > > > > > > > > > Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 > > > > > > > > > > Reported-by: Qiang Yu <qiang.yu@oss.qualcomm.com> > > > > > Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > > > > --- > > > > > drivers/net/wireless/ath/ath.h | 14 ++++++++++++++ > > > > > drivers/net/wireless/ath/ath12k/pci.c | 10 ++++------ > > > > > 2 files changed, 18 insertions(+), 6 deletions(-) > > > > > > > > > > diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h > > > > > index 34654f710d8a1e63f65a47d4602e2035262a4d9e..ef685123b66bf4f41428fec67c1967f242a9ef27 100644 > > > > > --- a/drivers/net/wireless/ath/ath.h > > > > > +++ b/drivers/net/wireless/ath/ath.h > > > > > @@ -21,6 +21,8 @@ > > > > > #include <linux/skbuff.h> > > > > > #include <linux/if_ether.h> > > > > > #include <linux/spinlock.h> > > > > > +#include <linux/pci.h> > > > > > +#include <linux/pci_regs.h> > > > > > #include <net/mac80211.h> > > > > > > > > > > /* > > > > > @@ -336,4 +338,16 @@ static inline const char *ath_bus_type_to_string(enum ath_bus_type bustype) > > > > > return ath_bus_type_strings[bustype]; > > > > > } > > > > > > > > > > +static inline int ath_pci_aspm_state(u16 lnkctl) > > > > > +{ > > > > > + int state = 0; > > > > > + > > > > > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) > > > > > + state |= PCIE_LINK_STATE_L0S; > > > > > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L1) > > > > > + state |= PCIE_LINK_STATE_L1; > > > > > + > > > > > + return state; > > > > > +} > > > > > + > > > > > #endif /* ATH_H */ > > > > > diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c > > > > > index 489d546390fcdab8f615cc9184006a958d9f140a..a5e11509e3ab8faad6638ff78ce6a8a5e9c3cbbd 100644 > > > > > --- a/drivers/net/wireless/ath/ath12k/pci.c > > > > > +++ b/drivers/net/wireless/ath/ath12k/pci.c > > > > > @@ -16,6 +16,8 @@ > > > > > #include "mhi.h" > > > > > #include "debug.h" > > > > > > > > > > +#include "../ath.h" > > > > > + > > > > > #define ATH12K_PCI_BAR_NUM 0 > > > > > #define ATH12K_PCI_DMA_MASK 36 > > > > > > > > > > @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > > > > > u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > > > > > > > > > /* disable L0s and L1 */ > > > > > - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > > > - PCI_EXP_LNKCTL_ASPMC); > > > > > + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > > > > > > > > I'd remove to comment too as the code is self-explanatory after this > > > > change. > > > > > > > > > > Ack > > > > > > > > > > > > > set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > > > > > } > > > > > @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > > > > > { > > > > > if (ab_pci->ab->hw_params->supports_aspm && > > > > > test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > > > > > - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > > > - PCI_EXP_LNKCTL_ASPMC, > > > > > - ab_pci->link_ctl & > > > > > - PCI_EXP_LNKCTL_ASPMC); > > > > > + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > > > > > } > > > > > > > > > > static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab) > > > > > > > > As you now depend on ASPM driver being there, these should also add to > > > > Kconfig: > > > > > > > > depends on PCIEASPM > > > > > > > > > > I thought about it, but since this driver doesn't necessarily enable ASPM for > > > all the devices it supports, I didn't add the dependency. But looking at it > > > again, I think makes sense to add the dependency since the driver cannot work > > > reliably without disabling ASPM (for the supported devices). > > > > PCIEASPM is already default y and if EXPERT so it is not something > > that is expected to be disabled. > > > > You also no longer need to move the ASPM link state defines LKP found out > > about after adding the depends on. > > > > Yes, it will fix the reported issue, but guarding the definitions feels wrong to > me still. Maybe that's something we can worry later. > > > I'm a bit worried this series will regress in the cases where OS doesn't > > control ASPM so it might be necessary to include something along the > > lines of the patch below too (the earlier discussion on this is in Link > > tags): > > > > atheros drivers didn't have such comment (why they were manually changing the > LNKCTL register), but I agree that there is a chance that they could cause issue > on platforms where BIOS didn't give ASPM control to the OS. > > But as a non-ACPI developer, I don't know what does 'ACPI doesn't give > permission to manage ASPM' mean exactly. Does ACPI allow to disable ASPM but not > enable it? While others are likely better qualified to answer this, my impression is that even disabling ASPM is not allowed when OS does has not been granted control over ASPM (OS should not change the value of ASPM Control in LNKCTL at all). The obvious trouble then is, if a driver/hw needs ASPM to be disabled over certain period of its operation or entirely to ensure stable operation, and ASPM is enabled, we're between a rock and a hard place when changes to ASPM Control field are disallowed. Because the ASPM driver took a hard stance of conformance here and did not let touching the ASPM Control field, we ended up having drivers that then write ASPM Control on their own to work around HW problems (see e.g. the comments in drivers/net/ethernet/intel/e1000e/netdev.c that make it very clear it was intentional from the driver) so it was considered that allowing disabling ASPM might be acceptable compromise over drivers doing it on their own (and leaving the ASPM driver out of the loop because it cannot be relied to disable ASPM consistently in all cases). -- i. > > ----- > > From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> > > Subject: [PATCH] PCI/ASPM: Always disable ASPM when driver requests it > > > > PCI core/ASPM service driver allows controlling ASPM state through > > pci_disable_link_state() API. It was decided earlier (see the Link > > below), to not allow ASPM changes when OS does not have control over it > > but only log a warning about the problem (commit 2add0ec14c25 > > ("PCI/ASPM: Warn when driver asks to disable ASPM, but we can't do > > it")). > > > > A number of drivers have added workarounds to force ASPM off with own > > writes into the Link Control Register (some even with comments > > explaining why PCI core does not disable it under some circumstances). > > According to the comments, some drivers require ASPM to be off for > > reliable operation. > > > > Having custom ASPM handling in drivers is problematic because the state > > kept in the ASPM service driver is not updated by the changes made > > outside the link state management API. > > > > As the first step to address this issue, make pci_disable_link_state() > > to unconditionally disable ASPM so the motivation for drivers to come > > up with custom ASPM handling code is eliminated. > > > > To fully take advantage of the ASPM handling core provides, the drivers > > that need to quirk ASPM have to be altered depend on PCIEASPM and the > > custom ASPM code is removed. This is to be done separately. As PCIEASPM > > is already behind EXPERT, it should be no problem to limit disabling it > > for configurations that do not require touching ASPM. > > > > Make pci_disable_link_state() function comment to comply kerneldoc > > formatting while changing the description. > > > > Link: https://lore.kernel.org/all/CANUX_P3F5YhbZX3WGU-j1AGpbXb_T9Bis2ErhvKkFMtDvzatVQ@mail.gmail.com/ > > Link: https://lore.kernel.org/all/20230511131441.45704-1-ilpo.jarvinen@linux.intel.com/ > > Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> > > > > --- > > drivers/pci/pcie/aspm.c | 33 ++++++++++++++++++++------------- > > 1 file changed, 20 insertions(+), 13 deletions(-) > > > > diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c > > index 5721ebfdea71..11732031e342 100644 > > --- a/drivers/pci/pcie/aspm.c > > +++ b/drivers/pci/pcie/aspm.c > > @@ -1382,16 +1382,23 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool locked > > return -EINVAL; > > /* > > * A driver requested that ASPM be disabled on this device, but > > - * if we don't have permission to manage ASPM (e.g., on ACPI > > + * if we might not have permission to manage ASPM (e.g., on ACPI > > * systems we have to observe the FADT ACPI_FADT_NO_ASPM bit and > > - * the _OSC method), we can't honor that request. Windows has > > - * a similar mechanism using "PciASPMOptOut", which is also > > - * ignored in this situation. > > + * the _OSC method), previously we chose to not honor disable > > + * request in that case. Windows has a similar mechanism using > > + * "PciASPMOptOut", which is also ignored in this situation. > > + * > > + * Not honoring the requests to disable ASPM, however, led to > > + * drivers forcing ASPM off on their own. As such changes of ASPM > > + * state are not tracked by this service driver, the state kept here > > + * became out of sync. > > + * > > + * Therefore, honor ASPM disable requests even when OS does not have > > + * ASPM control. Plain disable for ASPM is assumed to be slightly > > + * safer than fully managing it. > > */ > > - if (aspm_disabled) { > > - pci_warn(pdev, "can't disable ASPM; OS doesn't have ASPM control\n"); > > - return -EPERM; > > - } > > + if (aspm_disabled) > > + pci_warn(pdev, "OS doesn't have ASPM control, disabling ASPM anyway\n"); > > > > if (!locked) > > down_read(&pci_bus_sem); > > @@ -1418,13 +1425,13 @@ int pci_disable_link_state_locked(struct pci_dev *pdev, int state) > > EXPORT_SYMBOL(pci_disable_link_state_locked); > > > > /** > > - * pci_disable_link_state - Disable device's link state, so the link will > > - * never enter specific states. Note that if the BIOS didn't grant ASPM > > - * control to the OS, this does nothing because we can't touch the LNKCTL > > - * register. Returns 0 or a negative errno. > > - * > > + * pci_disable_link_state - Disable device's link state > > * @pdev: PCI device > > * @state: ASPM link state to disable > > + * > > + * Disable device's link state so the link will never enter specific states. > > + * > > + * Return: 0 or a negative errno > > */ > > int pci_disable_link_state(struct pci_dev *pdev, int state) > > { > > > > -- > > tg: (9f4972a5d481..) aspm/disable-always (depends on: pci/set-default-comment2) > > >
On Mon, Jul 21, 2025 at 02:28:24PM GMT, Ilpo Järvinen wrote: > On Mon, 21 Jul 2025, Manivannan Sadhasivam wrote: > > > On Mon, Jul 21, 2025 at 01:09:05PM GMT, Ilpo Järvinen wrote: > > > On Mon, 21 Jul 2025, Manivannan Sadhasivam wrote: > > > > On Mon, Jul 21, 2025 at 11:04:10AM GMT, Ilpo Järvinen wrote: > > > > > On Wed, 16 Jul 2025, Manivannan Sadhasivam via B4 Relay wrote: > > > > > > From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > > > > > > > > > > > It is not recommended to enable/disable the ASPM states on the back of the > > > > > > PCI core directly using the LNKCTL register. It will break the PCI core's > > > > > > knowledge about the device ASPM states. So use the APIs exposed by the PCI > > > > > > core to enable/disable ASPM states. > > > > > > > > > > > > Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 > > > > > > > > > > > > Reported-by: Qiang Yu <qiang.yu@oss.qualcomm.com> > > > > > > Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > > > > > --- > > > > > > drivers/net/wireless/ath/ath.h | 14 ++++++++++++++ > > > > > > drivers/net/wireless/ath/ath12k/pci.c | 10 ++++------ > > > > > > 2 files changed, 18 insertions(+), 6 deletions(-) > > > > > > > > > > > > diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h > > > > > > index 34654f710d8a1e63f65a47d4602e2035262a4d9e..ef685123b66bf4f41428fec67c1967f242a9ef27 100644 > > > > > > --- a/drivers/net/wireless/ath/ath.h > > > > > > +++ b/drivers/net/wireless/ath/ath.h > > > > > > @@ -21,6 +21,8 @@ > > > > > > #include <linux/skbuff.h> > > > > > > #include <linux/if_ether.h> > > > > > > #include <linux/spinlock.h> > > > > > > +#include <linux/pci.h> > > > > > > +#include <linux/pci_regs.h> > > > > > > #include <net/mac80211.h> > > > > > > > > > > > > /* > > > > > > @@ -336,4 +338,16 @@ static inline const char *ath_bus_type_to_string(enum ath_bus_type bustype) > > > > > > return ath_bus_type_strings[bustype]; > > > > > > } > > > > > > > > > > > > +static inline int ath_pci_aspm_state(u16 lnkctl) > > > > > > +{ > > > > > > + int state = 0; > > > > > > + > > > > > > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) > > > > > > + state |= PCIE_LINK_STATE_L0S; > > > > > > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L1) > > > > > > + state |= PCIE_LINK_STATE_L1; > > > > > > + > > > > > > + return state; > > > > > > +} > > > > > > + > > > > > > #endif /* ATH_H */ > > > > > > diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c > > > > > > index 489d546390fcdab8f615cc9184006a958d9f140a..a5e11509e3ab8faad6638ff78ce6a8a5e9c3cbbd 100644 > > > > > > --- a/drivers/net/wireless/ath/ath12k/pci.c > > > > > > +++ b/drivers/net/wireless/ath/ath12k/pci.c > > > > > > @@ -16,6 +16,8 @@ > > > > > > #include "mhi.h" > > > > > > #include "debug.h" > > > > > > > > > > > > +#include "../ath.h" > > > > > > + > > > > > > #define ATH12K_PCI_BAR_NUM 0 > > > > > > #define ATH12K_PCI_DMA_MASK 36 > > > > > > > > > > > > @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > > > > > > u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > > > > > > > > > > > /* disable L0s and L1 */ > > > > > > - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > > > > - PCI_EXP_LNKCTL_ASPMC); > > > > > > + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > > > > > > > > > > I'd remove to comment too as the code is self-explanatory after this > > > > > change. > > > > > > > > > > > > > Ack > > > > > > > > > > > > > > > > set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > > > > > > } > > > > > > @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > > > > > > { > > > > > > if (ab_pci->ab->hw_params->supports_aspm && > > > > > > test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > > > > > > - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > > > > - PCI_EXP_LNKCTL_ASPMC, > > > > > > - ab_pci->link_ctl & > > > > > > - PCI_EXP_LNKCTL_ASPMC); > > > > > > + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > > > > > > } > > > > > > > > > > > > static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab) > > > > > > > > > > As you now depend on ASPM driver being there, these should also add to > > > > > Kconfig: > > > > > > > > > > depends on PCIEASPM > > > > > > > > > > > > > I thought about it, but since this driver doesn't necessarily enable ASPM for > > > > all the devices it supports, I didn't add the dependency. But looking at it > > > > again, I think makes sense to add the dependency since the driver cannot work > > > > reliably without disabling ASPM (for the supported devices). > > > > > > PCIEASPM is already default y and if EXPERT so it is not something > > > that is expected to be disabled. > > > > > > You also no longer need to move the ASPM link state defines LKP found out > > > about after adding the depends on. > > > > > > > Yes, it will fix the reported issue, but guarding the definitions feels wrong to > > me still. Maybe that's something we can worry later. > > > > > I'm a bit worried this series will regress in the cases where OS doesn't > > > control ASPM so it might be necessary to include something along the > > > lines of the patch below too (the earlier discussion on this is in Link > > > tags): > > > > > > > atheros drivers didn't have such comment (why they were manually changing the > > LNKCTL register), but I agree that there is a chance that they could cause issue > > on platforms where BIOS didn't give ASPM control to the OS. > > > > But as a non-ACPI developer, I don't know what does 'ACPI doesn't give > > permission to manage ASPM' mean exactly. Does ACPI allow to disable ASPM but not > > enable it? > > While others are likely better qualified to answer this, my impression is > that even disabling ASPM is not allowed when OS does has not been granted > control over ASPM (OS should not change the value of ASPM Control in > LNKCTL at all). > > The obvious trouble then is, if a driver/hw needs ASPM to be disabled over > certain period of its operation or entirely to ensure stable operation, > and ASPM is enabled, we're between a rock and a hard place when changes to > ASPM Control field are disallowed. > > Because the ASPM driver took a hard stance of conformance here and did > not let touching the ASPM Control field, we ended up having drivers that > then write ASPM Control on their own to work around HW problems (see e.g. > the comments in drivers/net/ethernet/intel/e1000e/netdev.c that make it > very clear it was intentional from the driver) so it was considered that > allowing disabling ASPM might be acceptable compromise over drivers doing > it on their own (and leaving the ASPM driver out of the loop because it > cannot be relied to disable ASPM consistently in all cases). > Since there was no comments from ACPI folks, I will go ahead with your patch. I will also CC them in the next version so that they can yell at me if they want. - Mani -- மணிவண்ணன் சதாசிவம்
On 7/16/2025 8:56 PM, Manivannan Sadhasivam via B4 Relay wrote: > From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > > It is not recommended to enable/disable the ASPM states on the back of the > PCI core directly using the LNKCTL register. It will break the PCI core's > knowledge about the device ASPM states. So use the APIs exposed by the PCI > core to enable/disable ASPM states. > > Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 > > Reported-by: Qiang Yu <qiang.yu@oss.qualcomm.com> > Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com> > --- > drivers/net/wireless/ath/ath.h | 14 ++++++++++++++ > drivers/net/wireless/ath/ath12k/pci.c | 10 ++++------ > 2 files changed, 18 insertions(+), 6 deletions(-) > > diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h > index 34654f710d8a1e63f65a47d4602e2035262a4d9e..ef685123b66bf4f41428fec67c1967f242a9ef27 100644 > --- a/drivers/net/wireless/ath/ath.h > +++ b/drivers/net/wireless/ath/ath.h > @@ -21,6 +21,8 @@ > #include <linux/skbuff.h> > #include <linux/if_ether.h> > #include <linux/spinlock.h> > +#include <linux/pci.h> > +#include <linux/pci_regs.h> > #include <net/mac80211.h> > > /* > @@ -336,4 +338,16 @@ static inline const char *ath_bus_type_to_string(enum ath_bus_type bustype) > return ath_bus_type_strings[bustype]; > } > > +static inline int ath_pci_aspm_state(u16 lnkctl) > +{ > + int state = 0; > + > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) > + state |= PCIE_LINK_STATE_L0S; > + if (lnkctl & PCI_EXP_LNKCTL_ASPM_L1) > + state |= PCIE_LINK_STATE_L1; > + > + return state; > +} > + > #endif /* ATH_H */ > diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c > index 489d546390fcdab8f615cc9184006a958d9f140a..a5e11509e3ab8faad6638ff78ce6a8a5e9c3cbbd 100644 > --- a/drivers/net/wireless/ath/ath12k/pci.c > +++ b/drivers/net/wireless/ath/ath12k/pci.c I add some logs: diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index de5a4059a7a9..5a52093e0226 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -1161,16 +1161,28 @@ void ath12k_pci_stop(struct ath12k_base *ab) ath12k_ce_cleanup_pipes(ab); } +static void ath12k_pci_dump_pcie_link_ctrl(struct ath12k_pci *ab_pci, const char *str1, u16 line) +{ + u16 link_ctl = 0; + + pcie_capability_read_word(ab_pci->pdev, PCI_EXP_LNKCTL, + &link_ctl); + + pr_info("%s %u: link_ctl 0x%x\n", str1, line, link_ctl); +} + int ath12k_pci_start(struct ath12k_base *ab) { struct ath12k_pci *ab_pci = ath12k_pci_priv(ab); set_bit(ATH12K_PCI_FLAG_INIT_DONE, &ab_pci->flags); + ath12k_pci_dump_pcie_link_ctrl(ab_pci, __func__, __LINE__); if (test_bit(ATH12K_PCI_FLAG_MULTI_MSI_VECTORS, &ab_pci->flags)) ath12k_pci_aspm_restore(ab_pci); else ath12k_info(ab, "leaving PCI ASPM disabled to avoid MHI M2 problems\n"); + ath12k_pci_dump_pcie_link_ctrl(ab_pci, __func__, __LINE__); ath12k_pci_ce_irqs_enable(ab); ath12k_ce_rx_post_buf(ab); @@ -1460,11 +1472,15 @@ int ath12k_pci_power_up(struct ath12k_base *ab) clear_bit(ATH12K_PCI_FLAG_INIT_DONE, &ab_pci->flags); ath12k_pci_sw_reset(ab_pci->ab, true); + ath12k_pci_dump_pcie_link_ctrl(ab_pci, __func__, __LINE__); + /* Disable ASPM during firmware download due to problems switching * to AMSS state. */ ath12k_pci_aspm_disable(ab_pci); + ath12k_pci_dump_pcie_link_ctrl(ab_pci, __func__, __LINE__); + ath12k_pci_msi_enable(ab_pci); if (ath12k_fw_feature_supported(ab, ATH12K_FW_FEATURE_MULTI_QRTR_ID)) > @@ -16,6 +16,8 @@ > #include "mhi.h" > #include "debug.h" > > +#include "../ath.h" > + > #define ATH12K_PCI_BAR_NUM 0 > #define ATH12K_PCI_DMA_MASK 36 > > @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > /* disable L0s and L1 */ > - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > - PCI_EXP_LNKCTL_ASPMC); > + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); Not always, but sometimes seems the 'disable' does not work: [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable > > set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > } > @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > { > if (ab_pci->ab->hw_params->supports_aspm && > test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > - PCI_EXP_LNKCTL_ASPMC, > - ab_pci->link_ctl & > - PCI_EXP_LNKCTL_ASPMC); > + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); always, the 'enable' is not working: [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore > } > > static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab) > In addition, frequently I can see below AER warnings: [ 280.383143] aer_ratelimit: 30 callbacks suppressed [ 280.383151] pcieport 0000:00:1c.0: AER: Correctable error message received from 0000:00:1c.0 [ 280.383177] pcieport 0000:00:1c.0: PCIe Bus Error: severity=Correctable, type=Data Link Layer, (Transmitter ID) [ 280.383184] pcieport 0000:00:1c.0: device [8086:7ab8] error status/mask=00001000/00002000 [ 280.383193] pcieport 0000:00:1c.0: [12] Timeout
On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: [...] > > @@ -16,6 +16,8 @@ > > #include "mhi.h" > > #include "debug.h" > > > > +#include "../ath.h" > > + > > #define ATH12K_PCI_BAR_NUM 0 > > #define ATH12K_PCI_DMA_MASK 36 > > > > @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > > u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > > > /* disable L0s and L1 */ > > - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > - PCI_EXP_LNKCTL_ASPMC); > > + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > > Not always, but sometimes seems the 'disable' does not work: > > [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable > [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable > > > > > > set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > > } > > @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > > { > > if (ab_pci->ab->hw_params->supports_aspm && > > test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > > - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > - PCI_EXP_LNKCTL_ASPMC, > > - ab_pci->link_ctl & > > - PCI_EXP_LNKCTL_ASPMC); > > + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > > always, the 'enable' is not working: > > [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore > [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore > Interesting! I applied your diff and I never see this issue so far (across 10+ reboots): [ 3.758239] ath12k_pci_power_up 1475: link_ctl 0x42 [ 3.758315] ath12k_pci_power_up 1480: link_ctl 0x40 [ 4.383900] ath12k_pci_start 1180: link_ctl 0x40 [ 4.384026] ath12k_pci_start 1185: link_ctl 0x42 Are you sure that you applied all the 6 patches in the series and not just the ath patches? Because, the first 3 PCI core patches are required to make the API work as intended. > > > } > > > > static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab) > > > > In addition, frequently I can see below AER warnings: > > [ 280.383143] aer_ratelimit: 30 callbacks suppressed > [ 280.383151] pcieport 0000:00:1c.0: AER: Correctable error message received from > 0000:00:1c.0 > [ 280.383177] pcieport 0000:00:1c.0: PCIe Bus Error: severity=Correctable, type=Data Link > Layer, (Transmitter ID) > [ 280.383184] pcieport 0000:00:1c.0: device [8086:7ab8] error status/mask=00001000/00002000 > [ 280.383193] pcieport 0000:00:1c.0: [12] Timeout > I don't see any AER errors either. - Mani -- மணிவண்ணன் சதாசிவம்
On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: > On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: > > [...] > >>> @@ -16,6 +16,8 @@ >>> #include "mhi.h" >>> #include "debug.h" >>> >>> +#include "../ath.h" >>> + >>> #define ATH12K_PCI_BAR_NUM 0 >>> #define ATH12K_PCI_DMA_MASK 36 >>> >>> @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) >>> u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); >>> >>> /* disable L0s and L1 */ >>> - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, >>> - PCI_EXP_LNKCTL_ASPMC); >>> + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); >> >> Not always, but sometimes seems the 'disable' does not work: >> >> [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable >> [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable >> >> >>> >>> set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); >>> } >>> @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) >>> { >>> if (ab_pci->ab->hw_params->supports_aspm && >>> test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) >>> - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, >>> - PCI_EXP_LNKCTL_ASPMC, >>> - ab_pci->link_ctl & >>> - PCI_EXP_LNKCTL_ASPMC); >>> + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); >> >> always, the 'enable' is not working: >> >> [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore >> [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore >> > > Interesting! I applied your diff and I never see this issue so far (across 10+ > reboots): I was not testing reboot. Here is what I am doing: step1: rmmod ath12k step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see the issue) sudo setpci -s 02:00.0 0x80.B=0x43 step3: insmod ath12k and check linkctrl > > [ 3.758239] ath12k_pci_power_up 1475: link_ctl 0x42 > [ 3.758315] ath12k_pci_power_up 1480: link_ctl 0x40 > [ 4.383900] ath12k_pci_start 1180: link_ctl 0x40 > [ 4.384026] ath12k_pci_start 1185: link_ctl 0x42 > > Are you sure that you applied all the 6 patches in the series and not just the > ath patches? Because, the first 3 PCI core patches are required to make the API > work as intended. pretty sure all of them: $ git log --oneline 07387d1bc17f (HEAD -> VALIDATE-pci-enable-link-state-behavior) wifi: ath12k: dump linkctrl reg dbb3e5a7828b wifi: ath10k: Use pci_{enable/disable}_link_state() APIs to enable/disable ASPM states 392d7b3486b3 wifi: ath11k: Use pci_{enable/disable}_link_state() APIs to enable/disable ASPM states f2b0685c456d wifi: ath12k: Use pci_{enable/disable}_link_state() APIs to enable/disable ASPM states b1c8fad998f1 PCI/ASPM: Improve the kernel-doc for pci_disable_link_state*() APIs b8f5204ba4b0 PCI/ASPM: Transition the device to D0 (if required) inside pci_enable_link_state_locked() API 186b1bbd4c62 PCI/ASPM: Fix the behavior of pci_enable_link_state*() APIs 5a1ad8faaa16 (tag: ath-202507151704, origin/master, origin/main, origin/HEAD) Add localversion-wireless-testing-ath > >> >>> } >>> >>> static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab) >>> >> >> In addition, frequently I can see below AER warnings: >> >> [ 280.383143] aer_ratelimit: 30 callbacks suppressed >> [ 280.383151] pcieport 0000:00:1c.0: AER: Correctable error message received from >> 0000:00:1c.0 >> [ 280.383177] pcieport 0000:00:1c.0: PCIe Bus Error: severity=Correctable, type=Data Link >> Layer, (Transmitter ID) >> [ 280.383184] pcieport 0000:00:1c.0: device [8086:7ab8] error status/mask=00001000/00002000 >> [ 280.383193] pcieport 0000:00:1c.0: [12] Timeout >> > > I don't see any AER errors either. My WLAN chip is attached via a PCIe-to-M.2 adapter, maybe some hardware issue? However I never saw them until your changes applied. > > - Mani >
On Thu, Jul 17, 2025 at 06:46:12PM GMT, Baochen Qiang wrote: > > > On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: > > On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: > > > > [...] > > > >>> @@ -16,6 +16,8 @@ > >>> #include "mhi.h" > >>> #include "debug.h" > >>> > >>> +#include "../ath.h" > >>> + > >>> #define ATH12K_PCI_BAR_NUM 0 > >>> #define ATH12K_PCI_DMA_MASK 36 > >>> > >>> @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > >>> u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > >>> > >>> /* disable L0s and L1 */ > >>> - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > >>> - PCI_EXP_LNKCTL_ASPMC); > >>> + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > >> > >> Not always, but sometimes seems the 'disable' does not work: > >> > >> [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable > >> [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable > >> > >> > >>> > >>> set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > >>> } > >>> @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > >>> { > >>> if (ab_pci->ab->hw_params->supports_aspm && > >>> test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > >>> - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > >>> - PCI_EXP_LNKCTL_ASPMC, > >>> - ab_pci->link_ctl & > >>> - PCI_EXP_LNKCTL_ASPMC); > >>> + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > >> > >> always, the 'enable' is not working: > >> > >> [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore > >> [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore > >> > > > > Interesting! I applied your diff and I never see this issue so far (across 10+ > > reboots): > > I was not testing reboot. Here is what I am doing: > > step1: rmmod ath12k > step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see > the issue) > > sudo setpci -s 02:00.0 0x80.B=0x43 > > step3: insmod ath12k and check linkctrl > So I did the same and got: [ 3283.363569] ath12k_pci_power_up 1475: link_ctl 0x43 [ 3283.363769] ath12k_pci_power_up 1480: link_ctl 0x40 [ 3284.007661] ath12k_pci_start 1180: link_ctl 0x40 [ 3284.007826] ath12k_pci_start 1185: link_ctl 0x42 My host machine is Qcom based Thinkpad T14s and it doesn't support L0s. So that's why the lnkctl value once enabled becomes 0x42. This is exactly the reason why the drivers should not muck around LNKCTL register manually. > > > > [ 3.758239] ath12k_pci_power_up 1475: link_ctl 0x42 > > [ 3.758315] ath12k_pci_power_up 1480: link_ctl 0x40 > > [ 4.383900] ath12k_pci_start 1180: link_ctl 0x40 > > [ 4.384026] ath12k_pci_start 1185: link_ctl 0x42 > > > > Are you sure that you applied all the 6 patches in the series and not just the > > ath patches? Because, the first 3 PCI core patches are required to make the API > > work as intended. > > pretty sure all of them: > > $ git log --oneline > 07387d1bc17f (HEAD -> VALIDATE-pci-enable-link-state-behavior) wifi: ath12k: dump linkctrl reg > dbb3e5a7828b wifi: ath10k: Use pci_{enable/disable}_link_state() APIs to enable/disable > ASPM states > 392d7b3486b3 wifi: ath11k: Use pci_{enable/disable}_link_state() APIs to enable/disable > ASPM states > f2b0685c456d wifi: ath12k: Use pci_{enable/disable}_link_state() APIs to enable/disable > ASPM states > b1c8fad998f1 PCI/ASPM: Improve the kernel-doc for pci_disable_link_state*() APIs > b8f5204ba4b0 PCI/ASPM: Transition the device to D0 (if required) inside > pci_enable_link_state_locked() API > 186b1bbd4c62 PCI/ASPM: Fix the behavior of pci_enable_link_state*() APIs > 5a1ad8faaa16 (tag: ath-202507151704, origin/master, origin/main, origin/HEAD) Add > localversion-wireless-testing-ath > Ok! > > > > >> > >>> } > >>> > >>> static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab) > >>> > >> > >> In addition, frequently I can see below AER warnings: > >> > >> [ 280.383143] aer_ratelimit: 30 callbacks suppressed > >> [ 280.383151] pcieport 0000:00:1c.0: AER: Correctable error message received from > >> 0000:00:1c.0 > >> [ 280.383177] pcieport 0000:00:1c.0: PCIe Bus Error: severity=Correctable, type=Data Link > >> Layer, (Transmitter ID) > >> [ 280.383184] pcieport 0000:00:1c.0: device [8086:7ab8] error status/mask=00001000/00002000 > >> [ 280.383193] pcieport 0000:00:1c.0: [12] Timeout > >> > > > > I don't see any AER errors either. > > My WLAN chip is attached via a PCIe-to-M.2 adapter, maybe some hardware issue? However I > never saw them until your changes applied. > I don't think it should matter. I have an Intel NUC lying around with QCA6390 attached via M.2. Let me test this change on that and report back the result. - Mani -- மணிவண்ணன் சதாசிவம்
On 7/17/2025 7:29 PM, Manivannan Sadhasivam wrote: > On Thu, Jul 17, 2025 at 06:46:12PM GMT, Baochen Qiang wrote: >> >> >> On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: >>> On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: >>> >>> [...] >>> >>>>> @@ -16,6 +16,8 @@ >>>>> #include "mhi.h" >>>>> #include "debug.h" >>>>> >>>>> +#include "../ath.h" >>>>> + >>>>> #define ATH12K_PCI_BAR_NUM 0 >>>>> #define ATH12K_PCI_DMA_MASK 36 >>>>> >>>>> @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) >>>>> u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); >>>>> >>>>> /* disable L0s and L1 */ >>>>> - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, >>>>> - PCI_EXP_LNKCTL_ASPMC); >>>>> + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); >>>> >>>> Not always, but sometimes seems the 'disable' does not work: >>>> >>>> [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable >>>> [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable >>>> >>>> >>>>> >>>>> set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); >>>>> } >>>>> @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) >>>>> { >>>>> if (ab_pci->ab->hw_params->supports_aspm && >>>>> test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) >>>>> - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, >>>>> - PCI_EXP_LNKCTL_ASPMC, >>>>> - ab_pci->link_ctl & >>>>> - PCI_EXP_LNKCTL_ASPMC); >>>>> + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); >>>> >>>> always, the 'enable' is not working: >>>> >>>> [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore >>>> [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore >>>> >>> >>> Interesting! I applied your diff and I never see this issue so far (across 10+ >>> reboots): >> >> I was not testing reboot. Here is what I am doing: >> >> step1: rmmod ath12k >> step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see >> the issue) >> >> sudo setpci -s 02:00.0 0x80.B=0x43 >> >> step3: insmod ath12k and check linkctrl >> > > So I did the same and got: > > [ 3283.363569] ath12k_pci_power_up 1475: link_ctl 0x43 > [ 3283.363769] ath12k_pci_power_up 1480: link_ctl 0x40 > [ 3284.007661] ath12k_pci_start 1180: link_ctl 0x40 > [ 3284.007826] ath12k_pci_start 1185: link_ctl 0x42 > > My host machine is Qcom based Thinkpad T14s and it doesn't support L0s. So > that's why the lnkctl value once enabled becomes 0x42. This is exactly the > reason why the drivers should not muck around LNKCTL register manually. Thanks, then the 0x43 -> 0x40 -> 0x40 -> 0x42 sequence should not be a concern. But still the random 0x43 -> 0x43 -> 0x43 -> 0x42 sequence seems problematic. How many iterations have you done with above steps? From my side it seems random so better to do some stress test. > >>> >>> [ 3.758239] ath12k_pci_power_up 1475: link_ctl 0x42 >>> [ 3.758315] ath12k_pci_power_up 1480: link_ctl 0x40 >>> [ 4.383900] ath12k_pci_start 1180: link_ctl 0x40 >>> [ 4.384026] ath12k_pci_start 1185: link_ctl 0x42 >>> >>> Are you sure that you applied all the 6 patches in the series and not just the >>> ath patches? Because, the first 3 PCI core patches are required to make the API >>> work as intended. >> >> pretty sure all of them: >> >> $ git log --oneline >> 07387d1bc17f (HEAD -> VALIDATE-pci-enable-link-state-behavior) wifi: ath12k: dump linkctrl reg >> dbb3e5a7828b wifi: ath10k: Use pci_{enable/disable}_link_state() APIs to enable/disable >> ASPM states >> 392d7b3486b3 wifi: ath11k: Use pci_{enable/disable}_link_state() APIs to enable/disable >> ASPM states >> f2b0685c456d wifi: ath12k: Use pci_{enable/disable}_link_state() APIs to enable/disable >> ASPM states >> b1c8fad998f1 PCI/ASPM: Improve the kernel-doc for pci_disable_link_state*() APIs >> b8f5204ba4b0 PCI/ASPM: Transition the device to D0 (if required) inside >> pci_enable_link_state_locked() API >> 186b1bbd4c62 PCI/ASPM: Fix the behavior of pci_enable_link_state*() APIs >> 5a1ad8faaa16 (tag: ath-202507151704, origin/master, origin/main, origin/HEAD) Add >> localversion-wireless-testing-ath >> > > Ok! > >> >>> >>>> >>>>> } >>>>> >>>>> static void ath12k_pci_cancel_workqueue(struct ath12k_base *ab) >>>>> >>>> >>>> In addition, frequently I can see below AER warnings: >>>> >>>> [ 280.383143] aer_ratelimit: 30 callbacks suppressed >>>> [ 280.383151] pcieport 0000:00:1c.0: AER: Correctable error message received from >>>> 0000:00:1c.0 >>>> [ 280.383177] pcieport 0000:00:1c.0: PCIe Bus Error: severity=Correctable, type=Data Link >>>> Layer, (Transmitter ID) >>>> [ 280.383184] pcieport 0000:00:1c.0: device [8086:7ab8] error status/mask=00001000/00002000 >>>> [ 280.383193] pcieport 0000:00:1c.0: [12] Timeout >>>> >>> >>> I don't see any AER errors either. >> >> My WLAN chip is attached via a PCIe-to-M.2 adapter, maybe some hardware issue? However I >> never saw them until your changes applied. >> > > I don't think it should matter. I have an Intel NUC lying around with QCA6390 > attached via M.2. Let me test this change on that and report back the result. > > - Mani >
On Fri, Jul 18, 2025 at 10:05:02AM GMT, Baochen Qiang wrote: > > > On 7/17/2025 7:29 PM, Manivannan Sadhasivam wrote: > > On Thu, Jul 17, 2025 at 06:46:12PM GMT, Baochen Qiang wrote: > >> > >> > >> On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: > >>> On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: > >>> > >>> [...] > >>> > >>>>> @@ -16,6 +16,8 @@ > >>>>> #include "mhi.h" > >>>>> #include "debug.h" > >>>>> > >>>>> +#include "../ath.h" > >>>>> + > >>>>> #define ATH12K_PCI_BAR_NUM 0 > >>>>> #define ATH12K_PCI_DMA_MASK 36 > >>>>> > >>>>> @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > >>>>> u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > >>>>> > >>>>> /* disable L0s and L1 */ > >>>>> - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > >>>>> - PCI_EXP_LNKCTL_ASPMC); > >>>>> + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > >>>> > >>>> Not always, but sometimes seems the 'disable' does not work: > >>>> > >>>> [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable > >>>> [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable > >>>> > >>>> > >>>>> > >>>>> set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > >>>>> } > >>>>> @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > >>>>> { > >>>>> if (ab_pci->ab->hw_params->supports_aspm && > >>>>> test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > >>>>> - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > >>>>> - PCI_EXP_LNKCTL_ASPMC, > >>>>> - ab_pci->link_ctl & > >>>>> - PCI_EXP_LNKCTL_ASPMC); > >>>>> + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > >>>> > >>>> always, the 'enable' is not working: > >>>> > >>>> [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore > >>>> [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore > >>>> > >>> > >>> Interesting! I applied your diff and I never see this issue so far (across 10+ > >>> reboots): > >> > >> I was not testing reboot. Here is what I am doing: > >> > >> step1: rmmod ath12k > >> step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see > >> the issue) > >> > >> sudo setpci -s 02:00.0 0x80.B=0x43 > >> > >> step3: insmod ath12k and check linkctrl > >> > > > > So I did the same and got: > > > > [ 3283.363569] ath12k_pci_power_up 1475: link_ctl 0x43 > > [ 3283.363769] ath12k_pci_power_up 1480: link_ctl 0x40 > > [ 3284.007661] ath12k_pci_start 1180: link_ctl 0x40 > > [ 3284.007826] ath12k_pci_start 1185: link_ctl 0x42 > > > > My host machine is Qcom based Thinkpad T14s and it doesn't support L0s. So > > that's why the lnkctl value once enabled becomes 0x42. This is exactly the > > reason why the drivers should not muck around LNKCTL register manually. > > Thanks, then the 0x43 -> 0x40 -> 0x40 -> 0x42 sequence should not be a concern. But still > the random 0x43 -> 0x43 -> 0x43 -> 0x42 sequence seems problematic. > > How many iterations have you done with above steps? From my side it seems random so better > to do some stress test. > So I ran the modprobe for about 50 times on the Intel NUC that has QCA6390, but didn't spot the disparity. This is the script I used: for i in {1..50} ;do echo "Loop $i"; sudo setpci -s 01:00.0 0x80.B=0x43;\ sudo modprobe -r ath11k_pci; sleep 1; sudo modprobe ath11k_pci; sleep 1;done And I always got: [ 5862.388083] ath11k_pci_aspm_disable: 609 lnkctrl: 0x43 [ 5862.388124] ath11k_pci_aspm_disable: 614 lnkctrl: 0x40 [ 5862.876291] ath11k_pci_start: 880 lnkctrl: 0x40 [ 5862.876346] ath11k_pci_start: 886 lnkctrl: 0x42 Also no AER messages. TBH, I'm not sure how you were able to see the random issues with these APIs. That looks like a race, which is scary. I do not want to ignore your scenario, but would like to reproduce and get to the bottom of it. - Mani -- மணிவண்ணன் சதாசிவம்
On Fri, Jul 18, 2025 at 01:27:27PM GMT, Manivannan Sadhasivam wrote: > On Fri, Jul 18, 2025 at 10:05:02AM GMT, Baochen Qiang wrote: > > > > > > On 7/17/2025 7:29 PM, Manivannan Sadhasivam wrote: > > > On Thu, Jul 17, 2025 at 06:46:12PM GMT, Baochen Qiang wrote: > > >> > > >> > > >> On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: > > >>> On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: > > >>> > > >>> [...] > > >>> > > >>>>> @@ -16,6 +16,8 @@ > > >>>>> #include "mhi.h" > > >>>>> #include "debug.h" > > >>>>> > > >>>>> +#include "../ath.h" > > >>>>> + > > >>>>> #define ATH12K_PCI_BAR_NUM 0 > > >>>>> #define ATH12K_PCI_DMA_MASK 36 > > >>>>> > > >>>>> @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > > >>>>> u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > >>>>> > > >>>>> /* disable L0s and L1 */ > > >>>>> - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > >>>>> - PCI_EXP_LNKCTL_ASPMC); > > >>>>> + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > > >>>> > > >>>> Not always, but sometimes seems the 'disable' does not work: > > >>>> > > >>>> [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable > > >>>> [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable > > >>>> > > >>>> > > >>>>> > > >>>>> set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > > >>>>> } > > >>>>> @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > > >>>>> { > > >>>>> if (ab_pci->ab->hw_params->supports_aspm && > > >>>>> test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > > >>>>> - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > >>>>> - PCI_EXP_LNKCTL_ASPMC, > > >>>>> - ab_pci->link_ctl & > > >>>>> - PCI_EXP_LNKCTL_ASPMC); > > >>>>> + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > > >>>> > > >>>> always, the 'enable' is not working: > > >>>> > > >>>> [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore > > >>>> [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore > > >>>> > > >>> > > >>> Interesting! I applied your diff and I never see this issue so far (across 10+ > > >>> reboots): > > >> > > >> I was not testing reboot. Here is what I am doing: > > >> > > >> step1: rmmod ath12k > > >> step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see > > >> the issue) > > >> > > >> sudo setpci -s 02:00.0 0x80.B=0x43 > > >> > > >> step3: insmod ath12k and check linkctrl > > >> > > > > > > So I did the same and got: > > > > > > [ 3283.363569] ath12k_pci_power_up 1475: link_ctl 0x43 > > > [ 3283.363769] ath12k_pci_power_up 1480: link_ctl 0x40 > > > [ 3284.007661] ath12k_pci_start 1180: link_ctl 0x40 > > > [ 3284.007826] ath12k_pci_start 1185: link_ctl 0x42 > > > > > > My host machine is Qcom based Thinkpad T14s and it doesn't support L0s. So > > > that's why the lnkctl value once enabled becomes 0x42. This is exactly the > > > reason why the drivers should not muck around LNKCTL register manually. > > > > Thanks, then the 0x43 -> 0x40 -> 0x40 -> 0x42 sequence should not be a concern. But still > > the random 0x43 -> 0x43 -> 0x43 -> 0x42 sequence seems problematic. > > > > How many iterations have you done with above steps? From my side it seems random so better > > to do some stress test. > > > > So I ran the modprobe for about 50 times on the Intel NUC that has QCA6390, but > didn't spot the disparity. This is the script I used: > > for i in {1..50} ;do echo "Loop $i"; sudo setpci -s 01:00.0 0x80.B=0x43;\ > sudo modprobe -r ath11k_pci; sleep 1; sudo modprobe ath11k_pci; sleep 1;done > > And I always got: > > [ 5862.388083] ath11k_pci_aspm_disable: 609 lnkctrl: 0x43 > [ 5862.388124] ath11k_pci_aspm_disable: 614 lnkctrl: 0x40 > [ 5862.876291] ath11k_pci_start: 880 lnkctrl: 0x40 > [ 5862.876346] ath11k_pci_start: 886 lnkctrl: 0x42 > > Also no AER messages. TBH, I'm not sure how you were able to see the random > issues with these APIs. That looks like a race, which is scary. > > I do not want to ignore your scenario, but would like to reproduce and get to > the bottom of it. > I synced with Baochen internally and able to repro the issue. Ths issue is due to hand modifying the LNKCTL register from userspace. The PCI core maintains the ASPM state internally and uses it to change the state when the pci_{enable/disable}_link_state*() APIs are called. So if the userspace or a client driver modifies the LNKCTL register manually, it makes the PCI cached ASPM states invalid. So while this series fixes the driver from doing that, nothing prevents userspace from doing so using 'setpci' and other tools. Userspace should only use sysfs attributes to change the state and avoid modifying the PCI registers when the PCI core is controlling the device. So this is the reason behind the errantic behavior of the API and it is not due to the issue with the API or the PCI core. - Mani -- மணிவண்ணன் சதாசிவம்
On 7/18/2025 6:20 PM, Manivannan Sadhasivam wrote: > On Fri, Jul 18, 2025 at 01:27:27PM GMT, Manivannan Sadhasivam wrote: >> On Fri, Jul 18, 2025 at 10:05:02AM GMT, Baochen Qiang wrote: >>> >>> >>> On 7/17/2025 7:29 PM, Manivannan Sadhasivam wrote: >>>> On Thu, Jul 17, 2025 at 06:46:12PM GMT, Baochen Qiang wrote: >>>>> >>>>> >>>>> On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: >>>>>> On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: >>>>>> >>>>>> [...] >>>>>> >>>>>>>> @@ -16,6 +16,8 @@ >>>>>>>> #include "mhi.h" >>>>>>>> #include "debug.h" >>>>>>>> >>>>>>>> +#include "../ath.h" >>>>>>>> + >>>>>>>> #define ATH12K_PCI_BAR_NUM 0 >>>>>>>> #define ATH12K_PCI_DMA_MASK 36 >>>>>>>> >>>>>>>> @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) >>>>>>>> u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); >>>>>>>> >>>>>>>> /* disable L0s and L1 */ >>>>>>>> - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, >>>>>>>> - PCI_EXP_LNKCTL_ASPMC); >>>>>>>> + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); >>>>>>> >>>>>>> Not always, but sometimes seems the 'disable' does not work: >>>>>>> >>>>>>> [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable >>>>>>> [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable >>>>>>> >>>>>>> >>>>>>>> >>>>>>>> set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); >>>>>>>> } >>>>>>>> @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) >>>>>>>> { >>>>>>>> if (ab_pci->ab->hw_params->supports_aspm && >>>>>>>> test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) >>>>>>>> - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, >>>>>>>> - PCI_EXP_LNKCTL_ASPMC, >>>>>>>> - ab_pci->link_ctl & >>>>>>>> - PCI_EXP_LNKCTL_ASPMC); >>>>>>>> + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); >>>>>>> >>>>>>> always, the 'enable' is not working: >>>>>>> >>>>>>> [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore >>>>>>> [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore >>>>>>> >>>>>> >>>>>> Interesting! I applied your diff and I never see this issue so far (across 10+ >>>>>> reboots): >>>>> >>>>> I was not testing reboot. Here is what I am doing: >>>>> >>>>> step1: rmmod ath12k >>>>> step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see >>>>> the issue) >>>>> >>>>> sudo setpci -s 02:00.0 0x80.B=0x43 >>>>> >>>>> step3: insmod ath12k and check linkctrl >>>>> >>>> >>>> So I did the same and got: >>>> >>>> [ 3283.363569] ath12k_pci_power_up 1475: link_ctl 0x43 >>>> [ 3283.363769] ath12k_pci_power_up 1480: link_ctl 0x40 >>>> [ 3284.007661] ath12k_pci_start 1180: link_ctl 0x40 >>>> [ 3284.007826] ath12k_pci_start 1185: link_ctl 0x42 >>>> >>>> My host machine is Qcom based Thinkpad T14s and it doesn't support L0s. So >>>> that's why the lnkctl value once enabled becomes 0x42. This is exactly the >>>> reason why the drivers should not muck around LNKCTL register manually. >>> >>> Thanks, then the 0x43 -> 0x40 -> 0x40 -> 0x42 sequence should not be a concern. But still >>> the random 0x43 -> 0x43 -> 0x43 -> 0x42 sequence seems problematic. >>> >>> How many iterations have you done with above steps? From my side it seems random so better >>> to do some stress test. >>> >> >> So I ran the modprobe for about 50 times on the Intel NUC that has QCA6390, but >> didn't spot the disparity. This is the script I used: >> >> for i in {1..50} ;do echo "Loop $i"; sudo setpci -s 01:00.0 0x80.B=0x43;\ >> sudo modprobe -r ath11k_pci; sleep 1; sudo modprobe ath11k_pci; sleep 1;done >> >> And I always got: >> >> [ 5862.388083] ath11k_pci_aspm_disable: 609 lnkctrl: 0x43 >> [ 5862.388124] ath11k_pci_aspm_disable: 614 lnkctrl: 0x40 >> [ 5862.876291] ath11k_pci_start: 880 lnkctrl: 0x40 >> [ 5862.876346] ath11k_pci_start: 886 lnkctrl: 0x42 >> >> Also no AER messages. TBH, I'm not sure how you were able to see the random >> issues with these APIs. That looks like a race, which is scary. >> >> I do not want to ignore your scenario, but would like to reproduce and get to >> the bottom of it. >> > > I synced with Baochen internally and able to repro the issue. Ths issue is due > to hand modifying the LNKCTL register from userspace. The PCI core maintains > the ASPM state internally and uses it to change the state when the > pci_{enable/disable}_link_state*() APIs are called. > > So if the userspace or a client driver modifies the LNKCTL register manually, it > makes the PCI cached ASPM states invalid. So while this series fixes the driver > from doing that, nothing prevents userspace from doing so using 'setpci' and > other tools. Userspace should only use sysfs attributes to change the state and > avoid modifying the PCI registers when the PCI core is controlling the device. > So this is the reason behind the errantic behavior of the API and it is not due > to the issue with the API or the PCI core. IMO we can not rely on userspace doing what or not doing what, or on how it is doing, right? So can we fix PCI core to avoid this? > > - Mani >
On Fri, Jul 18, 2025 at 07:05:03PM GMT, Baochen Qiang wrote: > > > On 7/18/2025 6:20 PM, Manivannan Sadhasivam wrote: > > On Fri, Jul 18, 2025 at 01:27:27PM GMT, Manivannan Sadhasivam wrote: > >> On Fri, Jul 18, 2025 at 10:05:02AM GMT, Baochen Qiang wrote: > >>> > >>> > >>> On 7/17/2025 7:29 PM, Manivannan Sadhasivam wrote: > >>>> On Thu, Jul 17, 2025 at 06:46:12PM GMT, Baochen Qiang wrote: > >>>>> > >>>>> > >>>>> On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: > >>>>>> On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: > >>>>>> > >>>>>> [...] > >>>>>> > >>>>>>>> @@ -16,6 +16,8 @@ > >>>>>>>> #include "mhi.h" > >>>>>>>> #include "debug.h" > >>>>>>>> > >>>>>>>> +#include "../ath.h" > >>>>>>>> + > >>>>>>>> #define ATH12K_PCI_BAR_NUM 0 > >>>>>>>> #define ATH12K_PCI_DMA_MASK 36 > >>>>>>>> > >>>>>>>> @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > >>>>>>>> u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > >>>>>>>> > >>>>>>>> /* disable L0s and L1 */ > >>>>>>>> - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > >>>>>>>> - PCI_EXP_LNKCTL_ASPMC); > >>>>>>>> + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > >>>>>>> > >>>>>>> Not always, but sometimes seems the 'disable' does not work: > >>>>>>> > >>>>>>> [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable > >>>>>>> [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable > >>>>>>> > >>>>>>> > >>>>>>>> > >>>>>>>> set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > >>>>>>>> } > >>>>>>>> @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > >>>>>>>> { > >>>>>>>> if (ab_pci->ab->hw_params->supports_aspm && > >>>>>>>> test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > >>>>>>>> - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > >>>>>>>> - PCI_EXP_LNKCTL_ASPMC, > >>>>>>>> - ab_pci->link_ctl & > >>>>>>>> - PCI_EXP_LNKCTL_ASPMC); > >>>>>>>> + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > >>>>>>> > >>>>>>> always, the 'enable' is not working: > >>>>>>> > >>>>>>> [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore > >>>>>>> [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore > >>>>>>> > >>>>>> > >>>>>> Interesting! I applied your diff and I never see this issue so far (across 10+ > >>>>>> reboots): > >>>>> > >>>>> I was not testing reboot. Here is what I am doing: > >>>>> > >>>>> step1: rmmod ath12k > >>>>> step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see > >>>>> the issue) > >>>>> > >>>>> sudo setpci -s 02:00.0 0x80.B=0x43 > >>>>> > >>>>> step3: insmod ath12k and check linkctrl > >>>>> > >>>> > >>>> So I did the same and got: > >>>> > >>>> [ 3283.363569] ath12k_pci_power_up 1475: link_ctl 0x43 > >>>> [ 3283.363769] ath12k_pci_power_up 1480: link_ctl 0x40 > >>>> [ 3284.007661] ath12k_pci_start 1180: link_ctl 0x40 > >>>> [ 3284.007826] ath12k_pci_start 1185: link_ctl 0x42 > >>>> > >>>> My host machine is Qcom based Thinkpad T14s and it doesn't support L0s. So > >>>> that's why the lnkctl value once enabled becomes 0x42. This is exactly the > >>>> reason why the drivers should not muck around LNKCTL register manually. > >>> > >>> Thanks, then the 0x43 -> 0x40 -> 0x40 -> 0x42 sequence should not be a concern. But still > >>> the random 0x43 -> 0x43 -> 0x43 -> 0x42 sequence seems problematic. > >>> > >>> How many iterations have you done with above steps? From my side it seems random so better > >>> to do some stress test. > >>> > >> > >> So I ran the modprobe for about 50 times on the Intel NUC that has QCA6390, but > >> didn't spot the disparity. This is the script I used: > >> > >> for i in {1..50} ;do echo "Loop $i"; sudo setpci -s 01:00.0 0x80.B=0x43;\ > >> sudo modprobe -r ath11k_pci; sleep 1; sudo modprobe ath11k_pci; sleep 1;done > >> > >> And I always got: > >> > >> [ 5862.388083] ath11k_pci_aspm_disable: 609 lnkctrl: 0x43 > >> [ 5862.388124] ath11k_pci_aspm_disable: 614 lnkctrl: 0x40 > >> [ 5862.876291] ath11k_pci_start: 880 lnkctrl: 0x40 > >> [ 5862.876346] ath11k_pci_start: 886 lnkctrl: 0x42 > >> > >> Also no AER messages. TBH, I'm not sure how you were able to see the random > >> issues with these APIs. That looks like a race, which is scary. > >> > >> I do not want to ignore your scenario, but would like to reproduce and get to > >> the bottom of it. > >> > > > > I synced with Baochen internally and able to repro the issue. Ths issue is due > > to hand modifying the LNKCTL register from userspace. The PCI core maintains > > the ASPM state internally and uses it to change the state when the > > pci_{enable/disable}_link_state*() APIs are called. > > > > So if the userspace or a client driver modifies the LNKCTL register manually, it > > makes the PCI cached ASPM states invalid. So while this series fixes the driver > > from doing that, nothing prevents userspace from doing so using 'setpci' and > > other tools. Userspace should only use sysfs attributes to change the state and > > avoid modifying the PCI registers when the PCI core is controlling the device. > > So this is the reason behind the errantic behavior of the API and it is not due > > to the issue with the API or the PCI core. > > IMO we can not rely on userspace doing what or not doing what, or on how it is doing, > right? So can we fix PCI core to avoid this? > I'm not sure it is possible to *fix* the PCI core here. Since the PCI core gives userspace access to the entire config space of the device, the userspace reads/writes to any of the registers it want. So unless the config space access if forbidden if a driver is bound to the device, it is inevitable. And then there is also /dev/mem... Interestingly, there is an API available for this purpose: pci_request_config_region_exclusive(), but it is used only by the AMD arch driver to prevent userspace from writing to the entire config space of the device. Maybe it makes sense to use something like this to prevent the userspace access to the entire config space if the driver is bind to the device. Bjorn, thoughts? - Mani -- மணிவண்ணன் சதாசிவம்
On Fri, Jul 18, 2025 at 05:19:28PM +0530, Manivannan Sadhasivam wrote: > On Fri, Jul 18, 2025 at 07:05:03PM GMT, Baochen Qiang wrote: > > On 7/18/2025 6:20 PM, Manivannan Sadhasivam wrote: > > > On Fri, Jul 18, 2025 at 01:27:27PM GMT, Manivannan Sadhasivam wrote: > > >> On Fri, Jul 18, 2025 at 10:05:02AM GMT, Baochen Qiang wrote: > > >>> On 7/17/2025 7:29 PM, Manivannan Sadhasivam wrote: > > >>>> On Thu, Jul 17, 2025 at 06:46:12PM GMT, Baochen Qiang wrote: > > >>>>> On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: > > >>>>>> On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: > > >>>>>> > > >>>>>> [...] > > >>>>>> > > >>>>>>>> @@ -16,6 +16,8 @@ > > >>>>>>>> #include "mhi.h" > > >>>>>>>> #include "debug.h" > > >>>>>>>> > > >>>>>>>> +#include "../ath.h" > > >>>>>>>> + > > >>>>>>>> #define ATH12K_PCI_BAR_NUM 0 > > >>>>>>>> #define ATH12K_PCI_DMA_MASK 36 > > >>>>>>>> > > >>>>>>>> @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > > >>>>>>>> u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > >>>>>>>> > > >>>>>>>> /* disable L0s and L1 */ > > >>>>>>>> - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > >>>>>>>> - PCI_EXP_LNKCTL_ASPMC); > > >>>>>>>> + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > > >>>>>>> > > >>>>>>> Not always, but sometimes seems the 'disable' does not work: > > >>>>>>> > > >>>>>>> [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable > > >>>>>>> [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable > > >>>>>>> > > >>>>>>> > > >>>>>>>> > > >>>>>>>> set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > > >>>>>>>> } > > >>>>>>>> @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > > >>>>>>>> { > > >>>>>>>> if (ab_pci->ab->hw_params->supports_aspm && > > >>>>>>>> test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > > >>>>>>>> - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > >>>>>>>> - PCI_EXP_LNKCTL_ASPMC, > > >>>>>>>> - ab_pci->link_ctl & > > >>>>>>>> - PCI_EXP_LNKCTL_ASPMC); > > >>>>>>>> + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > > >>>>>>> > > >>>>>>> always, the 'enable' is not working: > > >>>>>>> > > >>>>>>> [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore > > >>>>>>> [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore > > >>>>>>> > > >>>>>> > > >>>>>> Interesting! I applied your diff and I never see this issue so far (across 10+ > > >>>>>> reboots): > > >>>>> > > >>>>> I was not testing reboot. Here is what I am doing: > > >>>>> > > >>>>> step1: rmmod ath12k > > >>>>> step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see > > >>>>> the issue) > > >>>>> > > >>>>> sudo setpci -s 02:00.0 0x80.B=0x43 > > >>>>> > > >>>>> step3: insmod ath12k and check linkctrl > > >>>>> > > >>>> > > >>>> So I did the same and got: > > >>>> > > >>>> [ 3283.363569] ath12k_pci_power_up 1475: link_ctl 0x43 > > >>>> [ 3283.363769] ath12k_pci_power_up 1480: link_ctl 0x40 > > >>>> [ 3284.007661] ath12k_pci_start 1180: link_ctl 0x40 > > >>>> [ 3284.007826] ath12k_pci_start 1185: link_ctl 0x42 > > >>>> > > >>>> My host machine is Qcom based Thinkpad T14s and it doesn't > > >>>> support L0s. So that's why the lnkctl value once enabled > > >>>> becomes 0x42. This is exactly the reason why the drivers > > >>>> should not muck around LNKCTL register manually. > > >>> > > >>> Thanks, then the 0x43 -> 0x40 -> 0x40 -> 0x42 sequence should > > >>> not be a concern. But still the random 0x43 -> 0x43 -> 0x43 -> > > >>> 0x42 sequence seems problematic. > > >>> > > >>> How many iterations have you done with above steps? From my > > >>> side it seems random so better to do some stress test. > > >>> > > >> > > >> So I ran the modprobe for about 50 times on the Intel NUC that > > >> has QCA6390, but didn't spot the disparity. This is the script > > >> I used: > > >> > > >> for i in {1..50} ;do echo "Loop $i"; sudo setpci -s 01:00.0 0x80.B=0x43;\ > > >> sudo modprobe -r ath11k_pci; sleep 1; sudo modprobe ath11k_pci; sleep 1;done > > >> > > >> And I always got: > > >> > > >> [ 5862.388083] ath11k_pci_aspm_disable: 609 lnkctrl: 0x43 > > >> [ 5862.388124] ath11k_pci_aspm_disable: 614 lnkctrl: 0x40 > > >> [ 5862.876291] ath11k_pci_start: 880 lnkctrl: 0x40 > > >> [ 5862.876346] ath11k_pci_start: 886 lnkctrl: 0x42 > > >> > > >> Also no AER messages. TBH, I'm not sure how you were able to > > >> see the random issues with these APIs. That looks like a race, > > >> which is scary. > > >> > > >> I do not want to ignore your scenario, but would like to > > >> reproduce and get to the bottom of it. > > > > > > I synced with Baochen internally and able to repro the issue. > > > Ths issue is due to hand modifying the LNKCTL register from > > > userspace. The PCI core maintains the ASPM state internally and > > > uses it to change the state when the > > > pci_{enable/disable}_link_state*() APIs are called. > > > > > > So if the userspace or a client driver modifies the LNKCTL > > > register manually, it makes the PCI cached ASPM states invalid. > > > So while this series fixes the driver from doing that, nothing > > > prevents userspace from doing so using 'setpci' and other tools. > > > Userspace should only use sysfs attributes to change the state > > > and avoid modifying the PCI registers when the PCI core is > > > controlling the device. So this is the reason behind the > > > errantic behavior of the API and it is not due to the issue with > > > the API or the PCI core. > > > > IMO we can not rely on userspace doing what or not doing what, or > > on how it is doing, right? So can we fix PCI core to avoid this? > > I'm not sure it is possible to *fix* the PCI core here. Since the > PCI core gives userspace access to the entire config space of the > device, the userspace reads/writes to any of the registers it want. > So unless the config space access if forbidden if a driver is bound > to the device, it is inevitable. And then there is also /dev/mem... > > Interestingly, there is an API available for this purpose: > pci_request_config_region_exclusive(), but it is used only by the > AMD arch driver to prevent userspace from writing to the entire > config space of the device. > > Maybe it makes sense to use something like this to prevent the > userspace access to the entire config space if the driver is bind to > the device. I'm not really a fan of pci_request_config_region_exclusive() because it's such a singleton thing. I don't like to be one of only a few users of an interface. Linux has a long tradition of allowing root users to shoot themselves in the foot, and setpci is very useful as a debugging tool. Maybe tainting the kernel for config writes from userspace, and possibly even a WARN_ONCE() at the time, would be a compromise. Bjorn
On Fri, Jul 18, 2025 at 11:26:00AM GMT, Bjorn Helgaas wrote: > On Fri, Jul 18, 2025 at 05:19:28PM +0530, Manivannan Sadhasivam wrote: > > On Fri, Jul 18, 2025 at 07:05:03PM GMT, Baochen Qiang wrote: > > > On 7/18/2025 6:20 PM, Manivannan Sadhasivam wrote: > > > > On Fri, Jul 18, 2025 at 01:27:27PM GMT, Manivannan Sadhasivam wrote: > > > >> On Fri, Jul 18, 2025 at 10:05:02AM GMT, Baochen Qiang wrote: > > > >>> On 7/17/2025 7:29 PM, Manivannan Sadhasivam wrote: > > > >>>> On Thu, Jul 17, 2025 at 06:46:12PM GMT, Baochen Qiang wrote: > > > >>>>> On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: > > > >>>>>> On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: > > > >>>>>> > > > >>>>>> [...] > > > >>>>>> > > > >>>>>>>> @@ -16,6 +16,8 @@ > > > >>>>>>>> #include "mhi.h" > > > >>>>>>>> #include "debug.h" > > > >>>>>>>> > > > >>>>>>>> +#include "../ath.h" > > > >>>>>>>> + > > > >>>>>>>> #define ATH12K_PCI_BAR_NUM 0 > > > >>>>>>>> #define ATH12K_PCI_DMA_MASK 36 > > > >>>>>>>> > > > >>>>>>>> @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > > > >>>>>>>> u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > > >>>>>>>> > > > >>>>>>>> /* disable L0s and L1 */ > > > >>>>>>>> - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > >>>>>>>> - PCI_EXP_LNKCTL_ASPMC); > > > >>>>>>>> + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > > > >>>>>>> > > > >>>>>>> Not always, but sometimes seems the 'disable' does not work: > > > >>>>>>> > > > >>>>>>> [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable > > > >>>>>>> [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable > > > >>>>>>> > > > >>>>>>> > > > >>>>>>>> > > > >>>>>>>> set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > > > >>>>>>>> } > > > >>>>>>>> @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > > > >>>>>>>> { > > > >>>>>>>> if (ab_pci->ab->hw_params->supports_aspm && > > > >>>>>>>> test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > > > >>>>>>>> - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > >>>>>>>> - PCI_EXP_LNKCTL_ASPMC, > > > >>>>>>>> - ab_pci->link_ctl & > > > >>>>>>>> - PCI_EXP_LNKCTL_ASPMC); > > > >>>>>>>> + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > > > >>>>>>> > > > >>>>>>> always, the 'enable' is not working: > > > >>>>>>> > > > >>>>>>> [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore > > > >>>>>>> [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore > > > >>>>>>> > > > >>>>>> > > > >>>>>> Interesting! I applied your diff and I never see this issue so far (across 10+ > > > >>>>>> reboots): > > > >>>>> > > > >>>>> I was not testing reboot. Here is what I am doing: > > > >>>>> > > > >>>>> step1: rmmod ath12k > > > >>>>> step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see > > > >>>>> the issue) > > > >>>>> > > > >>>>> sudo setpci -s 02:00.0 0x80.B=0x43 > > > >>>>> > > > >>>>> step3: insmod ath12k and check linkctrl > > > >>>>> > > > >>>> > > > >>>> So I did the same and got: > > > >>>> > > > >>>> [ 3283.363569] ath12k_pci_power_up 1475: link_ctl 0x43 > > > >>>> [ 3283.363769] ath12k_pci_power_up 1480: link_ctl 0x40 > > > >>>> [ 3284.007661] ath12k_pci_start 1180: link_ctl 0x40 > > > >>>> [ 3284.007826] ath12k_pci_start 1185: link_ctl 0x42 > > > >>>> > > > >>>> My host machine is Qcom based Thinkpad T14s and it doesn't > > > >>>> support L0s. So that's why the lnkctl value once enabled > > > >>>> becomes 0x42. This is exactly the reason why the drivers > > > >>>> should not muck around LNKCTL register manually. > > > >>> > > > >>> Thanks, then the 0x43 -> 0x40 -> 0x40 -> 0x42 sequence should > > > >>> not be a concern. But still the random 0x43 -> 0x43 -> 0x43 -> > > > >>> 0x42 sequence seems problematic. > > > >>> > > > >>> How many iterations have you done with above steps? From my > > > >>> side it seems random so better to do some stress test. > > > >>> > > > >> > > > >> So I ran the modprobe for about 50 times on the Intel NUC that > > > >> has QCA6390, but didn't spot the disparity. This is the script > > > >> I used: > > > >> > > > >> for i in {1..50} ;do echo "Loop $i"; sudo setpci -s 01:00.0 0x80.B=0x43;\ > > > >> sudo modprobe -r ath11k_pci; sleep 1; sudo modprobe ath11k_pci; sleep 1;done > > > >> > > > >> And I always got: > > > >> > > > >> [ 5862.388083] ath11k_pci_aspm_disable: 609 lnkctrl: 0x43 > > > >> [ 5862.388124] ath11k_pci_aspm_disable: 614 lnkctrl: 0x40 > > > >> [ 5862.876291] ath11k_pci_start: 880 lnkctrl: 0x40 > > > >> [ 5862.876346] ath11k_pci_start: 886 lnkctrl: 0x42 > > > >> > > > >> Also no AER messages. TBH, I'm not sure how you were able to > > > >> see the random issues with these APIs. That looks like a race, > > > >> which is scary. > > > >> > > > >> I do not want to ignore your scenario, but would like to > > > >> reproduce and get to the bottom of it. > > > > > > > > I synced with Baochen internally and able to repro the issue. > > > > Ths issue is due to hand modifying the LNKCTL register from > > > > userspace. The PCI core maintains the ASPM state internally and > > > > uses it to change the state when the > > > > pci_{enable/disable}_link_state*() APIs are called. > > > > > > > > So if the userspace or a client driver modifies the LNKCTL > > > > register manually, it makes the PCI cached ASPM states invalid. > > > > So while this series fixes the driver from doing that, nothing > > > > prevents userspace from doing so using 'setpci' and other tools. > > > > Userspace should only use sysfs attributes to change the state > > > > and avoid modifying the PCI registers when the PCI core is > > > > controlling the device. So this is the reason behind the > > > > errantic behavior of the API and it is not due to the issue with > > > > the API or the PCI core. > > > > > > IMO we can not rely on userspace doing what or not doing what, or > > > on how it is doing, right? So can we fix PCI core to avoid this? > > > > I'm not sure it is possible to *fix* the PCI core here. Since the > > PCI core gives userspace access to the entire config space of the > > device, the userspace reads/writes to any of the registers it want. > > So unless the config space access if forbidden if a driver is bound > > to the device, it is inevitable. And then there is also /dev/mem... > > > > Interestingly, there is an API available for this purpose: > > pci_request_config_region_exclusive(), but it is used only by the > > AMD arch driver to prevent userspace from writing to the entire > > config space of the device. > > > > Maybe it makes sense to use something like this to prevent the > > userspace access to the entire config space if the driver is bind to > > the device. > > I'm not really a fan of pci_request_config_region_exclusive() because > it's such a singleton thing. I don't like to be one of only a few > users of an interface. > If the API is serving the purpose, I don't see why we cannot be 'one among the few users'. > Linux has a long tradition of allowing root users to shoot themselves > in the foot, and setpci is very useful as a debugging tool. Maybe > tainting the kernel for config writes from userspace, and possibly > even a WARN_ONCE() at the time, would be a compromise. > I really do not see a need to let the userspace modify the config space when a driver is bind to it. It fully makes sense when there is no driver attached to the device. But if there is one, then it just warrants trouble. If we want to be really cautious with userspace tooling, then we can introduce a kernel config option similar to CONFIG_IO_STRICT_DEVMEM and keep it disabled by default. - Mani -- மணிவண்ணன் சதாசிவம்
On 7/18/2025 1:27 PM, Manivannan Sadhasivam wrote: > On Fri, Jul 18, 2025 at 10:05:02AM GMT, Baochen Qiang wrote: >> >> >> On 7/17/2025 7:29 PM, Manivannan Sadhasivam wrote: >>> On Thu, Jul 17, 2025 at 06:46:12PM GMT, Baochen Qiang wrote: >>>> >>>> >>>> On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: >>>>> On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: >>>>> >>>>> [...] >>>>> >>>>>>> @@ -16,6 +16,8 @@ >>>>>>> #include "mhi.h" >>>>>>> #include "debug.h" >>>>>>> >>>>>>> +#include "../ath.h" >>>>>>> + >>>>>>> #define ATH12K_PCI_BAR_NUM 0 >>>>>>> #define ATH12K_PCI_DMA_MASK 36 >>>>>>> >>>>>>> @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) >>>>>>> u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); >>>>>>> >>>>>>> /* disable L0s and L1 */ >>>>>>> - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, >>>>>>> - PCI_EXP_LNKCTL_ASPMC); >>>>>>> + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); >>>>>> >>>>>> Not always, but sometimes seems the 'disable' does not work: >>>>>> >>>>>> [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable >>>>>> [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable >>>>>> >>>>>> >>>>>>> >>>>>>> set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); >>>>>>> } >>>>>>> @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) >>>>>>> { >>>>>>> if (ab_pci->ab->hw_params->supports_aspm && >>>>>>> test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) >>>>>>> - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, >>>>>>> - PCI_EXP_LNKCTL_ASPMC, >>>>>>> - ab_pci->link_ctl & >>>>>>> - PCI_EXP_LNKCTL_ASPMC); >>>>>>> + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); >>>>>> >>>>>> always, the 'enable' is not working: >>>>>> >>>>>> [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore >>>>>> [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore >>>>>> >>>>> >>>>> Interesting! I applied your diff and I never see this issue so far (across 10+ >>>>> reboots): >>>> >>>> I was not testing reboot. Here is what I am doing: >>>> >>>> step1: rmmod ath12k >>>> step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see >>>> the issue) >>>> >>>> sudo setpci -s 02:00.0 0x80.B=0x43 >>>> >>>> step3: insmod ath12k and check linkctrl >>>> >>> >>> So I did the same and got: >>> >>> [ 3283.363569] ath12k_pci_power_up 1475: link_ctl 0x43 >>> [ 3283.363769] ath12k_pci_power_up 1480: link_ctl 0x40 >>> [ 3284.007661] ath12k_pci_start 1180: link_ctl 0x40 >>> [ 3284.007826] ath12k_pci_start 1185: link_ctl 0x42 >>> >>> My host machine is Qcom based Thinkpad T14s and it doesn't support L0s. So >>> that's why the lnkctl value once enabled becomes 0x42. This is exactly the >>> reason why the drivers should not muck around LNKCTL register manually. >> >> Thanks, then the 0x43 -> 0x40 -> 0x40 -> 0x42 sequence should not be a concern. But still >> the random 0x43 -> 0x43 -> 0x43 -> 0x42 sequence seems problematic. >> >> How many iterations have you done with above steps? From my side it seems random so better >> to do some stress test. >> > > So I ran the modprobe for about 50 times on the Intel NUC that has QCA6390, but > didn't spot the disparity. This is the script I used: > > for i in {1..50} ;do echo "Loop $i"; sudo setpci -s 01:00.0 0x80.B=0x43;\ > sudo modprobe -r ath11k_pci; sleep 1; sudo modprobe ath11k_pci; sleep 1;done > > And I always got: > > [ 5862.388083] ath11k_pci_aspm_disable: 609 lnkctrl: 0x43 > [ 5862.388124] ath11k_pci_aspm_disable: 614 lnkctrl: 0x40 > [ 5862.876291] ath11k_pci_start: 880 lnkctrl: 0x40 > [ 5862.876346] ath11k_pci_start: 886 lnkctrl: 0x42 > > Also no AER messages. TBH, I'm not sure how you were able to see the random > issues with these APIs. That looks like a race, which is scary. > How about using locked variants pci_disable_link_state_locked & pci_enable_link_state_locked give it a try? - Krishna Chaitanya > I do not want to ignore your scenario, but would like to reproduce and get to > the bottom of it. > > - Mani >
On Fri, Jul 18, 2025 at 01:33:46PM GMT, Krishna Chaitanya Chundru wrote: > > > On 7/18/2025 1:27 PM, Manivannan Sadhasivam wrote: > > On Fri, Jul 18, 2025 at 10:05:02AM GMT, Baochen Qiang wrote: > > > > > > > > > On 7/17/2025 7:29 PM, Manivannan Sadhasivam wrote: > > > > On Thu, Jul 17, 2025 at 06:46:12PM GMT, Baochen Qiang wrote: > > > > > > > > > > > > > > > On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: > > > > > > On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: > > > > > > > > > > > > [...] > > > > > > > > > > > > > > @@ -16,6 +16,8 @@ > > > > > > > > #include "mhi.h" > > > > > > > > #include "debug.h" > > > > > > > > +#include "../ath.h" > > > > > > > > + > > > > > > > > #define ATH12K_PCI_BAR_NUM 0 > > > > > > > > #define ATH12K_PCI_DMA_MASK 36 > > > > > > > > @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) > > > > > > > > u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); > > > > > > > > /* disable L0s and L1 */ > > > > > > > > - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > > > > > > - PCI_EXP_LNKCTL_ASPMC); > > > > > > > > + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); > > > > > > > > > > > > > > Not always, but sometimes seems the 'disable' does not work: > > > > > > > > > > > > > > [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable > > > > > > > [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable > > > > > > > > > > > > > > > > > > > > > > set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); > > > > > > > > } > > > > > > > > @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) > > > > > > > > { > > > > > > > > if (ab_pci->ab->hw_params->supports_aspm && > > > > > > > > test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) > > > > > > > > - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, > > > > > > > > - PCI_EXP_LNKCTL_ASPMC, > > > > > > > > - ab_pci->link_ctl & > > > > > > > > - PCI_EXP_LNKCTL_ASPMC); > > > > > > > > + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); > > > > > > > > > > > > > > always, the 'enable' is not working: > > > > > > > > > > > > > > [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore > > > > > > > [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore > > > > > > > > > > > > > > > > > > > Interesting! I applied your diff and I never see this issue so far (across 10+ > > > > > > reboots): > > > > > > > > > > I was not testing reboot. Here is what I am doing: > > > > > > > > > > step1: rmmod ath12k > > > > > step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see > > > > > the issue) > > > > > > > > > > sudo setpci -s 02:00.0 0x80.B=0x43 > > > > > > > > > > step3: insmod ath12k and check linkctrl > > > > > > > > > > > > > So I did the same and got: > > > > > > > > [ 3283.363569] ath12k_pci_power_up 1475: link_ctl 0x43 > > > > [ 3283.363769] ath12k_pci_power_up 1480: link_ctl 0x40 > > > > [ 3284.007661] ath12k_pci_start 1180: link_ctl 0x40 > > > > [ 3284.007826] ath12k_pci_start 1185: link_ctl 0x42 > > > > > > > > My host machine is Qcom based Thinkpad T14s and it doesn't support L0s. So > > > > that's why the lnkctl value once enabled becomes 0x42. This is exactly the > > > > reason why the drivers should not muck around LNKCTL register manually. > > > > > > Thanks, then the 0x43 -> 0x40 -> 0x40 -> 0x42 sequence should not be a concern. But still > > > the random 0x43 -> 0x43 -> 0x43 -> 0x42 sequence seems problematic. > > > > > > How many iterations have you done with above steps? From my side it seems random so better > > > to do some stress test. > > > > > > > So I ran the modprobe for about 50 times on the Intel NUC that has QCA6390, but > > didn't spot the disparity. This is the script I used: > > > > for i in {1..50} ;do echo "Loop $i"; sudo setpci -s 01:00.0 0x80.B=0x43;\ > > sudo modprobe -r ath11k_pci; sleep 1; sudo modprobe ath11k_pci; sleep 1;done > > > > And I always got: > > > > [ 5862.388083] ath11k_pci_aspm_disable: 609 lnkctrl: 0x43 > > [ 5862.388124] ath11k_pci_aspm_disable: 614 lnkctrl: 0x40 > > [ 5862.876291] ath11k_pci_start: 880 lnkctrl: 0x40 > > [ 5862.876346] ath11k_pci_start: 886 lnkctrl: 0x42 > > > > Also no AER messages. TBH, I'm not sure how you were able to see the random > > issues with these APIs. That looks like a race, which is scary. > > > How about using locked variants pci_disable_link_state_locked & > pci_enable_link_state_locked give it a try? > Locked variants should only be used when the caller is holding the pci_bus_sem lock, which in this case it is not. Unlike the name sounds, it doesn't provide any extra locking. - Mani -- மணிவண்ணன் சதாசிவம்
On 7/18/2025 1:42 PM, Manivannan Sadhasivam wrote: > On Fri, Jul 18, 2025 at 01:33:46PM GMT, Krishna Chaitanya Chundru wrote: >> >> >> On 7/18/2025 1:27 PM, Manivannan Sadhasivam wrote: >>> On Fri, Jul 18, 2025 at 10:05:02AM GMT, Baochen Qiang wrote: >>>> >>>> >>>> On 7/17/2025 7:29 PM, Manivannan Sadhasivam wrote: >>>>> On Thu, Jul 17, 2025 at 06:46:12PM GMT, Baochen Qiang wrote: >>>>>> >>>>>> >>>>>> On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: >>>>>>> On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: >>>>>>> >>>>>>> [...] >>>>>>> >>>>>>>>> @@ -16,6 +16,8 @@ >>>>>>>>> #include "mhi.h" >>>>>>>>> #include "debug.h" >>>>>>>>> +#include "../ath.h" >>>>>>>>> + >>>>>>>>> #define ATH12K_PCI_BAR_NUM 0 >>>>>>>>> #define ATH12K_PCI_DMA_MASK 36 >>>>>>>>> @@ -928,8 +930,7 @@ static void ath12k_pci_aspm_disable(struct ath12k_pci *ab_pci) >>>>>>>>> u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); >>>>>>>>> /* disable L0s and L1 */ >>>>>>>>> - pcie_capability_clear_word(ab_pci->pdev, PCI_EXP_LNKCTL, >>>>>>>>> - PCI_EXP_LNKCTL_ASPMC); >>>>>>>>> + pci_disable_link_state(ab_pci->pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); >>>>>>>> >>>>>>>> Not always, but sometimes seems the 'disable' does not work: >>>>>>>> >>>>>>>> [ 279.920507] ath12k_pci_power_up 1475: link_ctl 0x43 //before disable >>>>>>>> [ 279.920539] ath12k_pci_power_up 1482: link_ctl 0x43 //after disable >>>>>>>> >>>>>>>> >>>>>>>>> set_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags); >>>>>>>>> } >>>>>>>>> @@ -958,10 +959,7 @@ static void ath12k_pci_aspm_restore(struct ath12k_pci *ab_pci) >>>>>>>>> { >>>>>>>>> if (ab_pci->ab->hw_params->supports_aspm && >>>>>>>>> test_and_clear_bit(ATH12K_PCI_ASPM_RESTORE, &ab_pci->flags)) >>>>>>>>> - pcie_capability_clear_and_set_word(ab_pci->pdev, PCI_EXP_LNKCTL, >>>>>>>>> - PCI_EXP_LNKCTL_ASPMC, >>>>>>>>> - ab_pci->link_ctl & >>>>>>>>> - PCI_EXP_LNKCTL_ASPMC); >>>>>>>>> + pci_enable_link_state(ab_pci->pdev, ath_pci_aspm_state(ab_pci->link_ctl)); >>>>>>>> >>>>>>>> always, the 'enable' is not working: >>>>>>>> >>>>>>>> [ 280.561762] ath12k_pci_start 1180: link_ctl 0x43 //before restore >>>>>>>> [ 280.561809] ath12k_pci_start 1185: link_ctl 0x42 //after restore >>>>>>>> >>>>>>> >>>>>>> Interesting! I applied your diff and I never see this issue so far (across 10+ >>>>>>> reboots): >>>>>> >>>>>> I was not testing reboot. Here is what I am doing: >>>>>> >>>>>> step1: rmmod ath12k >>>>>> step2: force LinkCtrl using setpci (make sure it is 0x43, which seems more likely to see >>>>>> the issue) >>>>>> >>>>>> sudo setpci -s 02:00.0 0x80.B=0x43 >>>>>> >>>>>> step3: insmod ath12k and check linkctrl >>>>>> >>>>> >>>>> So I did the same and got: >>>>> >>>>> [ 3283.363569] ath12k_pci_power_up 1475: link_ctl 0x43 >>>>> [ 3283.363769] ath12k_pci_power_up 1480: link_ctl 0x40 >>>>> [ 3284.007661] ath12k_pci_start 1180: link_ctl 0x40 >>>>> [ 3284.007826] ath12k_pci_start 1185: link_ctl 0x42 >>>>> >>>>> My host machine is Qcom based Thinkpad T14s and it doesn't support L0s. So >>>>> that's why the lnkctl value once enabled becomes 0x42. This is exactly the >>>>> reason why the drivers should not muck around LNKCTL register manually. >>>> >>>> Thanks, then the 0x43 -> 0x40 -> 0x40 -> 0x42 sequence should not be a concern. But still >>>> the random 0x43 -> 0x43 -> 0x43 -> 0x42 sequence seems problematic. >>>> >>>> How many iterations have you done with above steps? From my side it seems random so better >>>> to do some stress test. >>>> >>> >>> So I ran the modprobe for about 50 times on the Intel NUC that has QCA6390, but >>> didn't spot the disparity. This is the script I used: >>> >>> for i in {1..50} ;do echo "Loop $i"; sudo setpci -s 01:00.0 0x80.B=0x43;\ >>> sudo modprobe -r ath11k_pci; sleep 1; sudo modprobe ath11k_pci; sleep 1;done >>> >>> And I always got: >>> >>> [ 5862.388083] ath11k_pci_aspm_disable: 609 lnkctrl: 0x43 >>> [ 5862.388124] ath11k_pci_aspm_disable: 614 lnkctrl: 0x40 >>> [ 5862.876291] ath11k_pci_start: 880 lnkctrl: 0x40 >>> [ 5862.876346] ath11k_pci_start: 886 lnkctrl: 0x42 >>> >>> Also no AER messages. TBH, I'm not sure how you were able to see the random >>> issues with these APIs. That looks like a race, which is scary. >>> >> How about using locked variants pci_disable_link_state_locked & >> pci_enable_link_state_locked give it a try? >> > > Locked variants should only be used when the caller is holding the pci_bus_sem > lock, which in this case it is not. Unlike the name sounds, it doesn't provide > any extra locking. > Got it. Thanks for the info. Qiang, Can you narrow down AER issue if it is coming always while enabling ASPM only. And can you share us lspci o/p of the endpoint and the port to which it is connected before and after. - Krishna Chaitanya. > - Mani >
On 7/17/25 12:46 PM, Baochen Qiang wrote: > > > On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: >> On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: >> [...] >>> In addition, frequently I can see below AER warnings: >>> >>> [ 280.383143] aer_ratelimit: 30 callbacks suppressed >>> [ 280.383151] pcieport 0000:00:1c.0: AER: Correctable error message received from >>> 0000:00:1c.0 >>> [ 280.383177] pcieport 0000:00:1c.0: PCIe Bus Error: severity=Correctable, type=Data Link >>> Layer, (Transmitter ID) >>> [ 280.383184] pcieport 0000:00:1c.0: device [8086:7ab8] error status/mask=00001000/00002000 >>> [ 280.383193] pcieport 0000:00:1c.0: [12] Timeout >>> >> >> I don't see any AER errors either. > > My WLAN chip is attached via a PCIe-to-M.2 adapter, maybe some hardware issue? However I > never saw them until your changes applied. It'd be useful to know whether that's a Qualcomm platform running an upstream-ish kernel, or some other host - we've had platform- specific issues in the past and the necessary margining/tuning presets were only introduced recently Konrad
On 7/17/2025 6:55 PM, Konrad Dybcio wrote: > On 7/17/25 12:46 PM, Baochen Qiang wrote: >> >> >> On 7/17/2025 6:31 PM, Manivannan Sadhasivam wrote: >>> On Thu, Jul 17, 2025 at 05:24:13PM GMT, Baochen Qiang wrote: >>> > > [...] > >>>> In addition, frequently I can see below AER warnings: >>>> >>>> [ 280.383143] aer_ratelimit: 30 callbacks suppressed >>>> [ 280.383151] pcieport 0000:00:1c.0: AER: Correctable error message received from >>>> 0000:00:1c.0 >>>> [ 280.383177] pcieport 0000:00:1c.0: PCIe Bus Error: severity=Correctable, type=Data Link >>>> Layer, (Transmitter ID) >>>> [ 280.383184] pcieport 0000:00:1c.0: device [8086:7ab8] error status/mask=00001000/00002000 >>>> [ 280.383193] pcieport 0000:00:1c.0: [12] Timeout >>>> >>> >>> I don't see any AER errors either. >> >> My WLAN chip is attached via a PCIe-to-M.2 adapter, maybe some hardware issue? However I >> never saw them until your changes applied. > > It'd be useful to know whether that's a Qualcomm platform running > an upstream-ish kernel, or some other host - we've had platform- > specific issues in the past and the necessary margining/tuning presets > were only introduced recently It is an Intel based desktop, so not a Qualcomm platform. But it is indeed an upstream kernel: https://git.kernel.org/pub/scm/linux/kernel/git/ath/ath.git > > Konrad
Hi Manivannan, kernel test robot noticed the following build errors: [auto build test ERROR on 19272b37aa4f83ca52bdf9c16d5d81bdd1354494] url: https://github.com/intel-lab-lkp/linux/commits/Manivannan-Sadhasivam-via-B4-Relay/PCI-ASPM-Fix-the-behavior-of-pci_enable_link_state-APIs/20250716-205857 base: 19272b37aa4f83ca52bdf9c16d5d81bdd1354494 patch link: https://lore.kernel.org/r/20250716-ath-aspm-fix-v1-4-dd3e62c1b692%40oss.qualcomm.com patch subject: [PATCH 4/6] wifi: ath12k: Use pci_{enable/disable}_link_state() APIs to enable/disable ASPM states config: m68k-allmodconfig (https://download.01.org/0day-ci/archive/20250717/202507171411.xOxUslAs-lkp@intel.com/config) compiler: m68k-linux-gcc (GCC) 15.1.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250717/202507171411.xOxUslAs-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202507171411.xOxUslAs-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from drivers/net/wireless/ath/main.c:22: drivers/net/wireless/ath/ath.h: In function 'ath_pci_aspm_state': >> drivers/net/wireless/ath/ath.h:346:26: error: 'PCIE_LINK_STATE_L0S' undeclared (first use in this function) 346 | state |= PCIE_LINK_STATE_L0S; | ^~~~~~~~~~~~~~~~~~~ drivers/net/wireless/ath/ath.h:346:26: note: each undeclared identifier is reported only once for each function it appears in >> drivers/net/wireless/ath/ath.h:348:26: error: 'PCIE_LINK_STATE_L1' undeclared (first use in this function) 348 | state |= PCIE_LINK_STATE_L1; | ^~~~~~~~~~~~~~~~~~ -- In file included from drivers/net/wireless/ath/ath9k/common.h:19, from drivers/net/wireless/ath/ath9k/ath9k.h:29, from drivers/net/wireless/ath/ath9k/beacon.c:18: drivers/net/wireless/ath/ath9k/../ath.h: In function 'ath_pci_aspm_state': >> drivers/net/wireless/ath/ath9k/../ath.h:346:26: error: 'PCIE_LINK_STATE_L0S' undeclared (first use in this function) 346 | state |= PCIE_LINK_STATE_L0S; | ^~~~~~~~~~~~~~~~~~~ drivers/net/wireless/ath/ath9k/../ath.h:346:26: note: each undeclared identifier is reported only once for each function it appears in >> drivers/net/wireless/ath/ath9k/../ath.h:348:26: error: 'PCIE_LINK_STATE_L1' undeclared (first use in this function) 348 | state |= PCIE_LINK_STATE_L1; | ^~~~~~~~~~~~~~~~~~ -- In file included from drivers/net/wireless/ath/carl9170/../regd.h:23, from drivers/net/wireless/ath/carl9170/carl9170.h:61, from drivers/net/wireless/ath/carl9170/main.c:47: drivers/net/wireless/ath/carl9170/../ath.h: In function 'ath_pci_aspm_state': >> drivers/net/wireless/ath/carl9170/../ath.h:346:26: error: 'PCIE_LINK_STATE_L0S' undeclared (first use in this function) 346 | state |= PCIE_LINK_STATE_L0S; | ^~~~~~~~~~~~~~~~~~~ drivers/net/wireless/ath/carl9170/../ath.h:346:26: note: each undeclared identifier is reported only once for each function it appears in >> drivers/net/wireless/ath/carl9170/../ath.h:348:26: error: 'PCIE_LINK_STATE_L1' undeclared (first use in this function) 348 | state |= PCIE_LINK_STATE_L1; | ^~~~~~~~~~~~~~~~~~ -- In file included from drivers/net/wireless/ath/ath6kl/../regd.h:23, from drivers/net/wireless/ath/ath6kl/wmi.c:24: drivers/net/wireless/ath/ath6kl/../ath.h: In function 'ath_pci_aspm_state': >> drivers/net/wireless/ath/ath6kl/../ath.h:346:26: error: 'PCIE_LINK_STATE_L0S' undeclared (first use in this function) 346 | state |= PCIE_LINK_STATE_L0S; | ^~~~~~~~~~~~~~~~~~~ drivers/net/wireless/ath/ath6kl/../ath.h:346:26: note: each undeclared identifier is reported only once for each function it appears in >> drivers/net/wireless/ath/ath6kl/../ath.h:348:26: error: 'PCIE_LINK_STATE_L1' undeclared (first use in this function) 348 | state |= PCIE_LINK_STATE_L1; | ^~~~~~~~~~~~~~~~~~ -- In file included from drivers/net/wireless/ath/ath10k/core.h:25, from drivers/net/wireless/ath/ath10k/mac.h:11, from drivers/net/wireless/ath/ath10k/mac.c:9: drivers/net/wireless/ath/ath10k/../ath.h: In function 'ath_pci_aspm_state': >> drivers/net/wireless/ath/ath10k/../ath.h:346:26: error: 'PCIE_LINK_STATE_L0S' undeclared (first use in this function) 346 | state |= PCIE_LINK_STATE_L0S; | ^~~~~~~~~~~~~~~~~~~ drivers/net/wireless/ath/ath10k/../ath.h:346:26: note: each undeclared identifier is reported only once for each function it appears in >> drivers/net/wireless/ath/ath10k/../ath.h:348:26: error: 'PCIE_LINK_STATE_L1' undeclared (first use in this function) 348 | state |= PCIE_LINK_STATE_L1; | ^~~~~~~~~~~~~~~~~~ vim +/PCIE_LINK_STATE_L0S +346 drivers/net/wireless/ath/ath.h 340 341 static inline int ath_pci_aspm_state(u16 lnkctl) 342 { 343 int state = 0; 344 345 if (lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) > 346 state |= PCIE_LINK_STATE_L0S; 347 if (lnkctl & PCI_EXP_LNKCTL_ASPM_L1) > 348 state |= PCIE_LINK_STATE_L1; 349 350 return state; 351 } 352 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Thu, Jul 17, 2025 at 02:59:26PM GMT, kernel test robot wrote: > Hi Manivannan, > > kernel test robot noticed the following build errors: > > [auto build test ERROR on 19272b37aa4f83ca52bdf9c16d5d81bdd1354494] > > url: https://github.com/intel-lab-lkp/linux/commits/Manivannan-Sadhasivam-via-B4-Relay/PCI-ASPM-Fix-the-behavior-of-pci_enable_link_state-APIs/20250716-205857 > base: 19272b37aa4f83ca52bdf9c16d5d81bdd1354494 > patch link: https://lore.kernel.org/r/20250716-ath-aspm-fix-v1-4-dd3e62c1b692%40oss.qualcomm.com > patch subject: [PATCH 4/6] wifi: ath12k: Use pci_{enable/disable}_link_state() APIs to enable/disable ASPM states > config: m68k-allmodconfig (https://download.01.org/0day-ci/archive/20250717/202507171411.xOxUslAs-lkp@intel.com/config) > compiler: m68k-linux-gcc (GCC) 15.1.0 > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250717/202507171411.xOxUslAs-lkp@intel.com/reproduce) > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <lkp@intel.com> > | Closes: https://lore.kernel.org/oe-kbuild-all/202507171411.xOxUslAs-lkp@intel.com/ > > All errors (new ones prefixed by >>): > > In file included from drivers/net/wireless/ath/main.c:22: > drivers/net/wireless/ath/ath.h: In function 'ath_pci_aspm_state': > >> drivers/net/wireless/ath/ath.h:346:26: error: 'PCIE_LINK_STATE_L0S' undeclared (first use in this function) > 346 | state |= PCIE_LINK_STATE_L0S; > | ^~~~~~~~~~~~~~~~~~~ Ok, this is an issue in the PCI header. The CONFIG_PCI symbol wraps the ASPM definitions also. In this config, CONFIG_PCI is unset, so it triggered the undeclared definition error. I will add a patch to move all definitions out of the CONFIG_PCI guard. It is supposed to wrap only the function definitions/declarations. - Mani > drivers/net/wireless/ath/ath.h:346:26: note: each undeclared identifier is reported only once for each function it appears in > >> drivers/net/wireless/ath/ath.h:348:26: error: 'PCIE_LINK_STATE_L1' undeclared (first use in this function) > 348 | state |= PCIE_LINK_STATE_L1; > | ^~~~~~~~~~~~~~~~~~ > -- > In file included from drivers/net/wireless/ath/ath9k/common.h:19, > from drivers/net/wireless/ath/ath9k/ath9k.h:29, > from drivers/net/wireless/ath/ath9k/beacon.c:18: > drivers/net/wireless/ath/ath9k/../ath.h: In function 'ath_pci_aspm_state': > >> drivers/net/wireless/ath/ath9k/../ath.h:346:26: error: 'PCIE_LINK_STATE_L0S' undeclared (first use in this function) > 346 | state |= PCIE_LINK_STATE_L0S; > | ^~~~~~~~~~~~~~~~~~~ > drivers/net/wireless/ath/ath9k/../ath.h:346:26: note: each undeclared identifier is reported only once for each function it appears in > >> drivers/net/wireless/ath/ath9k/../ath.h:348:26: error: 'PCIE_LINK_STATE_L1' undeclared (first use in this function) > 348 | state |= PCIE_LINK_STATE_L1; > | ^~~~~~~~~~~~~~~~~~ > -- > In file included from drivers/net/wireless/ath/carl9170/../regd.h:23, > from drivers/net/wireless/ath/carl9170/carl9170.h:61, > from drivers/net/wireless/ath/carl9170/main.c:47: > drivers/net/wireless/ath/carl9170/../ath.h: In function 'ath_pci_aspm_state': > >> drivers/net/wireless/ath/carl9170/../ath.h:346:26: error: 'PCIE_LINK_STATE_L0S' undeclared (first use in this function) > 346 | state |= PCIE_LINK_STATE_L0S; > | ^~~~~~~~~~~~~~~~~~~ > drivers/net/wireless/ath/carl9170/../ath.h:346:26: note: each undeclared identifier is reported only once for each function it appears in > >> drivers/net/wireless/ath/carl9170/../ath.h:348:26: error: 'PCIE_LINK_STATE_L1' undeclared (first use in this function) > 348 | state |= PCIE_LINK_STATE_L1; > | ^~~~~~~~~~~~~~~~~~ > -- > In file included from drivers/net/wireless/ath/ath6kl/../regd.h:23, > from drivers/net/wireless/ath/ath6kl/wmi.c:24: > drivers/net/wireless/ath/ath6kl/../ath.h: In function 'ath_pci_aspm_state': > >> drivers/net/wireless/ath/ath6kl/../ath.h:346:26: error: 'PCIE_LINK_STATE_L0S' undeclared (first use in this function) > 346 | state |= PCIE_LINK_STATE_L0S; > | ^~~~~~~~~~~~~~~~~~~ > drivers/net/wireless/ath/ath6kl/../ath.h:346:26: note: each undeclared identifier is reported only once for each function it appears in > >> drivers/net/wireless/ath/ath6kl/../ath.h:348:26: error: 'PCIE_LINK_STATE_L1' undeclared (first use in this function) > 348 | state |= PCIE_LINK_STATE_L1; > | ^~~~~~~~~~~~~~~~~~ > -- > In file included from drivers/net/wireless/ath/ath10k/core.h:25, > from drivers/net/wireless/ath/ath10k/mac.h:11, > from drivers/net/wireless/ath/ath10k/mac.c:9: > drivers/net/wireless/ath/ath10k/../ath.h: In function 'ath_pci_aspm_state': > >> drivers/net/wireless/ath/ath10k/../ath.h:346:26: error: 'PCIE_LINK_STATE_L0S' undeclared (first use in this function) > 346 | state |= PCIE_LINK_STATE_L0S; > | ^~~~~~~~~~~~~~~~~~~ > drivers/net/wireless/ath/ath10k/../ath.h:346:26: note: each undeclared identifier is reported only once for each function it appears in > >> drivers/net/wireless/ath/ath10k/../ath.h:348:26: error: 'PCIE_LINK_STATE_L1' undeclared (first use in this function) > 348 | state |= PCIE_LINK_STATE_L1; > | ^~~~~~~~~~~~~~~~~~ > > > vim +/PCIE_LINK_STATE_L0S +346 drivers/net/wireless/ath/ath.h > > 340 > 341 static inline int ath_pci_aspm_state(u16 lnkctl) > 342 { > 343 int state = 0; > 344 > 345 if (lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) > > 346 state |= PCIE_LINK_STATE_L0S; > 347 if (lnkctl & PCI_EXP_LNKCTL_ASPM_L1) > > 348 state |= PCIE_LINK_STATE_L1; > 349 > 350 return state; > 351 } > 352 > > -- > 0-DAY CI Kernel Test Service > https://github.com/intel/lkp-tests/wiki -- மணிவண்ணன் சதாசிவம்
© 2016 - 2025 Red Hat, Inc.