From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
To fix PCIe bridge resource allocation issues when powering PCIe
switches with the pwrctrl driver, introduce APIs to explicitly power
on and off all related devices simultaneously.
Previously, the individual pwrctrl drivers powered on/off the PCIe devices
autonomously, without any control from the controller drivers. But to
enforce ordering w.r.t powering on the devices, these APIs will power
on/off all the devices at the same time.
The pci_pwrctrl_power_on_devices() API recursively scans the PCI child
nodes, makes sure that pwrctrl drivers are bound to devices, and calls their
power_on() callbacks.
Similarly, pci_pwrctrl_power_off_devices() API powers off devices
recursively via their power_off() callbacks.
These APIs are expected to be called during the controller probe and
suspend/resume time to power on/off the devices. Before calling these
APIs, the pwrctrl devices must already have been created using the
pci_pwrctrl_create_devices() API.
Co-developed-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
Signed-off-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
---
drivers/pci/pwrctrl/core.c | 121 ++++++++++++++++++++++++++++++++++++++++++++
include/linux/pci-pwrctrl.h | 4 ++
2 files changed, 125 insertions(+)
diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c
index 6eca54e0d540..e0a0cf015bd0 100644
--- a/drivers/pci/pwrctrl/core.c
+++ b/drivers/pci/pwrctrl/core.c
@@ -65,6 +65,7 @@ void pci_pwrctrl_init(struct pci_pwrctrl *pwrctrl, struct device *dev)
{
pwrctrl->dev = dev;
INIT_WORK(&pwrctrl->work, rescan_work_func);
+ dev_set_drvdata(dev, pwrctrl);
}
EXPORT_SYMBOL_GPL(pci_pwrctrl_init);
@@ -152,6 +153,126 @@ int devm_pci_pwrctrl_device_set_ready(struct device *dev,
}
EXPORT_SYMBOL_GPL(devm_pci_pwrctrl_device_set_ready);
+/*
+ * Invoke the power_on() callback of the pwrctrl instance stored as @dev's
+ * driver data. A device without driver data is skipped and reported as
+ * success (0); otherwise the callback's return value is passed through.
+ */
+static int __pci_pwrctrl_power_on_device(struct device *dev)
+{
+	struct pci_pwrctrl *ctx = dev_get_drvdata(dev);
+
+	return ctx ? ctx->power_on(ctx) : 0;
+}
+
+/*
+ * Power on the devices in a depth first manner: all available child nodes of
+ * @np are handled before the platform device associated with @np itself.
+ * Before powering on a device, make sure its driver is bound.
+ *
+ * Returns 0 on success (including when no platform device exists for @np),
+ * -EPROBE_DEFER if a device exists but has no driver bound yet, or the error
+ * returned while powering on a child or the device itself.
+ */
+static int pci_pwrctrl_power_on_device(struct device_node *np)
+{
+	struct platform_device *pdev;
+	int ret;
+
+	for_each_available_child_of_node_scoped(np, child) {
+		ret = pci_pwrctrl_power_on_device(child);
+		if (ret)
+			return ret;
+	}
+
+	pdev = of_find_device_by_node(np);
+	if (!pdev)
+		return 0;
+
+	if (device_is_bound(&pdev->dev)) {
+		ret = __pci_pwrctrl_power_on_device(&pdev->dev);
+	} else {
+		/*
+		 * Probe deferral is an expected condition and may happen
+		 * repeatedly, so do not log it at error level.
+		 */
+		dev_dbg(&pdev->dev, "driver is not bound\n");
+		ret = -EPROBE_DEFER;
+	}
+
+	/* Balance the of_find_device_by_node() lookup above */
+	put_device(&pdev->dev);
+
+	return ret;
+}
+
+/**
+ * pci_pwrctrl_power_on_devices - Power on the pwrctrl devices
+ *
+ * @parent: Parent PCI device whose child nodes' pwrctrl devices are to be
+ *	    powered on.
+ *
+ * Walk every available child node of @parent's OF node and power on the
+ * associated pwrctrl devices in a depth first manner. If any device fails to
+ * power on, pci_pwrctrl_power_off_devices() is called for @parent before the
+ * error is returned.
+ *
+ * NOTE(review): the failure path powers off the whole hierarchy, which looks
+ * like it includes devices that were never powered on by this call - confirm
+ * that the power_off() callbacks tolerate this.
+ *
+ * Returns: 0 on success, negative error number on error.
+ */
+int pci_pwrctrl_power_on_devices(struct device *parent)
+{
+	struct device_node *np = parent->of_node;
+	int ret = 0;
+
+	for_each_available_child_of_node_scoped(np, child) {
+		ret = pci_pwrctrl_power_on_device(child);
+		if (ret)
+			break;
+	}
+
+	if (ret)
+		pci_pwrctrl_power_off_devices(parent);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_pwrctrl_power_on_devices);
+
+/*
+ * Invoke the power_off() callback of the pwrctrl instance stored as @dev's
+ * driver data. A device without driver data is silently skipped.
+ */
+static void __pci_pwrctrl_power_off_device(struct device *dev)
+{
+	struct pci_pwrctrl *pwrctrl = dev_get_drvdata(dev);
+
+	if (!pwrctrl)
+		return;
+
+	/* A void function must not use "return <expr>;", so just call it. */
+	pwrctrl->power_off(pwrctrl);
+}
+
+/*
+ * Power off the devices in a depth first manner: all available child nodes of
+ * @np are handled before the platform device associated with @np itself.
+ * A device whose driver is not bound is skipped. Always returns 0.
+ */
+static int pci_pwrctrl_power_off_device(struct device_node *np)
+{
+	struct platform_device *pdev;
+
+	for_each_available_child_of_node_scoped(np, child)
+		pci_pwrctrl_power_off_device(child);
+
+	pdev = of_find_device_by_node(np);
+	if (!pdev)
+		return 0;
+
+	if (device_is_bound(&pdev->dev))
+		__pci_pwrctrl_power_off_device(&pdev->dev);
+
+	put_device(&pdev->dev);
+
+	return 0;
+}
+
+/**
+ * pci_pwrctrl_power_off_devices - Power off the pwrctrl devices
+ *
+ * @parent: Parent PCI device for which the pwrctrl devices need to be powered
+ * off.
+ *
+ * This function recursively traverses all pwrctrl devices for the child nodes
+ * of the specified PCI parent device, and powers them off in a depth first
+ * manner.
+ *
+ * This function does not return a value.
+ */
+void pci_pwrctrl_power_off_devices(struct device *parent)
+{
+ struct device_node *np = parent->of_node;
+
+ for_each_available_child_of_node_scoped(np, child)
+ pci_pwrctrl_power_off_device(child);
+}
+EXPORT_SYMBOL_GPL(pci_pwrctrl_power_off_devices);
+
static int pci_pwrctrl_create_device(struct device_node *np, struct device *parent)
{
struct platform_device *pdev;
diff --git a/include/linux/pci-pwrctrl.h b/include/linux/pci-pwrctrl.h
index 5590ffec0bea..1b77769eebbe 100644
--- a/include/linux/pci-pwrctrl.h
+++ b/include/linux/pci-pwrctrl.h
@@ -57,8 +57,12 @@ int devm_pci_pwrctrl_device_set_ready(struct device *dev,
#if IS_ENABLED(CONFIG_PCI_PWRCTRL)
int pci_pwrctrl_create_devices(struct device *parent);
void pci_pwrctrl_destroy_devices(struct device *parent);
+int pci_pwrctrl_power_on_devices(struct device *parent);
+void pci_pwrctrl_power_off_devices(struct device *parent);
#else
static inline int pci_pwrctrl_create_devices(struct device *parent) { return 0; }
static void pci_pwrctrl_destroy_devices(struct device *parent) { }
+static inline int pci_pwrctrl_power_on_devices(struct device *parent) { return 0; }
+/* No-op stub when CONFIG_PCI_PWRCTRL is disabled; inline avoids unused-function warnings in includers. */
+static inline void pci_pwrctrl_power_off_devices(struct device *parent) { }
#endif
#endif /* __PCI_PWRCTRL_H__ */
--
2.48.1
On Mon, 24 Nov 2025 17:20:47 +0100, Manivannan Sadhasivam via B4 Relay
<devnull+manivannan.sadhasivam.oss.qualcomm.com@kernel.org> said:
> From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
>
> To fix PCIe bridge resource allocation issues when powering PCIe
> switches with the pwrctrl driver, introduce APIs to explicitly power
> on and off all related devices simultaneously.
>
> Previously, the individual pwrctrl drivers powered on/off the PCIe devices
> autonomously, without any control from the controller drivers. But to
> enforce ordering w.r.t powering on the devices, these APIs will power
> on/off all the devices at the same time.
>
> The pci_pwrctrl_power_on_devices() API recursively scans the PCI child
> nodes, makes sure that pwrctrl drivers are bind to devices, and calls their
> power_on() callbacks.
>
> Similarly, pci_pwrctrl_power_off_devices() API powers off devices
> recursively via their power_off() callbacks.
>
> These APIs are expected to be called during the controller probe and
> suspend/resume time to power on/off the devices. But before calling these
> APIs, the pwrctrl devices should've been created beforehand using the
> pci_pwrctrl_{create/destroy}_devices() APIs.
>
> Co-developed-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
> Signed-off-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
> Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
> ---
Makes sense.
Reviewed-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
On Mon, 24 Nov 2025 17:20:47 +0100, Manivannan Sadhasivam via B4 Relay
<devnull+manivannan.sadhasivam.oss.qualcomm.com@kernel.org> said:
> From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
>
> To fix PCIe bridge resource allocation issues when powering PCIe
> switches with the pwrctrl driver, introduce APIs to explicitly power
> on and off all related devices simultaneously.
>
> Previously, the individual pwrctrl drivers powered on/off the PCIe devices
> autonomously, without any control from the controller drivers. But to
> enforce ordering w.r.t powering on the devices, these APIs will power
> on/off all the devices at the same time.
>
> The pci_pwrctrl_power_on_devices() API recursively scans the PCI child
> nodes, makes sure that pwrctrl drivers are bind to devices, and calls their
> power_on() callbacks.
>
> Similarly, pci_pwrctrl_power_off_devices() API powers off devices
> recursively via their power_off() callbacks.
>
> These APIs are expected to be called during the controller probe and
> suspend/resume time to power on/off the devices. But before calling these
> APIs, the pwrctrl devices should've been created beforehand using the
> pci_pwrctrl_{create/destroy}_devices() APIs.
>
> Co-developed-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
> Signed-off-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
> Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
> ---
> drivers/pci/pwrctrl/core.c | 121 ++++++++++++++++++++++++++++++++++++++++++++
> include/linux/pci-pwrctrl.h | 4 ++
> 2 files changed, 125 insertions(+)
>
> diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c
> index 6eca54e0d540..e0a0cf015bd0 100644
> --- a/drivers/pci/pwrctrl/core.c
> +++ b/drivers/pci/pwrctrl/core.c
> @@ -65,6 +65,7 @@ void pci_pwrctrl_init(struct pci_pwrctrl *pwrctrl, struct device *dev)
> {
> pwrctrl->dev = dev;
> INIT_WORK(&pwrctrl->work, rescan_work_func);
> + dev_set_drvdata(dev, pwrctrl);
> }
> EXPORT_SYMBOL_GPL(pci_pwrctrl_init);
>
> @@ -152,6 +153,126 @@ int devm_pci_pwrctrl_device_set_ready(struct device *dev,
> }
> EXPORT_SYMBOL_GPL(devm_pci_pwrctrl_device_set_ready);
>
> +static int __pci_pwrctrl_power_on_device(struct device *dev)
Both this and __pci_pwrctrl_power_off_device() are only used once each. Does
it really make sense to split it out?
> +{
> + struct pci_pwrctrl *pwrctrl = dev_get_drvdata(dev);
> +
> + if (!pwrctrl)
> + return 0;
> +
> + return pwrctrl->power_on(pwrctrl);
> +}
> +
> +/*
> + * Power on the devices in a depth first manner. Before powering on the device,
> + * make sure its driver is bound.
> + */
> +static int pci_pwrctrl_power_on_device(struct device_node *np)
> +{
> + struct platform_device *pdev;
> + int ret;
> +
> + for_each_available_child_of_node_scoped(np, child) {
> + ret = pci_pwrctrl_power_on_device(child);
> + if (ret)
> + return ret;
> + }
> +
> + pdev = of_find_device_by_node(np);
> + if (pdev) {
> + if (!device_is_bound(&pdev->dev)) {
> + dev_err(&pdev->dev, "driver is not bound\n");
This is not an error though, is it? If there are multiple deferrals, we'll
spam the kernel log.
> + ret = -EPROBE_DEFER;
> + } else {
> + ret = __pci_pwrctrl_power_on_device(&pdev->dev);
> + }
> + put_device(&pdev->dev);
> +
> + if (ret)
> + return ret;
> + }
> +
> + return 0;
Bart
On Tue, Nov 25, 2025 at 05:34:04AM -0800, Bartosz Golaszewski wrote:
> On Mon, 24 Nov 2025 17:20:47 +0100, Manivannan Sadhasivam via B4 Relay
> <devnull+manivannan.sadhasivam.oss.qualcomm.com@kernel.org> said:
> > From: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
> >
> > To fix PCIe bridge resource allocation issues when powering PCIe
> > switches with the pwrctrl driver, introduce APIs to explicitly power
> > on and off all related devices simultaneously.
> >
> > Previously, the individual pwrctrl drivers powered on/off the PCIe devices
> > autonomously, without any control from the controller drivers. But to
> > enforce ordering w.r.t powering on the devices, these APIs will power
> > on/off all the devices at the same time.
> >
> > The pci_pwrctrl_power_on_devices() API recursively scans the PCI child
> > nodes, makes sure that pwrctrl drivers are bind to devices, and calls their
> > power_on() callbacks.
> >
> > Similarly, pci_pwrctrl_power_off_devices() API powers off devices
> > recursively via their power_off() callbacks.
> >
> > These APIs are expected to be called during the controller probe and
> > suspend/resume time to power on/off the devices. But before calling these
> > APIs, the pwrctrl devices should've been created beforehand using the
> > pci_pwrctrl_{create/destroy}_devices() APIs.
> >
> > Co-developed-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
> > Signed-off-by: Krishna Chaitanya Chundru <krishna.chundru@oss.qualcomm.com>
> > Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@oss.qualcomm.com>
> > ---
> > drivers/pci/pwrctrl/core.c | 121 ++++++++++++++++++++++++++++++++++++++++++++
> > include/linux/pci-pwrctrl.h | 4 ++
> > 2 files changed, 125 insertions(+)
> >
> > diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c
> > index 6eca54e0d540..e0a0cf015bd0 100644
> > --- a/drivers/pci/pwrctrl/core.c
> > +++ b/drivers/pci/pwrctrl/core.c
> > @@ -65,6 +65,7 @@ void pci_pwrctrl_init(struct pci_pwrctrl *pwrctrl, struct device *dev)
> > {
> > pwrctrl->dev = dev;
> > INIT_WORK(&pwrctrl->work, rescan_work_func);
> > + dev_set_drvdata(dev, pwrctrl);
> > }
> > EXPORT_SYMBOL_GPL(pci_pwrctrl_init);
> >
> > @@ -152,6 +153,126 @@ int devm_pci_pwrctrl_device_set_ready(struct device *dev,
> > }
> > EXPORT_SYMBOL_GPL(devm_pci_pwrctrl_device_set_ready);
> >
> > +static int __pci_pwrctrl_power_on_device(struct device *dev)
>
> Both this and __pci_pwrctrl_power_off_device() are only used once each. Does
> it really make sense to split it out?
>
I just find it neat to split it out. Otherwise, the else condition looks clumsy
in pci_pwrctrl_power_on_device().
> > +{
> > + struct pci_pwrctrl *pwrctrl = dev_get_drvdata(dev);
> > +
> > + if (!pwrctrl)
> > + return 0;
> > +
> > + return pwrctrl->power_on(pwrctrl);
> > +}
> > +
> > +/*
> > + * Power on the devices in a depth first manner. Before powering on the device,
> > + * make sure its driver is bound.
> > + */
> > +static int pci_pwrctrl_power_on_device(struct device_node *np)
> > +{
> > + struct platform_device *pdev;
> > + int ret;
> > +
> > + for_each_available_child_of_node_scoped(np, child) {
> > + ret = pci_pwrctrl_power_on_device(child);
> > + if (ret)
> > + return ret;
> > + }
> > +
> > + pdev = of_find_device_by_node(np);
> > + if (pdev) {
> > + if (!device_is_bound(&pdev->dev)) {
> > + dev_err(&pdev->dev, "driver is not bound\n");
>
> This is not an error though, is it? If there are multiple deferalls, we'll
> spam the kernel log.
>
Good question. Initially, I made it as a debug log, but then realized that
people may wonder why their controller driver encounters probe deferral without
much clue, especially when the driver spits out other logs before calling this
API. So decided to make it dev_err() to give a visual indication.
If it is not preferred, I can demote it to debug log.
- Mani
--
மணிவண்ணன் சதாசிவம்
On Tue, Nov 25, 2025 at 2:50 PM Manivannan Sadhasivam <mani@kernel.org> wrote:
>
> >
> > This is not an error though, is it? If there are multiple deferalls, we'll
> > spam the kernel log.
> >
>
> Good question. Initially, I made it as a debug log, but then realized that
> people may wonder why their controller driver encounters probe deferral without
> much clue, especially when the driver spits out other logs before calling this
> API. So decided to make it dev_err() to give a visual indication.
>
> If it is not preferred, I can demote it to debug log.

If we must log it, I'd say a dev_dbg() is enough. Probe deferral is not unusual.

It would be awesome to be able to synchronize the controller probe with the
pwrctrl device binding but I don't have an idea on how to do it yet. :(

Bart
© 2016 - 2025 Red Hat, Inc.