drivers/pci/remove.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
of_find_device_by_node() doesn't like a NULL pointer, and may end up
identifying an arbitrary device, which we then start tearing down. We
should check for NULL first.
Resolves issues seen when doing `echo 1 > /sys/bus/pci/devices/.../remove`:
[ 222.952201] ------------[ cut here ]------------
[ 222.952218] WARNING: CPU: 0 PID: 5095 at drivers/regulator/core.c:5885 regulator_unregister+0x140/0x160
...
[ 222.953490] CPU: 0 UID: 0 PID: 5095 Comm: bash Tainted: G C 6.12.0-rc1 #3
...
[ 222.954134] Call trace:
[ 222.954150] regulator_unregister+0x140/0x160
[ 222.954186] devm_rdev_release+0x1c/0x30
[ 222.954215] release_nodes+0x68/0x100
[ 222.954249] devres_release_all+0x98/0xf8
[ 222.954282] device_unbind_cleanup+0x20/0x70
[ 222.954306] device_release_driver_internal+0x1f4/0x240
[ 222.954333] device_release_driver+0x20/0x40
[ 222.954358] bus_remove_device+0xd8/0x170
[ 222.954393] device_del+0x154/0x380
[ 222.954422] device_unregister+0x28/0x88
[ 222.954451] of_device_unregister+0x1c/0x30
[ 222.954488] pci_stop_bus_device+0x154/0x1b0
[ 222.954521] pci_stop_and_remove_bus_device_locked+0x28/0x48
[ 222.954553] remove_store+0xa0/0xb8
[ 222.954589] dev_attr_store+0x20/0x40
[ 222.954615] sysfs_kf_write+0x4c/0x68
[ 222.954644] kernfs_fop_write_iter+0x128/0x200
[ 222.954670] do_iter_readv_writev+0xdc/0x1e0
[ 222.954709] vfs_writev+0x100/0x2a0
[ 222.954742] do_writev+0x84/0x130
[ 222.954773] __arm64_sys_writev+0x28/0x40
[ 222.954808] invoke_syscall+0x50/0x120
[ 222.954845] el0_svc_common.constprop.0+0x48/0xf0
[ 222.954878] do_el0_svc+0x24/0x38
[ 222.954910] el0_svc+0x34/0xe0
[ 222.954945] el0t_64_sync_handler+0x120/0x138
[ 222.954978] el0t_64_sync+0x190/0x198
[ 222.955006] ---[ end trace 0000000000000000 ]---
[ 222.965216] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000c0
...
[ 223.107395] CPU: 3 UID: 0 PID: 5095 Comm: bash Tainted: G WC 6.12.0-rc1 #3
...
[ 223.227750] Call trace:
[ 223.230501] pci_stop_bus_device+0x190/0x1b0
[ 223.235314] pci_stop_and_remove_bus_device_locked+0x28/0x48
[ 223.241672] remove_store+0xa0/0xb8
[ 223.245616] dev_attr_store+0x20/0x40
[ 223.249737] sysfs_kf_write+0x4c/0x68
[ 223.253859] kernfs_fop_write_iter+0x128/0x200
[ 223.253887] do_iter_readv_writev+0xdc/0x1e0
[ 223.263631] vfs_writev+0x100/0x2a0
[ 223.267550] do_writev+0x84/0x130
[ 223.271273] __arm64_sys_writev+0x28/0x40
[ 223.275774] invoke_syscall+0x50/0x120
[ 223.279988] el0_svc_common.constprop.0+0x48/0xf0
[ 223.285270] do_el0_svc+0x24/0x38
[ 223.288993] el0_svc+0x34/0xe0
[ 223.292426] el0t_64_sync_handler+0x120/0x138
[ 223.297311] el0t_64_sync+0x190/0x198
[ 223.301423] Code: 17fffff8 91030000 d2800101 f9800011 (c85f7c02)
[ 223.308248] ---[ end trace 0000000000000000 ]---
Fixes: 681725afb6b9 ("PCI/pwrctl: Remove pwrctl device without iterating over all children of pwrctl parent")
Signed-off-by: Brian Norris <briannorris@chromium.org>
---
drivers/pci/remove.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 963b8d2855c1..efc37fcb73e2 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -19,14 +19,19 @@ static void pci_free_resources(struct pci_dev *dev)
static void pci_pwrctrl_unregister(struct device *dev)
{
+ struct device_node *np;
struct platform_device *pdev;
- pdev = of_find_device_by_node(dev_of_node(dev));
+ np = dev_of_node(dev);
+ if (!np)
+ return;
+
+ pdev = of_find_device_by_node(np);
if (!pdev)
return;
of_device_unregister(pdev);
- of_node_clear_flag(dev_of_node(dev), OF_POPULATED);
+ of_node_clear_flag(np, OF_POPULATED);
}
static void pci_stop_dev(struct pci_dev *dev)
--
2.47.0.338
[+to Mani; +cc Jon, Saurabh]
On Tue, Nov 26, 2024 at 01:04:34PM -0800, Brian Norris wrote:
> of_find_device_by_node() doesn't like a NULL pointer, and may end up
> identifying an arbitrary device, which we then start tearing down. We
> should check for NULL first.
>
> Resolves issues seen when doing `echo 1 > /sys/bus/pci/devices/.../remove`:
>
> [ 222.952201] ------------[ cut here ]------------
> [ 222.952218] WARNING: CPU: 0 PID: 5095 at drivers/regulator/core.c:5885 regulator_unregister+0x140/0x160
> ...
> [ 222.953490] CPU: 0 UID: 0 PID: 5095 Comm: bash Tainted: G C 6.12.0-rc1 #3
> ...
> [ 222.954134] Call trace:
> [ 222.954150] regulator_unregister+0x140/0x160
> [ 222.954186] devm_rdev_release+0x1c/0x30
> [ 222.954215] release_nodes+0x68/0x100
> [ 222.954249] devres_release_all+0x98/0xf8
> [ 222.954282] device_unbind_cleanup+0x20/0x70
> [ 222.954306] device_release_driver_internal+0x1f4/0x240
> [ 222.954333] device_release_driver+0x20/0x40
> [ 222.954358] bus_remove_device+0xd8/0x170
> [ 222.954393] device_del+0x154/0x380
> [ 222.954422] device_unregister+0x28/0x88
> [ 222.954451] of_device_unregister+0x1c/0x30
> [ 222.954488] pci_stop_bus_device+0x154/0x1b0
> [ 222.954521] pci_stop_and_remove_bus_device_locked+0x28/0x48
> [ 222.954553] remove_store+0xa0/0xb8
> [ 222.954589] dev_attr_store+0x20/0x40
> [ 222.954615] sysfs_kf_write+0x4c/0x68
> [ 222.954644] kernfs_fop_write_iter+0x128/0x200
> [ 222.954670] do_iter_readv_writev+0xdc/0x1e0
> [ 222.954709] vfs_writev+0x100/0x2a0
> [ 222.954742] do_writev+0x84/0x130
> [ 222.954773] __arm64_sys_writev+0x28/0x40
> [ 222.954808] invoke_syscall+0x50/0x120
> [ 222.954845] el0_svc_common.constprop.0+0x48/0xf0
> [ 222.954878] do_el0_svc+0x24/0x38
> [ 222.954910] el0_svc+0x34/0xe0
> [ 222.954945] el0t_64_sync_handler+0x120/0x138
> [ 222.954978] el0t_64_sync+0x190/0x198
> [ 222.955006] ---[ end trace 0000000000000000 ]---
> [ 222.965216] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000c0
> ...
> [ 223.107395] CPU: 3 UID: 0 PID: 5095 Comm: bash Tainted: G WC 6.12.0-rc1 #3
> ...
> [ 223.227750] Call trace:
> [ 223.230501] pci_stop_bus_device+0x190/0x1b0
> [ 223.235314] pci_stop_and_remove_bus_device_locked+0x28/0x48
> [ 223.241672] remove_store+0xa0/0xb8
> [ 223.245616] dev_attr_store+0x20/0x40
> [ 223.249737] sysfs_kf_write+0x4c/0x68
> [ 223.253859] kernfs_fop_write_iter+0x128/0x200
> [ 223.253887] do_iter_readv_writev+0xdc/0x1e0
> [ 223.263631] vfs_writev+0x100/0x2a0
> [ 223.267550] do_writev+0x84/0x130
> [ 223.271273] __arm64_sys_writev+0x28/0x40
> [ 223.275774] invoke_syscall+0x50/0x120
> [ 223.279988] el0_svc_common.constprop.0+0x48/0xf0
> [ 223.285270] do_el0_svc+0x24/0x38
> [ 223.288993] el0_svc+0x34/0xe0
> [ 223.292426] el0t_64_sync_handler+0x120/0x138
> [ 223.297311] el0t_64_sync+0x190/0x198
> [ 223.301423] Code: 17fffff8 91030000 d2800101 f9800011 (c85f7c02)
> [ 223.308248] ---[ end trace 0000000000000000 ]---
>
> Fixes: 681725afb6b9 ("PCI/pwrctl: Remove pwrctl device without iterating over all children of pwrctl parent")
> Signed-off-by: Brian Norris <briannorris@chromium.org>
Applied to pci/for-linus; hopefully this can still make v6.13-rc1.
I added a Reported-by for Saurabh and tried to add some context around
why we got in this fix in the first place, but I'm not confident that
I got it all right. Please comment:
commit 5c8418cf4025 ("PCI/pwrctrl: Unregister platform device only if one actually exists")
Author: Brian Norris <briannorris@chromium.org>
Date: Tue Nov 26 13:04:34 2024 -0800
PCI/pwrctrl: Unregister platform device only if one actually exists
If a PCI device has an associated device_node with power supplies,
pci_bus_add_device() creates platform devices for use by pwrctrl. When the
PCI device is removed, pci_stop_dev() uses of_find_device_by_node() to
locate the related platform device, then unregisters it.
But when we remove a PCI device with no associated device node,
dev_of_node(dev) is NULL, and of_find_device_by_node(NULL) returns the
first device with "dev->of_node == NULL". The result is that we (a)
mistakenly unregister a completely unrelated platform device, leading to
issues like the first trace below, and (b) dereference the NULL pointer
from dev_of_node() when clearing OF_POPULATED, as in the second trace.
Unregister a platform device only if there is one associated with this PCI
device. This resolves issues seen when doing:
# echo 1 > /sys/bus/pci/devices/.../remove
Sample issue from unregistering the wrong platform device:
WARNING: CPU: 0 PID: 5095 at drivers/regulator/core.c:5885 regulator_unregister+0x140/0x160
Call trace:
regulator_unregister+0x140/0x160
devm_rdev_release+0x1c/0x30
release_nodes+0x68/0x100
devres_release_all+0x98/0xf8
device_unbind_cleanup+0x20/0x70
device_release_driver_internal+0x1f4/0x240
device_release_driver+0x20/0x40
bus_remove_device+0xd8/0x170
device_del+0x154/0x380
device_unregister+0x28/0x88
of_device_unregister+0x1c/0x30
pci_stop_bus_device+0x154/0x1b0
pci_stop_and_remove_bus_device_locked+0x28/0x48
remove_store+0xa0/0xb8
dev_attr_store+0x20/0x40
sysfs_kf_write+0x4c/0x68
Later NULL pointer dereference for of_node_clear_flag(NULL, OF_POPULATED):
Unable to handle kernel NULL pointer dereference at virtual address 00000000000000c0
Call trace:
pci_stop_bus_device+0x190/0x1b0
pci_stop_and_remove_bus_device_locked+0x28/0x48
remove_store+0xa0/0xb8
dev_attr_store+0x20/0x40
sysfs_kf_write+0x4c/0x68
Link: https://lore.kernel.org/r/20241126210443.4052876-1-briannorris@chromium.org
Fixes: 681725afb6b9 ("PCI/pwrctl: Remove pwrctl device without iterating over all children of pwrctl parent")
Reported-by: Saurabh Sengar <ssengar@linux.microsoft.com>
Closes: https://lore.kernel.org/r/1732890621-19656-1-git-send-email-ssengar@linux.microsoft.com
Signed-off-by: Brian Norris <briannorris@chromium.org>
[bhelgaas: commit log]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
> ---
>
> drivers/pci/remove.c | 9 +++++++--
> 1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> index 963b8d2855c1..efc37fcb73e2 100644
> --- a/drivers/pci/remove.c
> +++ b/drivers/pci/remove.c
> @@ -19,14 +19,19 @@ static void pci_free_resources(struct pci_dev *dev)
>
> static void pci_pwrctrl_unregister(struct device *dev)
> {
> + struct device_node *np;
> struct platform_device *pdev;
>
> - pdev = of_find_device_by_node(dev_of_node(dev));
> + np = dev_of_node(dev);
> + if (!np)
> + return;
> +
> + pdev = of_find_device_by_node(np);
> if (!pdev)
> return;
>
> of_device_unregister(pdev);
> - of_node_clear_flag(dev_of_node(dev), OF_POPULATED);
> + of_node_clear_flag(np, OF_POPULATED);
> }
>
> static void pci_stop_dev(struct pci_dev *dev)
> --
> 2.47.0.338
>
On Sat, Nov 30, 2024 at 12:23:19PM -0600, Bjorn Helgaas wrote:
> [+to Mani; +cc Jon, Saurabh]
>
> On Tue, Nov 26, 2024 at 01:04:34PM -0800, Brian Norris wrote:
> > of_find_device_by_node() doesn't like a NULL pointer, and may end up
> > identifying an arbitrary device, which we then start tearing down. We
> > should check for NULL first.
> >
> > Resolves issues seen when doing `echo 1 > /sys/bus/pci/devices/.../remove`:
> >
> > [ 222.952201] ------------[ cut here ]------------
> > [ 222.952218] WARNING: CPU: 0 PID: 5095 at drivers/regulator/core.c:5885 regulator_unregister+0x140/0x160
> > ...
> > [ 222.953490] CPU: 0 UID: 0 PID: 5095 Comm: bash Tainted: G C 6.12.0-rc1 #3
> > ...
> > [ 222.954134] Call trace:
> > [ 222.954150] regulator_unregister+0x140/0x160
> > [ 222.954186] devm_rdev_release+0x1c/0x30
> > [ 222.954215] release_nodes+0x68/0x100
> > [ 222.954249] devres_release_all+0x98/0xf8
> > [ 222.954282] device_unbind_cleanup+0x20/0x70
> > [ 222.954306] device_release_driver_internal+0x1f4/0x240
> > [ 222.954333] device_release_driver+0x20/0x40
> > [ 222.954358] bus_remove_device+0xd8/0x170
> > [ 222.954393] device_del+0x154/0x380
> > [ 222.954422] device_unregister+0x28/0x88
> > [ 222.954451] of_device_unregister+0x1c/0x30
> > [ 222.954488] pci_stop_bus_device+0x154/0x1b0
> > [ 222.954521] pci_stop_and_remove_bus_device_locked+0x28/0x48
> > [ 222.954553] remove_store+0xa0/0xb8
> > [ 222.954589] dev_attr_store+0x20/0x40
> > [ 222.954615] sysfs_kf_write+0x4c/0x68
> > [ 222.954644] kernfs_fop_write_iter+0x128/0x200
> > [ 222.954670] do_iter_readv_writev+0xdc/0x1e0
> > [ 222.954709] vfs_writev+0x100/0x2a0
> > [ 222.954742] do_writev+0x84/0x130
> > [ 222.954773] __arm64_sys_writev+0x28/0x40
> > [ 222.954808] invoke_syscall+0x50/0x120
> > [ 222.954845] el0_svc_common.constprop.0+0x48/0xf0
> > [ 222.954878] do_el0_svc+0x24/0x38
> > [ 222.954910] el0_svc+0x34/0xe0
> > [ 222.954945] el0t_64_sync_handler+0x120/0x138
> > [ 222.954978] el0t_64_sync+0x190/0x198
> > [ 222.955006] ---[ end trace 0000000000000000 ]---
> > [ 222.965216] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000c0
> > ...
> > [ 223.107395] CPU: 3 UID: 0 PID: 5095 Comm: bash Tainted: G WC 6.12.0-rc1 #3
> > ...
> > [ 223.227750] Call trace:
> > [ 223.230501] pci_stop_bus_device+0x190/0x1b0
> > [ 223.235314] pci_stop_and_remove_bus_device_locked+0x28/0x48
> > [ 223.241672] remove_store+0xa0/0xb8
> > [ 223.245616] dev_attr_store+0x20/0x40
> > [ 223.249737] sysfs_kf_write+0x4c/0x68
> > [ 223.253859] kernfs_fop_write_iter+0x128/0x200
> > [ 223.253887] do_iter_readv_writev+0xdc/0x1e0
> > [ 223.263631] vfs_writev+0x100/0x2a0
> > [ 223.267550] do_writev+0x84/0x130
> > [ 223.271273] __arm64_sys_writev+0x28/0x40
> > [ 223.275774] invoke_syscall+0x50/0x120
> > [ 223.279988] el0_svc_common.constprop.0+0x48/0xf0
> > [ 223.285270] do_el0_svc+0x24/0x38
> > [ 223.288993] el0_svc+0x34/0xe0
> > [ 223.292426] el0t_64_sync_handler+0x120/0x138
> > [ 223.297311] el0t_64_sync+0x190/0x198
> > [ 223.301423] Code: 17fffff8 91030000 d2800101 f9800011 (c85f7c02)
> > [ 223.308248] ---[ end trace 0000000000000000 ]---
> >
> > Fixes: 681725afb6b9 ("PCI/pwrctl: Remove pwrctl device without iterating over all children of pwrctl parent")
> > Signed-off-by: Brian Norris <briannorris@chromium.org>
>
> Applied to pci/for-linus; hopefully this can still make v6.13-rc1.
>
> I added a Reported-by for Saurabh and tried to add some context around
> why we got in this fix in the first place, but I'm not confident that
> I got it all right. Please comment:
>
> commit 5c8418cf4025 ("PCI/pwrctrl: Unregister platform device only if one actually exists")
> Author: Brian Norris <briannorris@chromium.org>
> Date: Tue Nov 26 13:04:34 2024 -0800
>
> PCI/pwrctrl: Unregister platform device only if one actually exists
>
> If a PCI device has an associated device_node with power supplies,
> pci_bus_add_device() creates platform devices for use by pwrctrl. When the
> PCI device is removed, pci_stop_dev() uses of_find_device_by_node() to
> locate the related platform device, then unregisters it.
>
> But when we remove a PCI device with no associated device node,
> dev_of_node(dev) is NULL, and of_find_device_by_node(NULL) returns the
> first device with "dev->of_node == NULL". The result is that we (a)
> mistakenly unregister a completely unrelated platform device, leading to
> issues like the first trace below, and (b) dereference the NULL pointer
> from dev_of_node() when clearing OF_POPULATED, as in the second trace.
>
> Unregister a platform device only if there is one associated with this PCI
> device. This resolves issues seen when doing:
>
> # echo 1 > /sys/bus/pci/devices/.../remove
>
> Sample issue from unregistering the wrong platform device:
>
> WARNING: CPU: 0 PID: 5095 at drivers/regulator/core.c:5885 regulator_unregister+0x140/0x160
> Call trace:
> regulator_unregister+0x140/0x160
> devm_rdev_release+0x1c/0x30
> release_nodes+0x68/0x100
> devres_release_all+0x98/0xf8
> device_unbind_cleanup+0x20/0x70
> device_release_driver_internal+0x1f4/0x240
> device_release_driver+0x20/0x40
> bus_remove_device+0xd8/0x170
> device_del+0x154/0x380
> device_unregister+0x28/0x88
> of_device_unregister+0x1c/0x30
> pci_stop_bus_device+0x154/0x1b0
> pci_stop_and_remove_bus_device_locked+0x28/0x48
> remove_store+0xa0/0xb8
> dev_attr_store+0x20/0x40
> sysfs_kf_write+0x4c/0x68
>
> Later NULL pointer dereference for of_node_clear_flag(NULL, OF_POPULATED):
>
> Unable to handle kernel NULL pointer dereference at virtual address 00000000000000c0
> Call trace:
> pci_stop_bus_device+0x190/0x1b0
> pci_stop_and_remove_bus_device_locked+0x28/0x48
> remove_store+0xa0/0xb8
> dev_attr_store+0x20/0x40
> sysfs_kf_write+0x4c/0x68
>
This looks good to me. Thanks for taking care!
- Mani
> Link: https://lore.kernel.org/r/20241126210443.4052876-1-briannorris@chromium.org
> Fixes: 681725afb6b9 ("PCI/pwrctl: Remove pwrctl device without iterating over all children of pwrctl parent")
> Reported-by: Saurabh Sengar <ssengar@linux.microsoft.com>
> Closes: https://lore.kernel.org/r/1732890621-19656-1-git-send-email-ssengar@linux.microsoft.com
> Signed-off-by: Brian Norris <briannorris@chromium.org>
> [bhelgaas: commit log]
> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
>
> > ---
> >
> > drivers/pci/remove.c | 9 +++++++--
> > 1 file changed, 7 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> > index 963b8d2855c1..efc37fcb73e2 100644
> > --- a/drivers/pci/remove.c
> > +++ b/drivers/pci/remove.c
> > @@ -19,14 +19,19 @@ static void pci_free_resources(struct pci_dev *dev)
> >
> > static void pci_pwrctrl_unregister(struct device *dev)
> > {
> > + struct device_node *np;
> > struct platform_device *pdev;
> >
> > - pdev = of_find_device_by_node(dev_of_node(dev));
> > + np = dev_of_node(dev);
> > + if (!np)
> > + return;
> > +
> > + pdev = of_find_device_by_node(np);
> > if (!pdev)
> > return;
> >
> > of_device_unregister(pdev);
> > - of_node_clear_flag(dev_of_node(dev), OF_POPULATED);
> > + of_node_clear_flag(np, OF_POPULATED);
> > }
> >
> > static void pci_stop_dev(struct pci_dev *dev)
> > --
> > 2.47.0.338
> >
--
மணிவண்ணன் சதாசிவம்
[+cc Jon, Saurabh]
On Tue, Nov 26, 2024 at 01:04:34PM -0800, Brian Norris wrote:
> of_find_device_by_node() doesn't like a NULL pointer, and may end up
> identifying an arbitrary device, which we then start tearing down. We
> should check for NULL first.
I assume pci_pwrctrl_unregister() is unregistering something added by
pci_pwrctrl_create_devices() in pci_bus_add_device()?
There are a couple things I don't like about this code (these are
unrelated to this particular regression fix, but they make me worry
that I'm misunderstanding how this all works):
- pci_pwrctrl_create_devices() and pci_pwrctrl_unregister() are not
parallel names.
- pci_pwrctrl_create_devices() has a loop and potentially creates
several pwrctrl devices, one for each child that has a "*-supply"
property, but there is no corresponding loop in
pci_pwrctrl_unregister().
I see that 681725afb6b9 ("PCI/pwrctl: Remove pwrctl device without
iterating over all children of pwrctl parent") is at least partly
*about* removing without iterating, and the commit log has some
explanation about how pwrctrl devices lying around will get removed,
but the lack of parallelism is a hassle for readers.
The subject and commit log also refer to "*the* pwrctl device," which
suggests there is only one, but that seems slightly misleading even
though it goes on to mention "any pwrctl devices lying around."
If they get removed when the parent gets removed, why do we need to
remove *any* of them? Is the real point that we need to clear
OF_POPULATED?
Maybe I'm missing the whole point here?
> [ 222.965216] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000c0
> ...
> [ 223.107395] CPU: 3 UID: 0 PID: 5095 Comm: bash Tainted: G WC 6.12.0-rc1 #3
> ...
> [ 223.227750] Call trace:
> [ 223.230501] pci_stop_bus_device+0x190/0x1b0
> [ 223.235314] pci_stop_and_remove_bus_device_locked+0x28/0x48
> [ 223.241672] remove_store+0xa0/0xb8
> [ 223.245616] dev_attr_store+0x20/0x40
> [ 223.249737] sysfs_kf_write+0x4c/0x68
> [ 223.253859] kernfs_fop_write_iter+0x128/0x200
> [ 223.253887] do_iter_readv_writev+0xdc/0x1e0
> [ 223.263631] vfs_writev+0x100/0x2a0
> [ 223.267550] do_writev+0x84/0x130
> [ 223.271273] __arm64_sys_writev+0x28/0x40
> [ 223.275774] invoke_syscall+0x50/0x120
> [ 223.279988] el0_svc_common.constprop.0+0x48/0xf0
> [ 223.285270] do_el0_svc+0x24/0x38
> [ 223.288993] el0_svc+0x34/0xe0
> [ 223.292426] el0t_64_sync_handler+0x120/0x138
> [ 223.297311] el0t_64_sync+0x190/0x198
> [ 223.301423] Code: 17fffff8 91030000 d2800101 f9800011 (c85f7c02)
> [ 223.308248] ---[ end trace 0000000000000000 ]---
>
> Fixes: 681725afb6b9 ("PCI/pwrctl: Remove pwrctl device without iterating over all children of pwrctl parent")
> Signed-off-by: Brian Norris <briannorris@chromium.org>
> ---
>
> drivers/pci/remove.c | 9 +++++++--
> 1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> index 963b8d2855c1..efc37fcb73e2 100644
> --- a/drivers/pci/remove.c
> +++ b/drivers/pci/remove.c
> @@ -19,14 +19,19 @@ static void pci_free_resources(struct pci_dev *dev)
>
> static void pci_pwrctrl_unregister(struct device *dev)
> {
> + struct device_node *np;
> struct platform_device *pdev;
>
> - pdev = of_find_device_by_node(dev_of_node(dev));
> + np = dev_of_node(dev);
> + if (!np)
> + return;
> +
> + pdev = of_find_device_by_node(np);
> if (!pdev)
> return;
>
> of_device_unregister(pdev);
> - of_node_clear_flag(dev_of_node(dev), OF_POPULATED);
> + of_node_clear_flag(np, OF_POPULATED);
> }
>
> static void pci_stop_dev(struct pci_dev *dev)
> --
> 2.47.0.338
>
On Fri, Nov 29, 2024 at 01:28:11PM -0600, Bjorn Helgaas wrote:
> [+cc Jon, Saurabh]
>
> On Tue, Nov 26, 2024 at 01:04:34PM -0800, Brian Norris wrote:
> > of_find_device_by_node() doesn't like a NULL pointer, and may end up
> > identifying an arbitrary device, which we then start tearing down. We
> > should check for NULL first.
>
> I assume pci_pwrctrl_unregister() is unregistering something added by
> pci_pwrctrl_create_devices() in pci_bus_add_device()?
>
> There are a couple things I don't like about this code (these are
> unrelated to this particular regression fix, but they make me worry
> that I'm misunderstanding how this all works):
>
The internals of PCI pwrctl is not documented clearly in a single place.
Let me try to clarify below.
> - pci_pwrctrl_create_devices() and pci_pwrctrl_unregister() are not
> parallel names.
>
> - pci_pwrctrl_create_devices() has a loop and potentially creates
> several pwrctrl devices, one for each child that has a "*-supply"
> property, but there is no corresponding loop in
> pci_pwrctrl_unregister().
So we create pwrctl devices starting from PCI bridges. This is because, in order
for the endpoints to be enumerated, the relevant pwrctl drivers need to be
probed first (i.e., pci_bus_add_device() will be called for the endpoints only
when they are detected on the bus, but that cannot happen until the relevant
pwrctl driver is probed). That's why we have the loop in
pci_pwrctrl_create_devices() to iterate over the children of PCI bridges defined
in devicetree.
When the pwrctl devices are created, they will be destroyed in
pci_pwrctrl_unregister(). Now we don't have loop here because, this function
is called from pci_stop_dev() which will be called for each added PCI devices.
So even when the bridge device gets removed, all of its child devices will be
removed first, so we don't need to iterate.
>
> I see that 681725afb6b9 ("PCI/pwrctl: Remove pwrctl device without
> iterating over all children of pwrctl parent") is at least partly
> *about* removing without iterating, and the commit log has some
> explanation about how pwrctrl devices lying around will get removed,
> but the lack of parallelism is a hassle for readers.
>
> The subject and commit log also refer to "*the* pwrctl device," which
> suggests there is only one, but that seems slightly misleading even
> though it goes on to mention "any pwrctl devices lying around."
>
With 'lying around', the commit meant that if the pwrctl device is not
associated with the PCI device (it can happen if the relevant PCI client driver
is not probed), then those dormant pwrctl devices will be removed when their
parent (PCI bridge) gets removed. Because for those PCI devices, pci_stop_dev()
won't be called.
> If they get removed when the parent gets removed, why do we need to
> remove *any* of them? Is the real point that we need to clear
> OF_POPULATED?
>
The idea of pci_pwrctrl_unregister() is to remove the pwrctl device when the
associated PCI device gets removed. When this happens, the pwrctl driver will
turn off the power to the corresponding PCI device (in other words, it reverses
what it did in probe()). That's the primary reason why we are removing the
pwrctl device when the PCI device gets removed.
Clearing the OF_POPULATED should also be done separately because of the
shortcomings of the of_platform_depopulate() API. See: commit f1536585588b
("PCI: Don't rely on of_platform_depopulate() for reused OF-nodes").
- Mani
> Maybe I'm missing the whole point here?
>
> > [ 222.965216] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000c0
> > ...
> > [ 223.107395] CPU: 3 UID: 0 PID: 5095 Comm: bash Tainted: G WC 6.12.0-rc1 #3
> > ...
> > [ 223.227750] Call trace:
> > [ 223.230501] pci_stop_bus_device+0x190/0x1b0
> > [ 223.235314] pci_stop_and_remove_bus_device_locked+0x28/0x48
> > [ 223.241672] remove_store+0xa0/0xb8
> > [ 223.245616] dev_attr_store+0x20/0x40
> > [ 223.249737] sysfs_kf_write+0x4c/0x68
> > [ 223.253859] kernfs_fop_write_iter+0x128/0x200
> > [ 223.253887] do_iter_readv_writev+0xdc/0x1e0
> > [ 223.263631] vfs_writev+0x100/0x2a0
> > [ 223.267550] do_writev+0x84/0x130
> > [ 223.271273] __arm64_sys_writev+0x28/0x40
> > [ 223.275774] invoke_syscall+0x50/0x120
> > [ 223.279988] el0_svc_common.constprop.0+0x48/0xf0
> > [ 223.285270] do_el0_svc+0x24/0x38
> > [ 223.288993] el0_svc+0x34/0xe0
> > [ 223.292426] el0t_64_sync_handler+0x120/0x138
> > [ 223.297311] el0t_64_sync+0x190/0x198
> > [ 223.301423] Code: 17fffff8 91030000 d2800101 f9800011 (c85f7c02)
> > [ 223.308248] ---[ end trace 0000000000000000 ]---
> >
> > Fixes: 681725afb6b9 ("PCI/pwrctl: Remove pwrctl device without iterating over all children of pwrctl parent")
> > Signed-off-by: Brian Norris <briannorris@chromium.org>
> > ---
> >
> > drivers/pci/remove.c | 9 +++++++--
> > 1 file changed, 7 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> > index 963b8d2855c1..efc37fcb73e2 100644
> > --- a/drivers/pci/remove.c
> > +++ b/drivers/pci/remove.c
> > @@ -19,14 +19,19 @@ static void pci_free_resources(struct pci_dev *dev)
> >
> > static void pci_pwrctrl_unregister(struct device *dev)
> > {
> > + struct device_node *np;
> > struct platform_device *pdev;
> >
> > - pdev = of_find_device_by_node(dev_of_node(dev));
> > + np = dev_of_node(dev);
> > + if (!np)
> > + return;
> > +
> > + pdev = of_find_device_by_node(np);
> > if (!pdev)
> > return;
> >
> > of_device_unregister(pdev);
> > - of_node_clear_flag(dev_of_node(dev), OF_POPULATED);
> > + of_node_clear_flag(np, OF_POPULATED);
> > }
> >
> > static void pci_stop_dev(struct pci_dev *dev)
> > --
> > 2.47.0.338
> >
--
மணிவண்ணன் சதாசிவம்
On Sun, Dec 01, 2024 at 01:51:08PM +0530, Manivannan Sadhasivam wrote: > So we create pwrctl devices starting from PCI bridges. This is because, > in order for the endpoints to be enumerated, the relevant pwrctl drivers > need to be probed first (i.e., pci_bus_add_device() will be called for > the endpoints only when they are detected on the bus, but that cannot > happen until the relevant pwrctl driver is probed). That's why we have > the loop in pci_pwrctrl_create_devices() to iterate over the children > of PCI bridges defined in devicetree. I think a better approach would be to create a pwrctrl device when the corresponding PCI device is scanned. In pci_scan_child_bus_extend(), you'll find this loop: /* Go find them, Rover! */ for (devfn = 0; devfn < 256; devfn += 8) pci_scan_slot(bus, devfn); ... where pci_scan_slot() reads the Vendor ID to determine whether a device is present at the devfn address and then goes on to create the pci_dev. I think what you want to do is, just before the Vendor ID is read, create the pwrctrl device and enable power. The OF node of the pwrctrl device can be found by way of the devfn, right? So you can just search whether an OF node exists for a given devfn. Moreover, for multifunction devices I think you may want to use refcounting so that the pwrctrl device does not cut power unless the refcount reaches zero. Thanks, Lukas
On Sun, Dec 01, 2024 at 01:51:08PM +0530, Manivannan Sadhasivam wrote: > The idea of pci_pwrctrl_unregister() is to remove the pwrctl device when the > associated PCI device gets removed. When this happens, the pwrctl driver will > turn off the power to the corresponding PCI device After pci_pwrctrl_unregister() is called from pci_stop_dev(), the device may be accessed by one of the calls in pci_destroy_dev(). E.g. pci_doe_destroy() may set the DOE Abort bit in the DOE Control Register if a DOE exchange is ongoing. One cannot assume that no such exchange is ongoing merely because the device was unbound from its driver. The PCI core may have legitimate reasons to perform a DOE exchange or generally access config space even after the device has been unbound. And IIUC, those accesses will fail if pwrctrl has powered the device down. Another example is pcie_aspm_exit_link_state(), which will adjust ASPM settings on device removal. So it seems to me that the call to pci_pwrctrl_unregister() needs to be moved to pci_doe_destroy(). However I'm worried that will break the symmetry with pci_pwrctrl_create_devices(), which is only called in the !pci_dev_is_added() case. Thanks, Lukas
On Sun, Dec 01, 2024 at 11:54:45AM +0100, Lukas Wunner wrote: > On Sun, Dec 01, 2024 at 01:51:08PM +0530, Manivannan Sadhasivam wrote: > > The idea of pci_pwrctrl_unregister() is to remove the pwrctl device when the > > associated PCI device gets removed. When this happens, the pwrctl driver will > > turn off the power to the corresponding PCI device > > After pci_pwrctrl_unregister() is called from pci_stop_dev(), > the device may be accessed by one of the calls in pci_destroy_dev(). > > E.g. pci_doe_destroy() may set the DOE Abort bit in the DOE Control > Register if a DOE exchange is ongoing. One cannot assume that no > such exchange is ongoing merely because the device was unbound from > its driver. The PCI core may have legitimate reasons to perform a DOE > exchange or generally access config space even after the device has been > unbound. And IIUC, those accesses will fail if pwrctrl has powered the > device down. > > Another example is pcie_aspm_exit_link_state(), which will adjust ASPM > settings on device removal. > > So it seems to me that the call to pci_pwrctrl_unregister() needs to be > moved to pci_doe_destroy(). However I'm worried that will break the > symmetry with pci_pwrctrl_create_devices(), which is only called in the > !pci_dev_is_added() case. > So I took a stab at both of your suggestions: 1. Moving the pwrctrl devices creation to pci_scan_device() 2. Moving the pwrctrl devices unregister to pci_destroy_dev() Both seems to be working fine and it also allows creating pwrctrl devices for the PCI bridges without any change (for which I was planning to add one more patch earlier). But the devlink creation still needs to happen in pci_bus_add_device(), which I think is fine. Will post the patches tomorrow, thanks! - Mani -- மணிவண்ணன் சதாசிவம்
On 26/11/2024 21:04, Brian Norris wrote:
> of_find_device_by_node() doesn't like a NULL pointer, and may end up
> identifying an arbitrary device, which we then start tearing down. We
> should check for NULL first.
>
> Resolves issues seen when doing `echo 1 > /sys/bus/pci/devices/.../remove`:
>
> [ 222.952201] ------------[ cut here ]------------
> [ 222.952218] WARNING: CPU: 0 PID: 5095 at drivers/regulator/core.c:5885 regulator_unregister+0x140/0x160
> ...
> [ 222.953490] CPU: 0 UID: 0 PID: 5095 Comm: bash Tainted: G C 6.12.0-rc1 #3
> ...
> [ 222.954134] Call trace:
> [ 222.954150] regulator_unregister+0x140/0x160
> [ 222.954186] devm_rdev_release+0x1c/0x30
> [ 222.954215] release_nodes+0x68/0x100
> [ 222.954249] devres_release_all+0x98/0xf8
> [ 222.954282] device_unbind_cleanup+0x20/0x70
> [ 222.954306] device_release_driver_internal+0x1f4/0x240
> [ 222.954333] device_release_driver+0x20/0x40
> [ 222.954358] bus_remove_device+0xd8/0x170
> [ 222.954393] device_del+0x154/0x380
> [ 222.954422] device_unregister+0x28/0x88
> [ 222.954451] of_device_unregister+0x1c/0x30
> [ 222.954488] pci_stop_bus_device+0x154/0x1b0
> [ 222.954521] pci_stop_and_remove_bus_device_locked+0x28/0x48
> [ 222.954553] remove_store+0xa0/0xb8
> [ 222.954589] dev_attr_store+0x20/0x40
> [ 222.954615] sysfs_kf_write+0x4c/0x68
> [ 222.954644] kernfs_fop_write_iter+0x128/0x200
> [ 222.954670] do_iter_readv_writev+0xdc/0x1e0
> [ 222.954709] vfs_writev+0x100/0x2a0
> [ 222.954742] do_writev+0x84/0x130
> [ 222.954773] __arm64_sys_writev+0x28/0x40
> [ 222.954808] invoke_syscall+0x50/0x120
> [ 222.954845] el0_svc_common.constprop.0+0x48/0xf0
> [ 222.954878] do_el0_svc+0x24/0x38
> [ 222.954910] el0_svc+0x34/0xe0
> [ 222.954945] el0t_64_sync_handler+0x120/0x138
> [ 222.954978] el0t_64_sync+0x190/0x198
> [ 222.955006] ---[ end trace 0000000000000000 ]---
> [ 222.965216] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000c0
> ...
> [ 223.107395] CPU: 3 UID: 0 PID: 5095 Comm: bash Tainted: G WC 6.12.0-rc1 #3
> ...
> [ 223.227750] Call trace:
> [ 223.230501] pci_stop_bus_device+0x190/0x1b0
> [ 223.235314] pci_stop_and_remove_bus_device_locked+0x28/0x48
> [ 223.241672] remove_store+0xa0/0xb8
> [ 223.245616] dev_attr_store+0x20/0x40
> [ 223.249737] sysfs_kf_write+0x4c/0x68
> [ 223.253859] kernfs_fop_write_iter+0x128/0x200
> [ 223.253887] do_iter_readv_writev+0xdc/0x1e0
> [ 223.263631] vfs_writev+0x100/0x2a0
> [ 223.267550] do_writev+0x84/0x130
> [ 223.271273] __arm64_sys_writev+0x28/0x40
> [ 223.275774] invoke_syscall+0x50/0x120
> [ 223.279988] el0_svc_common.constprop.0+0x48/0xf0
> [ 223.285270] do_el0_svc+0x24/0x38
> [ 223.288993] el0_svc+0x34/0xe0
> [ 223.292426] el0t_64_sync_handler+0x120/0x138
> [ 223.297311] el0t_64_sync+0x190/0x198
> [ 223.301423] Code: 17fffff8 91030000 d2800101 f9800011 (c85f7c02)
> [ 223.308248] ---[ end trace 0000000000000000 ]---
>
> Fixes: 681725afb6b9 ("PCI/pwrctl: Remove pwrctl device without iterating over all children of pwrctl parent")
> Signed-off-by: Brian Norris <briannorris@chromium.org>
> ---
>
> drivers/pci/remove.c | 9 +++++++--
> 1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
> index 963b8d2855c1..efc37fcb73e2 100644
> --- a/drivers/pci/remove.c
> +++ b/drivers/pci/remove.c
> @@ -19,14 +19,19 @@ static void pci_free_resources(struct pci_dev *dev)
>
> static void pci_pwrctrl_unregister(struct device *dev)
> {
> + struct device_node *np;
> struct platform_device *pdev;
>
> - pdev = of_find_device_by_node(dev_of_node(dev));
> + np = dev_of_node(dev);
> + if (!np)
> + return;
> +
> + pdev = of_find_device_by_node(np);
> if (!pdev)
> return;
>
> of_device_unregister(pdev);
> - of_node_clear_flag(dev_of_node(dev), OF_POPULATED);
> + of_node_clear_flag(np, OF_POPULATED);
> }
>
> static void pci_stop_dev(struct pci_dev *dev)
This fixes a regression we have been seeing on Tegra devices. FWIW ...
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Thanks!
Jon
--
nvpublic
On Tue, Nov 26, 2024 at 01:04:34PM -0800, Brian Norris wrote:
> of_find_device_by_node() doesn't like a NULL pointer, and may end up
> identifying an arbitrary device, which we then start tearing down. We
> should check for NULL first.
>
> Resolves issues seen when doing `echo 1 > /sys/bus/pci/devices/.../remove`:
>
> [ 222.952201] ------------[ cut here ]------------
> [ 222.952218] WARNING: CPU: 0 PID: 5095 at drivers/regulator/core.c:5885 regulator_unregister+0x140/0x160
> ...
> [ 222.953490] CPU: 0 UID: 0 PID: 5095 Comm: bash Tainted: G C 6.12.0-rc1 #3
> ...
> [ 222.954134] Call trace:
> [ 222.954150] regulator_unregister+0x140/0x160
> [ 222.954186] devm_rdev_release+0x1c/0x30
> [ 222.954215] release_nodes+0x68/0x100
> [ 222.954249] devres_release_all+0x98/0xf8
> [ 222.954282] device_unbind_cleanup+0x20/0x70
> [ 222.954306] device_release_driver_internal+0x1f4/0x240
> [ 222.954333] device_release_driver+0x20/0x40
> [ 222.954358] bus_remove_device+0xd8/0x170
> [ 222.954393] device_del+0x154/0x380
> [ 222.954422] device_unregister+0x28/0x88
> [ 222.954451] of_device_unregister+0x1c/0x30
> [ 222.954488] pci_stop_bus_device+0x154/0x1b0
> [ 222.954521] pci_stop_and_remove_bus_device_locked+0x28/0x48
> [ 222.954553] remove_store+0xa0/0xb8
> [ 222.954589] dev_attr_store+0x20/0x40
> [ 222.954615] sysfs_kf_write+0x4c/0x68
> [ 222.954644] kernfs_fop_write_iter+0x128/0x200
> [ 222.954670] do_iter_readv_writev+0xdc/0x1e0
> [ 222.954709] vfs_writev+0x100/0x2a0
> [ 222.954742] do_writev+0x84/0x130
> [ 222.954773] __arm64_sys_writev+0x28/0x40
> [ 222.954808] invoke_syscall+0x50/0x120
> [ 222.954845] el0_svc_common.constprop.0+0x48/0xf0
> [ 222.954878] do_el0_svc+0x24/0x38
> [ 222.954910] el0_svc+0x34/0xe0
> [ 222.954945] el0t_64_sync_handler+0x120/0x138
> [ 222.954978] el0t_64_sync+0x190/0x198
> [ 222.955006] ---[ end trace 0000000000000000 ]---
> [ 222.965216] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000c0
> ...
> [ 223.107395] CPU: 3 UID: 0 PID: 5095 Comm: bash Tainted: G WC 6.12.0-rc1 #3
> ...
> [ 223.227750] Call trace:
> [ 223.230501] pci_stop_bus_device+0x190/0x1b0
> [ 223.235314] pci_stop_and_remove_bus_device_locked+0x28/0x48
> [ 223.241672] remove_store+0xa0/0xb8
> [ 223.245616] dev_attr_store+0x20/0x40
> [ 223.249737] sysfs_kf_write+0x4c/0x68
> [ 223.253859] kernfs_fop_write_iter+0x128/0x200
> [ 223.253887] do_iter_readv_writev+0xdc/0x1e0
> [ 223.263631] vfs_writev+0x100/0x2a0
> [ 223.267550] do_writev+0x84/0x130
> [ 223.271273] __arm64_sys_writev+0x28/0x40
> [ 223.275774] invoke_syscall+0x50/0x120
> [ 223.279988] el0_svc_common.constprop.0+0x48/0xf0
> [ 223.285270] do_el0_svc+0x24/0x38
> [ 223.288993] el0_svc+0x34/0xe0
> [ 223.292426] el0t_64_sync_handler+0x120/0x138
> [ 223.297311] el0t_64_sync+0x190/0x198
> [ 223.301423] Code: 17fffff8 91030000 d2800101 f9800011 (c85f7c02)
> [ 223.308248] ---[ end trace 0000000000000000 ]---
>
> Fixes: 681725afb6b9 ("PCI/pwrctl: Remove pwrctl device without iterating over all children of pwrctl parent")
> Signed-off-by: Brian Norris <briannorris@chromium.org>
Hmm, again of_find_device_by_node()...
Thanks for the patch.
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
- Mani
--
மணிவண்ணன் சதாசிவம்
© 2016 - 2026 Red Hat, Inc.