drivers/gpu/drm/nouveau/nouveau_drm.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+)
Kexec reboot does not reset PCI devices.
Invoking the full DRM/TTM teardown from ->shutdown can trigger WARNs when
userspace still holds DRM file descriptors.
Quiesce the GPU through the suspend path and then power down the PCI
function so the next kernel can re-initialize the device from a consistent
state.
WARNING: drivers/gpu/drm/drm_mode_config.c:578 at drm_mode_config_cleanup+0x2e7/0x300, CPU#2: kexec/1300
Call Trace:
<TASK>
? srso_return_thunk+0x5/0x5f
? enable_work+0x3a/0x100
nouveau_display_destroy+0x39/0x70 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
nouveau_drm_device_fini+0x7b/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
pci_device_shutdown+0x35/0x60
device_shutdown+0x11c/0x1b0
kernel_kexec+0x13a/0x160
__do_sys_reboot+0x209/0x240
do_syscall_64+0x81/0x610
? srso_return_thunk+0x5/0x5f
? __rtnl_unlock+0x37/0x70
? srso_return_thunk+0x5/0x5f
? netdev_run_todo+0x63/0x570
? netif_change_flags+0x54/0x70
? srso_return_thunk+0x5/0x5f
? devinet_ioctl+0x1e5/0x790
? srso_return_thunk+0x5/0x5f
? inet_ioctl+0x1e9/0x200
? srso_return_thunk+0x5/0x5f
? srso_return_thunk+0x5/0x5f
? sock_do_ioctl+0x7d/0x130
? srso_return_thunk+0x5/0x5f
? __x64_sys_ioctl+0x97/0xe0
? srso_return_thunk+0x5/0x5f
? srso_return_thunk+0x5/0x5f
? do_syscall_64+0x23b/0x610
? srso_return_thunk+0x5/0x5f
? put_user_ifreq+0x7a/0x90
? srso_return_thunk+0x5/0x5f
? sock_do_ioctl+0x107/0x130
? srso_return_thunk+0x5/0x5f
? __x64_sys_ioctl+0x97/0xe0
? srso_return_thunk+0x5/0x5f
? do_syscall_64+0x81/0x610
? srso_return_thunk+0x5/0x5f
? exc_page_fault+0x7e/0x1a0
entry_SYSCALL_64_after_hwframe+0x76/0x7e
nouveau 0000:26:00.0: [drm] drm_WARN_ON(!list_empty(&fb->filp_head))
WARNING: drivers/gpu/drm/drm_framebuffer.c:833 at drm_framebuffer_free+0x73/0xa0, CPU#2: kexec/1300
Call Trace:
<TASK>
drm_mode_config_cleanup+0x248/0x300
? __pfx___drm_printfn_dbg+0x10/0x10
? drm_mode_config_cleanup+0x1dc/0x300
nouveau_display_destroy+0x39/0x70 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
nouveau_drm_device_fini+0x7b/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
pci_device_shutdown+0x35/0x60
device_shutdown+0x11c/0x1b0
kernel_kexec+0x13a/0x160
__do_sys_reboot+0x209/0x240
do_syscall_64+0x81/0x610
? srso_return_thunk+0x5/0x5f
? __rtnl_unlock+0x37/0x70
? srso_return_thunk+0x5/0x5f
? netdev_run_todo+0x63/0x570
? netif_change_flags+0x54/0x70
? srso_return_thunk+0x5/0x5f
? devinet_ioctl+0x1e5/0x790
? srso_return_thunk+0x5/0x5f
? inet_ioctl+0x1e9/0x200
? srso_return_thunk+0x5/0x5f
? srso_return_thunk+0x5/0x5f
? sock_do_ioctl+0x7d/0x130
? srso_return_thunk+0x5/0x5f
? __x64_sys_ioctl+0x97/0xe0
? srso_return_thunk+0x5/0x5f
? srso_return_thunk+0x5/0x5f
? do_syscall_64+0x23b/0x610
? srso_return_thunk+0x5/0x5f
? put_user_ifreq+0x7a/0x90
? srso_return_thunk+0x5/0x5f
? sock_do_ioctl+0x107/0x130
? srso_return_thunk+0x5/0x5f
? __x64_sys_ioctl+0x97/0xe0
? srso_return_thunk+0x5/0x5f
? do_syscall_64+0x81/0x610
? srso_return_thunk+0x5/0x5f
? exc_page_fault+0x7e/0x1a0
entry_SYSCALL_64_after_hwframe+0x76/0x7e
WARNING: include/drm/ttm/ttm_resource.h:406 at nouveau_ttm_fini+0x257/0x270 [nouveau], CPU#2: kexec/1300
Call Trace:
<TASK>
nouveau_drm_device_fini+0x93/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
pci_device_shutdown+0x35/0x60
device_shutdown+0x11c/0x1b0
kernel_kexec+0x13a/0x160
__do_sys_reboot+0x209/0x240
do_syscall_64+0x81/0x610
? srso_return_thunk+0x5/0x5f
? __rtnl_unlock+0x37/0x70
? srso_return_thunk+0x5/0x5f
? netdev_run_todo+0x63/0x570
? netif_change_flags+0x54/0x70
? srso_return_thunk+0x5/0x5f
? devinet_ioctl+0x1e5/0x790
? srso_return_thunk+0x5/0x5f
? inet_ioctl+0x1e9/0x200
? srso_return_thunk+0x5/0x5f
? srso_return_thunk+0x5/0x5f
? sock_do_ioctl+0x7d/0x130
? srso_return_thunk+0x5/0x5f
? __x64_sys_ioctl+0x97/0xe0
? srso_return_thunk+0x5/0x5f
? srso_return_thunk+0x5/0x5f
? do_syscall_64+0x23b/0x610
? srso_return_thunk+0x5/0x5f
? put_user_ifreq+0x7a/0x90
? srso_return_thunk+0x5/0x5f
? sock_do_ioctl+0x107/0x130
? srso_return_thunk+0x5/0x5f
? __x64_sys_ioctl+0x97/0xe0
? srso_return_thunk+0x5/0x5f
? do_syscall_64+0x81/0x610
? srso_return_thunk+0x5/0x5f
? exc_page_fault+0x7e/0x1a0
entry_SYSCALL_64_after_hwframe+0x76/0x7e
Signed-off-by: Li Chen <me@linux.beauty>
---
drivers/gpu/drm/nouveau/nouveau_drm.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 1527b801f013..50384462723b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -1079,6 +1079,29 @@ nouveau_pmops_resume(struct device *dev)
return ret;
}
+static void
+nouveau_drm_shutdown(struct pci_dev *pdev)
+{
+ struct nouveau_drm *drm = pci_get_drvdata(pdev);
+ int ret;
+
+ if (!drm)
+ return;
+
+ if (drm->dev->switch_power_state == DRM_SWITCH_POWER_OFF ||
+ drm->dev->switch_power_state == DRM_SWITCH_POWER_DYNAMIC_OFF)
+ return;
+
+ ret = nouveau_do_suspend(drm, false);
+ if (ret)
+ NV_ERROR(drm, "shutdown suspend failed with: %d\n", ret);
+
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+ pci_set_power_state(pdev, PCI_D3hot);
+ usleep_range(200, 400);
+}
+
static int
nouveau_pmops_freeze(struct device *dev)
{
@@ -1408,6 +1431,7 @@ nouveau_drm_pci_driver = {
.id_table = nouveau_drm_pci_table,
.probe = nouveau_drm_probe,
.remove = nouveau_drm_remove,
+ .shutdown = nouveau_drm_shutdown,
.driver.pm = &nouveau_pm_ops,
};
--
2.52.0
On Tue, 20 Jan 2026 at 22:15, Li Chen <me@linux.beauty> wrote:
>
> Kexec reboot does not reset PCI devices.
> Invoking the full DRM/TTM teardown from ->shutdown can trigger WARNs when
> userspace still holds DRM file descriptors.
>
> Quiesce the GPU through the suspend path and then power down the PCI
> function so the next kernel can re-initialize the device from a consistent
> state.
>
> WARNING: drivers/gpu/drm/drm_mode_config.c:578 at drm_mode_config_cleanup+0x2e7/0x300, CPU#2: kexec/1300
> Call Trace:
> <TASK>
> ? srso_return_thunk+0x5/0x5f
> ? enable_work+0x3a/0x100
> nouveau_display_destroy+0x39/0x70 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> nouveau_drm_device_fini+0x7b/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> pci_device_shutdown+0x35/0x60
> device_shutdown+0x11c/0x1b0
> kernel_kexec+0x13a/0x160
> __do_sys_reboot+0x209/0x240
> do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? __rtnl_unlock+0x37/0x70
> ? srso_return_thunk+0x5/0x5f
> ? netdev_run_todo+0x63/0x570
> ? netif_change_flags+0x54/0x70
> ? srso_return_thunk+0x5/0x5f
> ? devinet_ioctl+0x1e5/0x790
> ? srso_return_thunk+0x5/0x5f
> ? inet_ioctl+0x1e9/0x200
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x7d/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x23b/0x610
> ? srso_return_thunk+0x5/0x5f
> ? put_user_ifreq+0x7a/0x90
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x107/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? exc_page_fault+0x7e/0x1a0
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
> nouveau 0000:26:00.0: [drm] drm_WARN_ON(!list_empty(&fb->filp_head))
> WARNING: drivers/gpu/drm/drm_framebuffer.c:833 at drm_framebuffer_free+0x73/0xa0, CPU#2: kexec/1300
> Call Trace:
> <TASK>
> drm_mode_config_cleanup+0x248/0x300
> ? __pfx___drm_printfn_dbg+0x10/0x10
> ? drm_mode_config_cleanup+0x1dc/0x300
> nouveau_display_destroy+0x39/0x70 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> nouveau_drm_device_fini+0x7b/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> pci_device_shutdown+0x35/0x60
> device_shutdown+0x11c/0x1b0
> kernel_kexec+0x13a/0x160
> __do_sys_reboot+0x209/0x240
> do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? __rtnl_unlock+0x37/0x70
> ? srso_return_thunk+0x5/0x5f
> ? netdev_run_todo+0x63/0x570
> ? netif_change_flags+0x54/0x70
> ? srso_return_thunk+0x5/0x5f
> ? devinet_ioctl+0x1e5/0x790
> ? srso_return_thunk+0x5/0x5f
> ? inet_ioctl+0x1e9/0x200
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x7d/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x23b/0x610
> ? srso_return_thunk+0x5/0x5f
> ? put_user_ifreq+0x7a/0x90
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x107/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? exc_page_fault+0x7e/0x1a0
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
> WARNING: include/drm/ttm/ttm_resource.h:406 at nouveau_ttm_fini+0x257/0x270 [nouveau], CPU#2: kexec/1300
> Call Trace:
> <TASK>
> nouveau_drm_device_fini+0x93/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> pci_device_shutdown+0x35/0x60
> device_shutdown+0x11c/0x1b0
> kernel_kexec+0x13a/0x160
> __do_sys_reboot+0x209/0x240
> do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? __rtnl_unlock+0x37/0x70
> ? srso_return_thunk+0x5/0x5f
> ? netdev_run_todo+0x63/0x570
> ? netif_change_flags+0x54/0x70
> ? srso_return_thunk+0x5/0x5f
> ? devinet_ioctl+0x1e5/0x790
> ? srso_return_thunk+0x5/0x5f
> ? inet_ioctl+0x1e9/0x200
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x7d/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x23b/0x610
> ? srso_return_thunk+0x5/0x5f
> ? put_user_ifreq+0x7a/0x90
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x107/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? exc_page_fault+0x7e/0x1a0
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
> Signed-off-by: Li Chen <me@linux.beauty>
> ---
> drivers/gpu/drm/nouveau/nouveau_drm.c | 24 ++++++++++++++++++++++++
> 1 file changed, 24 insertions(+)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
> index 1527b801f013..50384462723b 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> @@ -1079,6 +1079,29 @@ nouveau_pmops_resume(struct device *dev)
> return ret;
> }
>
> +static void
> +nouveau_drm_shutdown(struct pci_dev *pdev)
> +{
> + struct nouveau_drm *drm = pci_get_drvdata(pdev);
> + int ret;
> +
> + if (!drm)
> + return;
> +
> + if (drm->dev->switch_power_state == DRM_SWITCH_POWER_OFF ||
> + drm->dev->switch_power_state == DRM_SWITCH_POWER_DYNAMIC_OFF)
> + return;
> +
> + ret = nouveau_do_suspend(drm, false);
> + if (ret)
> + NV_ERROR(drm, "shutdown suspend failed with: %d\n", ret);
> +
> + pci_save_state(pdev);
> + pci_disable_device(pdev);
> + pci_set_power_state(pdev, PCI_D3hot);
> + usleep_range(200, 400);

Why is this needed? It at least needs a comment.
Dave.
Hi Dave,
> On Tue, 20 Jan 2026 at 22:15, Li Chen <me@linux.beauty> wrote:
> >
> > Kexec reboot does not reset PCI devices.
> > Invoking the full DRM/TTM teardown from ->shutdown can trigger WARNs when
> > userspace still holds DRM file descriptors.
> >
> > Quiesce the GPU through the suspend path and then power down the PCI
> > function so the next kernel can re-initialize the device from a consistent
> > state.
> >
> > WARNING: drivers/gpu/drm/drm_mode_config.c:578 at drm_mode_config_cleanup+0x2e7/0x300, CPU#2: kexec/1300
> > Call Trace:
> > <TASK>
> > ? srso_return_thunk+0x5/0x5f
> > ? enable_work+0x3a/0x100
> > nouveau_display_destroy+0x39/0x70 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> > nouveau_drm_device_fini+0x7b/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> > nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> > pci_device_shutdown+0x35/0x60
> > device_shutdown+0x11c/0x1b0
> > kernel_kexec+0x13a/0x160
> > __do_sys_reboot+0x209/0x240
> > do_syscall_64+0x81/0x610
> > ? srso_return_thunk+0x5/0x5f
> > ? __rtnl_unlock+0x37/0x70
> > ? srso_return_thunk+0x5/0x5f
> > ? netdev_run_todo+0x63/0x570
> > ? netif_change_flags+0x54/0x70
> > ? srso_return_thunk+0x5/0x5f
> > ? devinet_ioctl+0x1e5/0x790
> > ? srso_return_thunk+0x5/0x5f
> > ? inet_ioctl+0x1e9/0x200
> > ? srso_return_thunk+0x5/0x5f
> > ? srso_return_thunk+0x5/0x5f
> > ? sock_do_ioctl+0x7d/0x130
> > ? srso_return_thunk+0x5/0x5f
> > ? __x64_sys_ioctl+0x97/0xe0
> > ? srso_return_thunk+0x5/0x5f
> > ? srso_return_thunk+0x5/0x5f
> > ? do_syscall_64+0x23b/0x610
> > ? srso_return_thunk+0x5/0x5f
> > ? put_user_ifreq+0x7a/0x90
> > ? srso_return_thunk+0x5/0x5f
> > ? sock_do_ioctl+0x107/0x130
> > ? srso_return_thunk+0x5/0x5f
> > ? __x64_sys_ioctl+0x97/0xe0
> > ? srso_return_thunk+0x5/0x5f
> > ? do_syscall_64+0x81/0x610
> > ? srso_return_thunk+0x5/0x5f
> > ? exc_page_fault+0x7e/0x1a0
> > entry_SYSCALL_64_after_hwframe+0x76/0x7e
> >
> > nouveau 0000:26:00.0: [drm] drm_WARN_ON(!list_empty(&fb->filp_head))
> > WARNING: drivers/gpu/drm/drm_framebuffer.c:833 at drm_framebuffer_free+0x73/0xa0, CPU#2: kexec/1300
> > Call Trace:
> > <TASK>
> > drm_mode_config_cleanup+0x248/0x300
> > ? __pfx___drm_printfn_dbg+0x10/0x10
> > ? drm_mode_config_cleanup+0x1dc/0x300
> > nouveau_display_destroy+0x39/0x70 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> > nouveau_drm_device_fini+0x7b/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> > nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> > pci_device_shutdown+0x35/0x60
> > device_shutdown+0x11c/0x1b0
> > kernel_kexec+0x13a/0x160
> > __do_sys_reboot+0x209/0x240
> > do_syscall_64+0x81/0x610
> > ? srso_return_thunk+0x5/0x5f
> > ? __rtnl_unlock+0x37/0x70
> > ? srso_return_thunk+0x5/0x5f
> > ? netdev_run_todo+0x63/0x570
> > ? netif_change_flags+0x54/0x70
> > ? srso_return_thunk+0x5/0x5f
> > ? devinet_ioctl+0x1e5/0x790
> > ? srso_return_thunk+0x5/0x5f
> > ? inet_ioctl+0x1e9/0x200
> > ? srso_return_thunk+0x5/0x5f
> > ? srso_return_thunk+0x5/0x5f
> > ? sock_do_ioctl+0x7d/0x130
> > ? srso_return_thunk+0x5/0x5f
> > ? __x64_sys_ioctl+0x97/0xe0
> > ? srso_return_thunk+0x5/0x5f
> > ? srso_return_thunk+0x5/0x5f
> > ? do_syscall_64+0x23b/0x610
> > ? srso_return_thunk+0x5/0x5f
> > ? put_user_ifreq+0x7a/0x90
> > ? srso_return_thunk+0x5/0x5f
> > ? sock_do_ioctl+0x107/0x130
> > ? srso_return_thunk+0x5/0x5f
> > ? __x64_sys_ioctl+0x97/0xe0
> > ? srso_return_thunk+0x5/0x5f
> > ? do_syscall_64+0x81/0x610
> > ? srso_return_thunk+0x5/0x5f
> > ? exc_page_fault+0x7e/0x1a0
> > entry_SYSCALL_64_after_hwframe+0x76/0x7e
> >
> > WARNING: include/drm/ttm/ttm_resource.h:406 at nouveau_ttm_fini+0x257/0x270 [nouveau], CPU#2: kexec/1300
> > Call Trace:
> > <TASK>
> > nouveau_drm_device_fini+0x93/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> > nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> > pci_device_shutdown+0x35/0x60
> > device_shutdown+0x11c/0x1b0
> > kernel_kexec+0x13a/0x160
> > __do_sys_reboot+0x209/0x240
> > do_syscall_64+0x81/0x610
> > ? srso_return_thunk+0x5/0x5f
> > ? __rtnl_unlock+0x37/0x70
> > ? srso_return_thunk+0x5/0x5f
> > ? netdev_run_todo+0x63/0x570
> > ? netif_change_flags+0x54/0x70
> > ? srso_return_thunk+0x5/0x5f
> > ? devinet_ioctl+0x1e5/0x790
> > ? srso_return_thunk+0x5/0x5f
> > ? inet_ioctl+0x1e9/0x200
> > ? srso_return_thunk+0x5/0x5f
> > ? srso_return_thunk+0x5/0x5f
> > ? sock_do_ioctl+0x7d/0x130
> > ? srso_return_thunk+0x5/0x5f
> > ? __x64_sys_ioctl+0x97/0xe0
> > ? srso_return_thunk+0x5/0x5f
> > ? srso_return_thunk+0x5/0x5f
> > ? do_syscall_64+0x23b/0x610
> > ? srso_return_thunk+0x5/0x5f
> > ? put_user_ifreq+0x7a/0x90
> > ? srso_return_thunk+0x5/0x5f
> > ? sock_do_ioctl+0x107/0x130
> > ? srso_return_thunk+0x5/0x5f
> > ? __x64_sys_ioctl+0x97/0xe0
> > ? srso_return_thunk+0x5/0x5f
> > ? do_syscall_64+0x81/0x610
> > ? srso_return_thunk+0x5/0x5f
> > ? exc_page_fault+0x7e/0x1a0
> > entry_SYSCALL_64_after_hwframe+0x76/0x7e
> >
> > Signed-off-by: Li Chen <me@linux.beauty>
> > ---
> > drivers/gpu/drm/nouveau/nouveau_drm.c | 24 ++++++++++++++++++++++++
> > 1 file changed, 24 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > index 1527b801f013..50384462723b 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > @@ -1079,6 +1079,29 @@ nouveau_pmops_resume(struct device *dev)
> > return ret;
> > }
> >
> > +static void
> > +nouveau_drm_shutdown(struct pci_dev *pdev)
> > +{
> > + struct nouveau_drm *drm = pci_get_drvdata(pdev);
> > + int ret;
> > +
> > + if (!drm)
> > + return;
> > +
> > + if (drm->dev->switch_power_state == DRM_SWITCH_POWER_OFF ||
> > + drm->dev->switch_power_state == DRM_SWITCH_POWER_DYNAMIC_OFF)
> > + return;
> > +
> > + ret = nouveau_do_suspend(drm, false);
> > + if (ret)
> > + NV_ERROR(drm, "shutdown suspend failed with: %d\n", ret);
> > +
> > + pci_save_state(pdev);
> > + pci_disable_device(pdev);
> > + pci_set_power_state(pdev, PCI_D3hot);
> > > + usleep_range(200, 400);
>
> Why is this needed? it at least needs a comment.
This patch is needed for kexec/KHO/LUO reboot: firmware reset is skipped, so the GPU may remain in an active/DMA
state and the next kernel can fail to re-init nouveau. We can’t call the full DRM/TTM teardown from ->shutdown() because
userspace may still hold DRM fds during shutdown, which triggers WARNs.
So the shutdown hook reuses the suspend/quiesce path (safe with open fds) and then powers down the PCI function (D3hot)
to leave the device in a consistent state for the next kernel.
Regards,
Li
On Wed Jan 21, 2026 at 1:50 AM CET, Li Chen wrote:
> > > + usleep_range(200, 400);
> >
> > Why is this needed? it at least needs a comment.
>
> This patch is needed for kexec/KHO/LUO reboot: firmware reset is skipped, so the GPU may remain in an active/DMA
> state and the next kernel can fail to re-init nouveau. We can’t call the full DRM/TTM teardown from ->shutdown() because
> userspace may still hold DRM fds during shutdown, which triggers WARNs.
>
> So the shutdown hook reuses the suspend/quiesce path (safe with open fds) and then powers down the PCI function (D3hot)
> to leave the device in a consistent state for the next kernel.

I think the question was about the usleep_range() specifically. :)
Hi Danilo

> On Wed Jan 21, 2026 at 1:50 AM CET, Li Chen wrote:
> > > > + usleep_range(200, 400);
> > >
> > > Why is this needed? it at least needs a comment.
> >
> > This patch is needed for kexec/KHO/LUO reboot: firmware reset is skipped, so the GPU may remain in an active/DMA
> > state and the next kernel can fail to re-init nouveau. We can’t call the full DRM/TTM teardown from ->shutdown() because
> > userspace may still hold DRM fds during shutdown, which triggers WARNs.
> >
> > So the shutdown hook reuses the suspend/quiesce path (safe with open fds) and then powers down the PCI function (D3hot)
> > to leave the device in a consistent state for the next kernel.
>
> I think the question was about the usleep_range() specifically. :)
>

Thanks for the hint.

That usleep_range(200, 400) is just to give the PCI power transition time to settle before an immediate kexec jump.

It’s mirroring the existing nouveau_pmops_suspend() behavior, which already does udelay(200) right after pci_set_power_state(..., PCI_D3hot). In ->shutdown() we’re allowed to
sleep, so I used usleep_range() instead of a busy-wait udelay().

Regards,
Li
Hi Dave,

> Hi Danilo
>
> > On Wed Jan 21, 2026 at 1:50 AM CET, Li Chen wrote:
> > > > > + usleep_range(200, 400);
> > > >
> > > > Why is this needed? it at least needs a comment.
> > >
> > > This patch is needed for kexec/KHO/LUO reboot: firmware reset is skipped, so the GPU may remain in an active/DMA
> > > state and the next kernel can fail to re-init nouveau. We can’t call the full DRM/TTM teardown from ->shutdown() because
> > > userspace may still hold DRM fds during shutdown, which triggers WARNs.
> > >
> > > So the shutdown hook reuses the suspend/quiesce path (safe with open fds) and then powers down the PCI function (D3hot)
> > > to leave the device in a consistent state for the next kernel.
> >
> > I think the question was about the usleep_range() specifically. :)
> >
>
> Thanks for the hint.
>
> That usleep_range(200, 400) is just to give the PCI power transition time to settle before an immediate kexec jump.
>
> It’s mirroring the existing nouveau_pmops_suspend() behavior, which already does udelay(200) right after pci_set_power_state(..., PCI_D3hot). In ->shutdown() we’re allowed to
> sleep, so I used usleep_range() instead of a busy-wait udelay().

Dave, if this is ok, I would add it as a comment in the next version.

Regards,
Li
On Wed, 21 Jan 2026 at 15:44, Li Chen <me@linux.beauty> wrote:
>
> Hi Dave,
>
> > Hi Danilo
> >
> > > On Wed Jan 21, 2026 at 1:50 AM CET, Li Chen wrote:
> > > > > > + usleep_range(200, 400);
> > > > >
> > > > > Why is this needed? it at least needs a comment.
> > > >
> > > > This patch is needed for kexec/KHO/LUO reboot: firmware reset is skipped, so the GPU may remain in an active/DMA
> > > > state and the next kernel can fail to re-init nouveau. We can’t call the full DRM/TTM teardown from ->shutdown() because
> > > > userspace may still hold DRM fds during shutdown, which triggers WARNs.
> > > >
> > > > So the shutdown hook reuses the suspend/quiesce path (safe with open fds) and then powers down the PCI function (D3hot)
> > > > to leave the device in a consistent state for the next kernel.
> > >
> > > I think the question was about the usleep_range() specifically. :)
> > >
> >
> > Thanks for the hint.
> >
> > That usleep_range(200, 400) is just to give the PCI power transition time to settle before an immediate kexec jump.
> >
> > It’s mirroring the existing nouveau_pmops_suspend() behavior, which already does udelay(200) right after pci_set_power_state(..., PCI_D3hot). In ->shutdown() we’re allowed to
> > sleep, so I used usleep_range() instead of a busy-wait udelay().
>
> Dave, if this is ok, I would add it as comment in the next version.

Please do,

Also with that,

Reviewed-by: Dave Airlie <airlied@redhat.com>

>
> Regards,
> Li
>
© 2016 - 2026 Red Hat, Inc.