Move Tyr reset logic into a new reset module and add async reset work.
This adds:
- ResetHandle with internal controller state
- a dedicated ordered reset workqueue
- a pending flag to avoid duplicate queued resets
- run_reset() as the shared synchronous reset helper
Probe now calls reset::run_reset() before normal init. Driver data now
keeps ResetHandle so reset work is drained before clocks and regulators
are dropped.
Tested-by: Deborah Brouwer <deborah.brouwer@collabora.com>
Signed-off-by: Onur Özkan <work@onurozkan.dev>
---
drivers/gpu/drm/tyr/driver.rs | 40 +++-----
drivers/gpu/drm/tyr/reset.rs | 180 ++++++++++++++++++++++++++++++++++
drivers/gpu/drm/tyr/tyr.rs | 1 +
3 files changed, 192 insertions(+), 29 deletions(-)
create mode 100644 drivers/gpu/drm/tyr/reset.rs
diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs
index f7951804e4e0..c80238a21ff2 100644
--- a/drivers/gpu/drm/tyr/driver.rs
+++ b/drivers/gpu/drm/tyr/driver.rs
@@ -6,11 +6,8 @@
OptionalClk, //
},
device::{
- Bound,
- Core,
- Device, //
+ Core, //
},
- devres::Devres,
dma::{
Device as DmaDevice,
DmaMask, //
@@ -22,10 +19,7 @@
Registered,
UnregisteredDevice, //
},
- io::poll,
- new_mutex,
- of,
- platform,
+ new_mutex, of, platform,
prelude::*,
regulator,
regulator::Regulator,
@@ -35,17 +29,15 @@
Arc,
Mutex, //
},
- time, //
};
use crate::{
file::TyrDrmFileData,
fw::Firmware,
gem::BoData,
- gpu,
gpu::GpuInfo,
mmu::Mmu,
- regs, //
+ reset, //
};
pub(crate) type IoMem = kernel::io::mem::IoMem<SZ_2M>;
@@ -62,6 +54,11 @@ pub(crate) struct TyrPlatformDriverData {
#[pin_data]
pub(crate) struct TyrDrmDeviceData {
+ // `ResetHandle::drop()` drains queued/running works and this must happen
+ // before clocks/regulators are dropped. So keep this field before them to
+ // ensure the correct drop order.
+ pub(crate) reset: reset::ResetHandle,
+
pub(crate) pdev: ARef<platform::Device>,
pub(crate) fw: Arc<Firmware>,
@@ -90,22 +87,6 @@ unsafe impl Send for TyrDrmDeviceData {}
// SAFETY: This will be removed in a future patch.
unsafe impl Sync for TyrDrmDeviceData {}
-fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
- // Clear any stale reset-complete IRQ state before issuing a new soft reset.
- regs::GPU_IRQ_CLEAR.write(dev, iomem, regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED)?;
- regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?;
-
- poll::read_poll_timeout(
- || regs::GPU_IRQ_RAWSTAT.read(dev, iomem),
- |status| *status & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED != 0,
- time::Delta::from_millis(1),
- time::Delta::from_millis(100),
- )
- .inspect_err(|_| dev_err!(dev, "GPU reset failed."))?;
-
- Ok(())
-}
-
kernel::of_device_table!(
OF_TABLE,
MODULE_OF_TABLE,
@@ -138,8 +119,7 @@ fn probe(
let request = pdev.io_request_by_index(0).ok_or(ENODEV)?;
let iomem = Arc::pin_init(request.iomap_sized::<SZ_2M>(), GFP_KERNEL)?;
- issue_soft_reset(pdev.as_ref(), &iomem)?;
- gpu::l2_power_on(pdev.as_ref(), &iomem)?;
+ reset::run_reset(pdev.as_ref(), &iomem)?;
let gpu_info = GpuInfo::new(pdev.as_ref(), &iomem)?;
gpu_info.log(pdev);
@@ -153,6 +133,7 @@ fn probe(
let uninit_ddev = UnregisteredDevice::<TyrDrmDriver>::new(pdev.as_ref())?;
let platform: ARef<platform::Device> = pdev.into();
+ let reset = reset::ResetHandle::new(platform.clone(), iomem.clone())?;
let mmu = Mmu::new(pdev, iomem.as_arc_borrow(), &gpu_info)?;
@@ -178,6 +159,7 @@ fn probe(
_mali: mali_regulator,
_sram: sram_regulator,
}),
+ reset,
gpu_info,
});
let ddev = Registration::new_foreign_owned(uninit_ddev, pdev.as_ref(), data, 0)?;
diff --git a/drivers/gpu/drm/tyr/reset.rs b/drivers/gpu/drm/tyr/reset.rs
new file mode 100644
index 000000000000..29dfae98b0dd
--- /dev/null
+++ b/drivers/gpu/drm/tyr/reset.rs
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+
+//! Provides asynchronous reset handling for the Tyr DRM driver via
+//! [`ResetHandle`], which runs reset work on a dedicated ordered
+//! workqueue and avoids duplicate pending resets.
+
+use kernel::{
+ device::{
+ Bound,
+ Device, //
+ },
+ devres::Devres,
+ io::poll,
+ platform,
+ prelude::*,
+ sync::{
+ aref::ARef,
+ atomic::{
+ Acquire,
+ Atomic,
+ Relaxed,
+ Release, //
+ },
+ Arc,
+ },
+ time,
+ workqueue::{
+ self,
+ Work, //
+ },
+};
+
+use crate::{
+ driver::IoMem,
+ gpu,
+ regs, //
+};
+
+/// Manages asynchronous GPU reset handling and ensures only a single reset
+/// work is pending at a time.
+#[pin_data]
+struct Controller {
+ /// Platform device reference needed for reset operations and logging.
+ pdev: ARef<platform::Device>,
+ /// Mapped register space needed for reset operations.
+ iomem: Arc<Devres<IoMem>>,
+ /// Atomic flag for controlling the scheduling pending state.
+ pending: Atomic<bool>,
+ /// Dedicated ordered workqueue for reset operations.
+ wq: workqueue::OrderedQueue,
+ /// Work item backing async reset processing.
+ #[pin]
+ work: Work<Controller>,
+}
+
+kernel::impl_has_work! {
+ impl HasWork<Controller> for Controller { self.work }
+}
+
+impl workqueue::WorkItem for Controller {
+ type Pointer = Arc<Self>;
+
+ fn run(this: Arc<Self>) {
+ this.reset_work();
+ }
+}
+
+impl Controller {
+ /// Creates a [`Controller`] instance.
+ fn new(pdev: ARef<platform::Device>, iomem: Arc<Devres<IoMem>>) -> Result<Arc<Self>> {
+ let wq = workqueue::OrderedQueue::new(c"tyr-reset-wq", 0)?;
+
+ Arc::pin_init(
+ try_pin_init!(Self {
+ pdev,
+ iomem,
+ pending: Atomic::new(false),
+ wq,
+ work <- kernel::new_work!("tyr::reset"),
+ }),
+ GFP_KERNEL,
+ )
+ }
+
+ /// Processes one scheduled reset request.
+ ///
+ /// Panthor reference:
+ /// - drivers/gpu/drm/panthor/panthor_device.c::panthor_device_reset_work()
+ fn reset_work(self: &Arc<Self>) {
+ dev_info!(self.pdev.as_ref(), "GPU reset work is started.\n");
+
+ // SAFETY: `Controller` is part of driver-private data and only exists
+ // while the platform device is bound.
+ let pdev = unsafe { self.pdev.as_ref().as_bound() };
+ if let Err(e) = run_reset(pdev, &self.iomem) {
+ dev_err!(self.pdev.as_ref(), "GPU reset failed: {:?}\n", e);
+ } else {
+ dev_info!(self.pdev.as_ref(), "GPU reset work is done.\n");
+ }
+
+ self.pending.store(false, Release);
+ }
+}
+
+/// Reset handle that shuts down pending work gracefully on drop.
+pub(crate) struct ResetHandle(Arc<Controller>);
+
+impl ResetHandle {
+ /// Creates a [`ResetHandle`] instance.
+ pub(crate) fn new(pdev: ARef<platform::Device>, iomem: Arc<Devres<IoMem>>) -> Result<Self> {
+ Ok(Self(Controller::new(pdev, iomem)?))
+ }
+
+ /// Schedules reset work.
+ #[expect(dead_code)]
+ pub(crate) fn schedule(&self) {
+ // TODO: Similar to `panthor_device_schedule_reset()` in Panthor, add a
+ // power management check once Tyr supports it.
+
+ // Keep only one reset request running or queued. If one is already pending,
+ // we ignore new schedule requests.
+ if self.0.pending.cmpxchg(false, true, Relaxed).is_ok()
+ && self.0.wq.enqueue(self.0.clone()).is_err()
+ {
+ self.0.pending.store(false, Release);
+ }
+ }
+
+ /// Returns true if a reset is queued or in progress.
+ ///
+ /// Note that the state can change immediately after the return.
+ #[inline]
+ #[expect(dead_code)]
+ pub(crate) fn is_pending(&self) -> bool {
+ self.0.pending.load(Acquire)
+ }
+}
+
+impl Drop for ResetHandle {
+ fn drop(&mut self) {
+ // Drain queued/running work and block future queueing attempts for this
+ // work item before clocks/regulators are torn down.
+ // SAFETY: drop executes in a sleepable context.
+ unsafe { self.0.work.disable_sync() };
+ }
+}
+
+/// Issues a soft reset command and waits for reset-complete IRQ status.
+fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
+ // Clear any stale reset-complete IRQ state before issuing a new soft reset.
+ regs::GPU_IRQ_CLEAR.write(dev, iomem, regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED)?;
+ regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?;
+
+ poll::read_poll_timeout(
+ || regs::GPU_IRQ_RAWSTAT.read(dev, iomem),
+ |status| *status & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED != 0,
+ time::Delta::from_millis(1),
+ time::Delta::from_millis(100),
+ )
+ .inspect_err(|_| dev_err!(dev, "GPU reset failed."))?;
+
+ Ok(())
+}
+
+/// Runs one synchronous GPU reset pass.
+///
+/// Its visibility is `pub(super)` only so the probe path can run an
+/// initial reset; it is not part of this module's public API.
+///
+/// On success, the GPU is left in a state suitable for reinitialization.
+///
+/// The reset sequence is as follows:
+/// 1. Trigger a GPU soft reset.
+/// 2. Wait for the reset-complete IRQ status.
+/// 3. Power L2 back on.
+pub(super) fn run_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
+ issue_soft_reset(dev, iomem)?;
+ gpu::l2_power_on(dev, iomem)?;
+ Ok(())
+}
diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs
index 18b0668bb217..d0349bc49f27 100644
--- a/drivers/gpu/drm/tyr/tyr.rs
+++ b/drivers/gpu/drm/tyr/tyr.rs
@@ -14,6 +14,7 @@
mod gpu;
mod mmu;
mod regs;
+mod reset;
mod slot;
mod vm;
--
2.51.2
On Fri, 13 Mar 2026 12:16:44 +0300
Onur Özkan <work@onurozkan.dev> wrote:
> +impl Controller {
> + /// Creates a [`Controller`] instance.
> + fn new(pdev: ARef<platform::Device>, iomem: Arc<Devres<IoMem>>) -> Result<Arc<Self>> {
> + let wq = workqueue::OrderedQueue::new(c"tyr-reset-wq", 0)?;
> +
> + Arc::pin_init(
> + try_pin_init!(Self {
> + pdev,
> + iomem,
> + pending: Atomic::new(false),
> + wq,
> + work <- kernel::new_work!("tyr::reset"),
> + }),
> + GFP_KERNEL,
> + )
> + }
> +
> + /// Processes one scheduled reset request.
> + ///
> + /// Panthor reference:
> + /// - drivers/gpu/drm/panthor/panthor_device.c::panthor_device_reset_work()
> + fn reset_work(self: &Arc<Self>) {
> + dev_info!(self.pdev.as_ref(), "GPU reset work is started.\n");
> +
> + // SAFETY: `Controller` is part of driver-private data and only exists
> + // while the platform device is bound.
> + let pdev = unsafe { self.pdev.as_ref().as_bound() };
> + if let Err(e) = run_reset(pdev, &self.iomem) {
> + dev_err!(self.pdev.as_ref(), "GPU reset failed: {:?}\n", e);
> + } else {
> + dev_info!(self.pdev.as_ref(), "GPU reset work is done.\n");
> + }
Unfortunately, the reset operation is not as simple as instructing the
GPU to reset; it's a complex synchronization process where you need to
try to gracefully put various components on hold before you reset, and
then resume those after the reset is effective. Of course, with what we
currently have in-tree, there's not much to suspend/resume, but I think
I'd prefer to design the thing so we can progressively add more
components without changing the reset logic too much.
I would probably start with a Resettable trait that has the
{pre,post}_reset() methods that exist in Panthor.
The other thing we need is a way for those components to know when a
reset is about to happen so they can postpone some actions they were
planning in order to not further delay the reset, or end up with
actions that fail because the HW is already unusable. Not too sure how
we want to handle that though. Panthor is currently sprinkled with
panthor_device_reset_is_pending() calls in key places, but that's still
very manual, maybe we can automate that a bit more in Tyr, dunno.
> +
> + self.pending.store(false, Release);
> + }
> +}
> On 19 Mar 2026, at 08:08, Boris Brezillon <boris.brezillon@collabora.com> wrote:
>
> On Fri, 13 Mar 2026 12:16:44 +0300
> Onur Özkan <work@onurozkan.dev> wrote:
>
>> +impl Controller {
>> + /// Creates a [`Controller`] instance.
>> + fn new(pdev: ARef<platform::Device>, iomem: Arc<Devres<IoMem>>) -> Result<Arc<Self>> {
>> + let wq = workqueue::OrderedQueue::new(c"tyr-reset-wq", 0)?;
>> +
>> + Arc::pin_init(
>> + try_pin_init!(Self {
>> + pdev,
>> + iomem,
>> + pending: Atomic::new(false),
>> + wq,
>> + work <- kernel::new_work!("tyr::reset"),
>> + }),
>> + GFP_KERNEL,
>> + )
>> + }
>> +
>> + /// Processes one scheduled reset request.
>> + ///
>> + /// Panthor reference:
>> + /// - drivers/gpu/drm/panthor/panthor_device.c::panthor_device_reset_work()
>> + fn reset_work(self: &Arc<Self>) {
>> + dev_info!(self.pdev.as_ref(), "GPU reset work is started.\n");
>> +
>> + // SAFETY: `Controller` is part of driver-private data and only exists
>> + // while the platform device is bound.
>> + let pdev = unsafe { self.pdev.as_ref().as_bound() };
>> + if let Err(e) = run_reset(pdev, &self.iomem) {
>> + dev_err!(self.pdev.as_ref(), "GPU reset failed: {:?}\n", e);
>> + } else {
>> + dev_info!(self.pdev.as_ref(), "GPU reset work is done.\n");
>> + }
>
> Unfortunately, the reset operation is not as simple as instructing the
> GPU to reset, it's a complex synchronization process where you need to
> try to gracefully put various components on hold before you reset, and
> then resume those after the reset is effective. Of course, with what we
> currently have in-tree, there's not much to suspend/resume, but I think
> I'd prefer to design the thing so we can progressively add more
> components without changing the reset logic too much.
>
> I would probably start with a Resettable trait that has the
> {pre,post}_reset() methods that exist in Panthor.
>
> The other thing we need is a way for those components to know when a
> reset is about to happen so they can postpone some actions they were
> planning in order to not further delay the reset, or end up with
> actions that fail because the HW is already unusable. Not too sure how
> we want to handle that though. Panthor is currently sprinkled with
> panthor_device_reset_is_pending() calls in key places, but that's still
> very manual, maybe we can automate that a bit more in Tyr, dunno.
>
We could have an enum where one of the variants is Resetting, and the other one
gives access to whatever state is not accessible while resets are in progress.
Something like
pub enum TyrData {
Active(ActiveTyrData),
ResetInProgress(ActiveTyrData)
}
fn access() -> Option<&ActiveTyrData> {
… // if the “ResetInProgress” variant is active, return None
}
>> +
>> + self.pending.store(false, Release);
>> + }
>> +}
Hi Boris,
On Thu, 19 Mar 2026 12:08:28 +0100
Boris Brezillon <boris.brezillon@collabora.com> wrote:
> On Fri, 13 Mar 2026 12:16:44 +0300
> Onur Özkan <work@onurozkan.dev> wrote:
>
> > +impl Controller {
> > + /// Creates a [`Controller`] instance.
> > + fn new(pdev: ARef<platform::Device>, iomem:
> > Arc<Devres<IoMem>>) -> Result<Arc<Self>> {
> > + let wq = workqueue::OrderedQueue::new(c"tyr-reset-wq", 0)?;
> > +
> > + Arc::pin_init(
> > + try_pin_init!(Self {
> > + pdev,
> > + iomem,
> > + pending: Atomic::new(false),
> > + wq,
> > + work <- kernel::new_work!("tyr::reset"),
> > + }),
> > + GFP_KERNEL,
> > + )
> > + }
> > +
> > + /// Processes one scheduled reset request.
> > + ///
> > + /// Panthor reference:
> > + /// -
> > drivers/gpu/drm/panthor/panthor_device.c::panthor_device_reset_work()
> > + fn reset_work(self: &Arc<Self>) {
> > + dev_info!(self.pdev.as_ref(), "GPU reset work is
> > started.\n"); +
> > + // SAFETY: `Controller` is part of driver-private data and
> > only exists
> > + // while the platform device is bound.
> > + let pdev = unsafe { self.pdev.as_ref().as_bound() };
> > + if let Err(e) = run_reset(pdev, &self.iomem) {
> > + dev_err!(self.pdev.as_ref(), "GPU reset failed:
> > {:?}\n", e);
> > + } else {
> > + dev_info!(self.pdev.as_ref(), "GPU reset work is
> > done.\n");
> > + }
>
> Unfortunately, the reset operation is not as simple as instructing the
> GPU to reset, it's a complex synchronization process where you need to
> try to gracefully put various components on hold before you reset, and
> then resume those after the reset is effective. Of course, with what
> we currently have in-tree, there's not much to suspend/resume, but I
> think I'd prefer to design the thing so we can progressively add more
> components without changing the reset logic too much.
>
> I would probably start with a Resettable trait that has the
> {pre,post}_reset() methods that exist in Panthor.
Yeah, I checked Panthor and it has these functions for the reset logic.
I will implement that in v2 and will dig further to see if there is
anything else to cover with regard to proper synchronization.
>
> The other thing we need is a way for those components to know when a
> reset is about to happen so they can postpone some actions they were
> planning in order to not further delay the reset, or end up with
> actions that fail because the HW is already unusable. Not too sure how
> we want to handle that though. Panthor is currently sprinkled with
> panthor_device_reset_is_pending() calls in key places, but that's
> still very manual, maybe we can automate that a bit more in Tyr,
> dunno.
>
Hmm, sounds like a perfect guard use-case. Is it possible to require
users to access the hw behind a guard (e.g., try_access_hw())? We would
then check whether a reset is in progress and either block the caller
until the reset completes or return an error.
Thanks,
Onur
> > +
> > + self.pending.store(false, Release);
> > + }
> > +}
> On 13 Mar 2026, at 06:16, Onur Özkan <work@onurozkan.dev> wrote:
>
> Move Tyr reset logic into a new reset module and add async reset work.
>
> This adds:
> - ResetHandle with internal controller state
> - a dedicated ordered reset workqueue
> - a pending flag to avoid duplicate queued resets
> - run_reset() as the shared synchronous reset helper
>
> Probe now calls reset::run_reset() before normal init. Driver data now
> keeps ResetHandle so reset work is drained before clocks and regulators
> are dropped.
>
> Tested-by: Deborah Brouwer <deborah.brouwer@collabora.com>
> Signed-off-by: Onur Özkan <work@onurozkan.dev>
> ---
> drivers/gpu/drm/tyr/driver.rs | 40 +++-----
> drivers/gpu/drm/tyr/reset.rs | 180 ++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/tyr/tyr.rs | 1 +
> 3 files changed, 192 insertions(+), 29 deletions(-)
> create mode 100644 drivers/gpu/drm/tyr/reset.rs
>
> diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs
> index f7951804e4e0..c80238a21ff2 100644
> --- a/drivers/gpu/drm/tyr/driver.rs
> +++ b/drivers/gpu/drm/tyr/driver.rs
> @@ -6,11 +6,8 @@
> OptionalClk, //
> },
> device::{
> - Bound,
> - Core,
> - Device, //
> + Core, //
> },
> - devres::Devres,
> dma::{
> Device as DmaDevice,
> DmaMask, //
> @@ -22,10 +19,7 @@
> Registered,
> UnregisteredDevice, //
> },
> - io::poll,
> - new_mutex,
> - of,
> - platform,
> + new_mutex, of, platform,
> prelude::*,
> regulator,
> regulator::Regulator,
> @@ -35,17 +29,15 @@
> Arc,
> Mutex, //
> },
> - time, //
> };
>
> use crate::{
> file::TyrDrmFileData,
> fw::Firmware,
> gem::BoData,
> - gpu,
> gpu::GpuInfo,
> mmu::Mmu,
> - regs, //
> + reset, //
> };
>
> pub(crate) type IoMem = kernel::io::mem::IoMem<SZ_2M>;
> @@ -62,6 +54,11 @@ pub(crate) struct TyrPlatformDriverData {
>
> #[pin_data]
> pub(crate) struct TyrDrmDeviceData {
> + // `ResetHandle::drop()` drains queued/running works and this must happen
> + // before clocks/regulators are dropped. So keep this field before them to
> + // ensure the correct drop order.
> + pub(crate) reset: reset::ResetHandle,
> +
> pub(crate) pdev: ARef<platform::Device>,
>
> pub(crate) fw: Arc<Firmware>,
> @@ -90,22 +87,6 @@ unsafe impl Send for TyrDrmDeviceData {}
> // SAFETY: This will be removed in a future patch.
> unsafe impl Sync for TyrDrmDeviceData {}
>
> -fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
> - // Clear any stale reset-complete IRQ state before issuing a new soft reset.
> - regs::GPU_IRQ_CLEAR.write(dev, iomem, regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED)?;
> - regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?;
> -
> - poll::read_poll_timeout(
> - || regs::GPU_IRQ_RAWSTAT.read(dev, iomem),
> - |status| *status & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED != 0,
> - time::Delta::from_millis(1),
> - time::Delta::from_millis(100),
> - )
> - .inspect_err(|_| dev_err!(dev, "GPU reset failed."))?;
> -
> - Ok(())
> -}
> -
> kernel::of_device_table!(
> OF_TABLE,
> MODULE_OF_TABLE,
> @@ -138,8 +119,7 @@ fn probe(
> let request = pdev.io_request_by_index(0).ok_or(ENODEV)?;
> let iomem = Arc::pin_init(request.iomap_sized::<SZ_2M>(), GFP_KERNEL)?;
>
> - issue_soft_reset(pdev.as_ref(), &iomem)?;
> - gpu::l2_power_on(pdev.as_ref(), &iomem)?;
> + reset::run_reset(pdev.as_ref(), &iomem)?;
>
> let gpu_info = GpuInfo::new(pdev.as_ref(), &iomem)?;
> gpu_info.log(pdev);
> @@ -153,6 +133,7 @@ fn probe(
>
> let uninit_ddev = UnregisteredDevice::<TyrDrmDriver>::new(pdev.as_ref())?;
> let platform: ARef<platform::Device> = pdev.into();
> + let reset = reset::ResetHandle::new(platform.clone(), iomem.clone())?;
>
> let mmu = Mmu::new(pdev, iomem.as_arc_borrow(), &gpu_info)?;
>
> @@ -178,6 +159,7 @@ fn probe(
> _mali: mali_regulator,
> _sram: sram_regulator,
> }),
> + reset,
> gpu_info,
> });
> let ddev = Registration::new_foreign_owned(uninit_ddev, pdev.as_ref(), data, 0)?;
> diff --git a/drivers/gpu/drm/tyr/reset.rs b/drivers/gpu/drm/tyr/reset.rs
> new file mode 100644
> index 000000000000..29dfae98b0dd
> --- /dev/null
> +++ b/drivers/gpu/drm/tyr/reset.rs
> @@ -0,0 +1,180 @@
> +// SPDX-License-Identifier: GPL-2.0 or MIT
> +
> +//! Provides asynchronous reset handling for the Tyr DRM driver via
> +//! [`ResetHandle`], which runs reset work on a dedicated ordered
> +//! workqueue and avoids duplicate pending resets.
> +
> +use kernel::{
> + device::{
> + Bound,
> + Device, //
> + },
> + devres::Devres,
> + io::poll,
> + platform,
> + prelude::*,
> + sync::{
> + aref::ARef,
> + atomic::{
> + Acquire,
> + Atomic,
> + Relaxed,
> + Release, //
> + },
> + Arc,
> + },
> + time,
> + workqueue::{
> + self,
> + Work, //
> + },
> +};
> +
> +use crate::{
> + driver::IoMem,
> + gpu,
> + regs, //
> +};
> +
> +/// Manages asynchronous GPU reset handling and ensures only a single reset
> +/// work is pending at a time.
> +#[pin_data]
> +struct Controller {
> + /// Platform device reference needed for reset operations and logging.
> + pdev: ARef<platform::Device>,
> + /// Mapped register space needed for reset operations.
> + iomem: Arc<Devres<IoMem>>,
> + /// Atomic flag for controlling the scheduling pending state.
> + pending: Atomic<bool>,
> + /// Dedicated ordered workqueue for reset operations.
> + wq: workqueue::OrderedQueue,
> + /// Work item backing async reset processing.
> + #[pin]
> + work: Work<Controller>,
> +}
> +
> +kernel::impl_has_work! {
> + impl HasWork<Controller> for Controller { self.work }
> +}
> +
> +impl workqueue::WorkItem for Controller {
> + type Pointer = Arc<Self>;
> +
> + fn run(this: Arc<Self>) {
> + this.reset_work();
> + }
> +}
> +
> +impl Controller {
> + /// Creates a [`Controller`] instance.
> + fn new(pdev: ARef<platform::Device>, iomem: Arc<Devres<IoMem>>) -> Result<Arc<Self>> {
> + let wq = workqueue::OrderedQueue::new(c"tyr-reset-wq", 0)?;
> +
> + Arc::pin_init(
> + try_pin_init!(Self {
> + pdev,
> + iomem,
> + pending: Atomic::new(false),
> + wq,
> + work <- kernel::new_work!("tyr::reset"),
> + }),
> + GFP_KERNEL,
> + )
> + }
> +
> + /// Processes one scheduled reset request.
> + ///
> + /// Panthor reference:
> + /// - drivers/gpu/drm/panthor/panthor_device.c::panthor_device_reset_work()
> + fn reset_work(self: &Arc<Self>) {
> + dev_info!(self.pdev.as_ref(), "GPU reset work is started.\n");
> +
> + // SAFETY: `Controller` is part of driver-private data and only exists
> + // while the platform device is bound.
> + let pdev = unsafe { self.pdev.as_ref().as_bound() };
> + if let Err(e) = run_reset(pdev, &self.iomem) {
> + dev_err!(self.pdev.as_ref(), "GPU reset failed: {:?}\n", e);
> + } else {
> + dev_info!(self.pdev.as_ref(), "GPU reset work is done.\n");
> + }
Can we have more descriptive strings here? A user cares little for
implementation details such as "reset work"; what they care about is
that the hardware is undergoing a reset.
> +
> + self.pending.store(false, Release);
> + }
> +}
> +
> +/// Reset handle that shuts down pending work gracefully on drop.
> +pub(crate) struct ResetHandle(Arc<Controller>);
> +
Why is this an Arc? There seems to be a single owner?
> +impl ResetHandle {
> + /// Creates a [`ResetHandle`] instance.
> + pub(crate) fn new(pdev: ARef<platform::Device>, iomem: Arc<Devres<IoMem>>) -> Result<Self> {
> + Ok(Self(Controller::new(pdev, iomem)?))
> + }
> +
> + /// Schedules reset work.
> + #[expect(dead_code)]
> + pub(crate) fn schedule(&self) {
> + // TODO: Similar to `panthor_device_schedule_reset()` in Panthor, add a
> + // power management check once Tyr supports it.
> +
> + // Keep only one reset request running or queued. If one is already pending,
> + // we ignore new schedule requests.
> + if self.0.pending.cmpxchg(false, true, Relaxed).is_ok()
> + && self.0.wq.enqueue(self.0.clone()).is_err()
> + {
> + self.0.pending.store(false, Release);
> + }
> + }
> +
> + /// Returns true if a reset is queued or in progress.
> + ///
> + /// Note that the state can change immediately after the return.
> + #[inline]
> + #[expect(dead_code)]
> + pub(crate) fn is_pending(&self) -> bool {
> + self.0.pending.load(Acquire)
> + }
> +}
> +
> +impl Drop for ResetHandle {
> + fn drop(&mut self) {
> + // Drain queued/running work and block future queueing attempts for this
> + // work item before clocks/regulators are torn down.
> + // SAFETY: drop executes in a sleepable context.
> + unsafe { self.0.work.disable_sync() };
> + }
> +}
> +
> +/// Issues a soft reset command and waits for reset-complete IRQ status.
> +fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
> + // Clear any stale reset-complete IRQ state before issuing a new soft reset.
> + regs::GPU_IRQ_CLEAR.write(dev, iomem, regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED)?;
> + regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?;
> +
> + poll::read_poll_timeout(
> + || regs::GPU_IRQ_RAWSTAT.read(dev, iomem),
> + |status| *status & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED != 0,
> + time::Delta::from_millis(1),
> + time::Delta::from_millis(100),
> + )
> + .inspect_err(|_| dev_err!(dev, "GPU reset failed."))?;
> +
> + Ok(())
> +}
> +
> +/// Runs one synchronous GPU reset pass.
> +///
> +/// Its visibility is `pub(super)` only so the probe path can run an
> +/// initial reset; it is not part of this module's public API.
> +///
> +/// On success, the GPU is left in a state suitable for reinitialization.
> +///
> +/// The reset sequence is as follows:
> +/// 1. Trigger a GPU soft reset.
> +/// 2. Wait for the reset-complete IRQ status.
> +/// 3. Power L2 back on.
> +pub(super) fn run_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) -> Result {
> + issue_soft_reset(dev, iomem)?;
> + gpu::l2_power_on(dev, iomem)?;
> + Ok(())
> +}
> diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs
> index 18b0668bb217..d0349bc49f27 100644
> --- a/drivers/gpu/drm/tyr/tyr.rs
> +++ b/drivers/gpu/drm/tyr/tyr.rs
> @@ -14,6 +14,7 @@
> mod gpu;
> mod mmu;
> mod regs;
> +mod reset;
> mod slot;
> mod vm;
>
> --
> 2.51.2
>
On Fri, 13 Mar 2026 11:56:58 -0300
Daniel Almeida <daniel.almeida@collabora.com> wrote:
>
>
> > On 13 Mar 2026, at 06:16, Onur Özkan <work@onurozkan.dev> wrote:
> >
> > Move Tyr reset logic into a new reset module and add async reset
> > work.
> >
> > This adds:
> > - ResetHandle with internal controller state
> > - a dedicated ordered reset workqueue
> > - a pending flag to avoid duplicate queued resets
> > - run_reset() as the shared synchronous reset helper
> >
> > Probe now calls reset::run_reset() before normal init. Driver data
> > now keeps ResetHandle so reset work is drained before clocks and
> > regulators are dropped.
> >
> > Tested-by: Deborah Brouwer <deborah.brouwer@collabora.com>
> > Signed-off-by: Onur Özkan <work@onurozkan.dev>
> > ---
> > drivers/gpu/drm/tyr/driver.rs | 40 +++-----
> > drivers/gpu/drm/tyr/reset.rs | 180
> > ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/tyr/tyr.rs |
> > 1 + 3 files changed, 192 insertions(+), 29 deletions(-)
> > create mode 100644 drivers/gpu/drm/tyr/reset.rs
> >
> > diff --git a/drivers/gpu/drm/tyr/driver.rs
> > b/drivers/gpu/drm/tyr/driver.rs index f7951804e4e0..c80238a21ff2
> > 100644 --- a/drivers/gpu/drm/tyr/driver.rs
> > +++ b/drivers/gpu/drm/tyr/driver.rs
> > @@ -6,11 +6,8 @@
> > OptionalClk, //
> > },
> > device::{
> > - Bound,
> > - Core,
> > - Device, //
> > + Core, //
> > },
> > - devres::Devres,
> > dma::{
> > Device as DmaDevice,
> > DmaMask, //
> > @@ -22,10 +19,7 @@
> > Registered,
> > UnregisteredDevice, //
> > },
> > - io::poll,
> > - new_mutex,
> > - of,
> > - platform,
> > + new_mutex, of, platform,
> > prelude::*,
> > regulator,
> > regulator::Regulator,
> > @@ -35,17 +29,15 @@
> > Arc,
> > Mutex, //
> > },
> > - time, //
> > };
> >
> > use crate::{
> > file::TyrDrmFileData,
> > fw::Firmware,
> > gem::BoData,
> > - gpu,
> > gpu::GpuInfo,
> > mmu::Mmu,
> > - regs, //
> > + reset, //
> > };
> >
> > pub(crate) type IoMem = kernel::io::mem::IoMem<SZ_2M>;
> > @@ -62,6 +54,11 @@ pub(crate) struct TyrPlatformDriverData {
> >
> > #[pin_data]
> > pub(crate) struct TyrDrmDeviceData {
> > + // `ResetHandle::drop()` drains queued/running works and this
> > must happen
> > + // before clocks/regulators are dropped. So keep this field
> > before them to
> > + // ensure the correct drop order.
> > + pub(crate) reset: reset::ResetHandle,
> > +
> > pub(crate) pdev: ARef<platform::Device>,
> >
> > pub(crate) fw: Arc<Firmware>,
> > @@ -90,22 +87,6 @@ unsafe impl Send for TyrDrmDeviceData {}
> > // SAFETY: This will be removed in a future patch.
> > unsafe impl Sync for TyrDrmDeviceData {}
> >
> > -fn issue_soft_reset(dev: &Device<Bound>, iomem: &Devres<IoMem>) ->
> > Result {
> > - // Clear any stale reset-complete IRQ state before issuing a
> > new soft reset.
> > - regs::GPU_IRQ_CLEAR.write(dev, iomem,
> > regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED)?;
> > - regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?;
> > -
> > - poll::read_poll_timeout(
> > - || regs::GPU_IRQ_RAWSTAT.read(dev, iomem),
> > - |status| *status & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED
> > != 0,
> > - time::Delta::from_millis(1),
> > - time::Delta::from_millis(100),
> > - )
> > - .inspect_err(|_| dev_err!(dev, "GPU reset failed."))?;
> > -
> > - Ok(())
> > -}
> > -
> > kernel::of_device_table!(
> > OF_TABLE,
> > MODULE_OF_TABLE,
> > @@ -138,8 +119,7 @@ fn probe(
> > let request = pdev.io_request_by_index(0).ok_or(ENODEV)?;
> > let iomem = Arc::pin_init(request.iomap_sized::<SZ_2M>(),
> > GFP_KERNEL)?;
> >
> > - issue_soft_reset(pdev.as_ref(), &iomem)?;
> > - gpu::l2_power_on(pdev.as_ref(), &iomem)?;
> > + reset::run_reset(pdev.as_ref(), &iomem)?;
> >
> > let gpu_info = GpuInfo::new(pdev.as_ref(), &iomem)?;
> > gpu_info.log(pdev);
> > @@ -153,6 +133,7 @@ fn probe(
> >
> > let uninit_ddev =
> > UnregisteredDevice::<TyrDrmDriver>::new(pdev.as_ref())?; let
> > platform: ARef<platform::Device> = pdev.into();
> > + let reset = reset::ResetHandle::new(platform.clone(),
> > iomem.clone())?;
> >
> > let mmu = Mmu::new(pdev, iomem.as_arc_borrow(), &gpu_info)?;
> >
> > @@ -178,6 +159,7 @@ fn probe(
> > _mali: mali_regulator,
> > _sram: sram_regulator,
> > }),
> > + reset,
> > gpu_info,
> > });
> > let ddev = Registration::new_foreign_owned(uninit_ddev,
> > pdev.as_ref(), data, 0)?; diff --git a/drivers/gpu/drm/tyr/reset.rs
> > b/drivers/gpu/drm/tyr/reset.rs new file mode 100644
> > index 000000000000..29dfae98b0dd
> > --- /dev/null
> > +++ b/drivers/gpu/drm/tyr/reset.rs
> > @@ -0,0 +1,180 @@
> > +// SPDX-License-Identifier: GPL-2.0 or MIT
> > +
> > +//! Provides asynchronous reset handling for the Tyr DRM driver via
> > +//! [`ResetHandle`], which runs reset work on a dedicated ordered
> > +//! workqueue and avoids duplicate pending resets.
> > +
> > +use kernel::{
> > + device::{
> > + Bound,
> > + Device, //
> > + },
> > + devres::Devres,
> > + io::poll,
> > + platform,
> > + prelude::*,
> > + sync::{
> > + aref::ARef,
> > + atomic::{
> > + Acquire,
> > + Atomic,
> > + Relaxed,
> > + Release, //
> > + },
> > + Arc,
> > + },
> > + time,
> > + workqueue::{
> > + self,
> > + Work, //
> > + },
> > +};
> > +
> > +use crate::{
> > + driver::IoMem,
> > + gpu,
> > + regs, //
> > +};
> > +
> > +/// Manages asynchronous GPU reset handling and ensures only a
> > single reset +/// work is pending at a time.
> > +#[pin_data]
> > +struct Controller {
> > + /// Platform device reference needed for reset operations and
> > logging.
> > + pdev: ARef<platform::Device>,
> > + /// Mapped register space needed for reset operations.
> > + iomem: Arc<Devres<IoMem>>,
> > + /// Atomic flag for controlling the scheduling pending state.
> > + pending: Atomic<bool>,
> > + /// Dedicated ordered workqueue for reset operations.
> > + wq: workqueue::OrderedQueue,
> > + /// Work item backing async reset processing.
> > + #[pin]
> > + work: Work<Controller>,
> > +}
> > +
> > +kernel::impl_has_work! {
> > + impl HasWork<Controller> for Controller { self.work }
> > +}
> > +
> > +impl workqueue::WorkItem for Controller {
> > + type Pointer = Arc<Self>;
> > +
> > + fn run(this: Arc<Self>) {
> > + this.reset_work();
> > + }
> > +}
> > +
> > +impl Controller {
> > + /// Creates a [`Controller`] instance.
> > + fn new(pdev: ARef<platform::Device>, iomem:
> > Arc<Devres<IoMem>>) -> Result<Arc<Self>> {
> > + let wq = workqueue::OrderedQueue::new(c"tyr-reset-wq", 0)?;
> > +
> > + Arc::pin_init(
> > + try_pin_init!(Self {
> > + pdev,
> > + iomem,
> > + pending: Atomic::new(false),
> > + wq,
> > + work <- kernel::new_work!("tyr::reset"),
> > + }),
> > + GFP_KERNEL,
> > + )
> > + }
> > +
> > + /// Processes one scheduled reset request.
> > + ///
> > + /// Panthor reference:
> > + /// -
> > drivers/gpu/drm/panthor/panthor_device.c::panthor_device_reset_work()
> > + fn reset_work(self: &Arc<Self>) {
> > + dev_info!(self.pdev.as_ref(), "GPU reset work is
> > started.\n"); +
> > + // SAFETY: `Controller` is part of driver-private data and
> > only exists
> > + // while the platform device is bound.
> > + let pdev = unsafe { self.pdev.as_ref().as_bound() };
> > + if let Err(e) = run_reset(pdev, &self.iomem) {
> > + dev_err!(self.pdev.as_ref(), "GPU reset failed:
> > {:?}\n", e);
> > + } else {
> > + dev_info!(self.pdev.as_ref(), "GPU reset work is
> > done.\n");
> > + }
>
> Can we have more descriptive strings here? A user cares little for
> implementation details such as “reset work”; what they care about is
> that the hardware is undergoing a reset.
>
Sure, I will update it.
> > +
> > + self.pending.store(false, Release);
> > + }
> > +}
> > +
> > +/// Reset handle that shuts down pending work gracefully on drop.
> > +pub(crate) struct ResetHandle(Arc<Controller>);
> > +
>
> Why is this an Arc? There seems to be a single owner?
>
Once we queue reset work, the workqueue needs its own reference so that
the `Controller` stays alive until the worker runs and returns.
`ResetHandle` keeps the normal reference, and the queued work holds the
extra one.
Regards,
Onur
> > +impl ResetHandle {
> > + /// Creates a [`ResetHandle`] instance.
> > + pub(crate) fn new(pdev: ARef<platform::Device>, iomem:
> > Arc<Devres<IoMem>>) -> Result<Self> {
> > + Ok(Self(Controller::new(pdev, iomem)?))
> > + }
> > +
> > + /// Schedules reset work.
> > + #[expect(dead_code)]
> > + pub(crate) fn schedule(&self) {
> > + // TODO: Similar to `panthor_device_schedule_reset()` in
> > Panthor, add a
> > + // power management check once Tyr supports it.
> > +
> > + // Keep only one reset request running or queued. If one
> > is already pending,
> > + // we ignore new schedule requests.
> > + if self.0.pending.cmpxchg(false, true, Relaxed).is_ok()
> > + && self.0.wq.enqueue(self.0.clone()).is_err()
> > + {
> > + self.0.pending.store(false, Release);
> > + }
> > + }
> > +
> > + /// Returns true if a reset is queued or in progress.
> > + ///
> > + /// Note that the state can change immediately after the
> > return.
> > + #[inline]
> > + #[expect(dead_code)]
> > + pub(crate) fn is_pending(&self) -> bool {
> > + self.0.pending.load(Acquire)
> > + }
>
> > +}
> > +
> > +impl Drop for ResetHandle {
> > + fn drop(&mut self) {
> > + // Drain queued/running work and block future queueing
> > attempts for this
> > + // work item before clocks/regulators are torn down.
> > + // SAFETY: drop executes in a sleepable context.
> > + unsafe { self.0.work.disable_sync() };
> > + }
> > +}
> > +
> > +/// Issues a soft reset command and waits for reset-complete IRQ
> > status. +fn issue_soft_reset(dev: &Device<Bound>, iomem:
> > &Devres<IoMem>) -> Result {
> > + // Clear any stale reset-complete IRQ state before issuing a
> > new soft reset.
> > + regs::GPU_IRQ_CLEAR.write(dev, iomem,
> > regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED)?;
> > + regs::GPU_CMD.write(dev, iomem, regs::GPU_CMD_SOFT_RESET)?;
> > +
> > + poll::read_poll_timeout(
> > + || regs::GPU_IRQ_RAWSTAT.read(dev, iomem),
> > + |status| *status & regs::GPU_IRQ_RAWSTAT_RESET_COMPLETED
> > != 0,
> > + time::Delta::from_millis(1),
> > + time::Delta::from_millis(100),
> > + )
> > + .inspect_err(|_| dev_err!(dev, "GPU reset failed."))?;
> > +
> > + Ok(())
> > +}
> > +
> > +/// Runs one synchronous GPU reset pass.
> > +///
> > +/// Its visibility is `pub(super)` only so the probe path can run
> > an +/// initial reset; it is not part of this module's public API.
> > +///
> > +/// On success, the GPU is left in a state suitable for
> > reinitialization. +///
> > +/// The reset sequence is as follows:
> > +/// 1. Trigger a GPU soft reset.
> > +/// 2. Wait for the reset-complete IRQ status.
> > +/// 3. Power L2 back on.
> > +pub(super) fn run_reset(dev: &Device<Bound>, iomem:
> > &Devres<IoMem>) -> Result {
> > + issue_soft_reset(dev, iomem)?;
> > + gpu::l2_power_on(dev, iomem)?;
> > + Ok(())
> > +}
> > diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs
> > index 18b0668bb217..d0349bc49f27 100644
> > --- a/drivers/gpu/drm/tyr/tyr.rs
> > +++ b/drivers/gpu/drm/tyr/tyr.rs
> > @@ -14,6 +14,7 @@
> > mod gpu;
> > mod mmu;
> > mod regs;
> > +mod reset;
> > mod slot;
> > mod vm;
> >
> > --
> > 2.51.2
> >
>
>
© 2016 - 2026 Red Hat, Inc.