[PATCH v3] drivers: hv: vmbus: Use kthread for vmbus interrupts on PREEMPT_RT

Jan Kiszka posted 1 patch 2 weeks, 2 days ago
drivers/hv/vmbus_drv.c | 66 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 65 insertions(+), 1 deletion(-)
[PATCH v3] drivers: hv: vmbus: Use kthread for vmbus interrupts on PREEMPT_RT
Posted by Jan Kiszka 2 weeks, 2 days ago
From: Jan Kiszka <jan.kiszka@siemens.com>

Resolves the following lockdep report when booting PREEMPT_RT on Hyper-V
with related guest support enabled:

[    1.127941] hv_vmbus: registering driver hyperv_drm

[    1.132518] =============================
[    1.132519] [ BUG: Invalid wait context ]
[    1.132521] 6.19.0-rc8+ #9 Not tainted
[    1.132524] -----------------------------
[    1.132525] swapper/0/0 is trying to lock:
[    1.132526] ffff8b9381bb3c90 (&channel->sched_lock){....}-{3:3}, at: vmbus_chan_sched+0xc4/0x2b0
[    1.132543] other info that might help us debug this:
[    1.132544] context-{2:2}
[    1.132545] 1 lock held by swapper/0/0:
[    1.132547]  #0: ffffffffa010c4c0 (rcu_read_lock){....}-{1:3}, at: vmbus_chan_sched+0x31/0x2b0
[    1.132557] stack backtrace:
[    1.132560] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.19.0-rc8+ #9 PREEMPT_{RT,(lazy)}
[    1.132565] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 09/25/2025
[    1.132567] Call Trace:
[    1.132570]  <IRQ>
[    1.132573]  dump_stack_lvl+0x6e/0xa0
[    1.132581]  __lock_acquire+0xee0/0x21b0
[    1.132592]  lock_acquire+0xd5/0x2d0
[    1.132598]  ? vmbus_chan_sched+0xc4/0x2b0
[    1.132606]  ? lock_acquire+0xd5/0x2d0
[    1.132613]  ? vmbus_chan_sched+0x31/0x2b0
[    1.132619]  rt_spin_lock+0x3f/0x1f0
[    1.132623]  ? vmbus_chan_sched+0xc4/0x2b0
[    1.132629]  ? vmbus_chan_sched+0x31/0x2b0
[    1.132634]  vmbus_chan_sched+0xc4/0x2b0
[    1.132641]  vmbus_isr+0x2c/0x150
[    1.132648]  __sysvec_hyperv_callback+0x5f/0xa0
[    1.132654]  sysvec_hyperv_callback+0x88/0xb0
[    1.132658]  </IRQ>
[    1.132659]  <TASK>
[    1.132660]  asm_sysvec_hyperv_callback+0x1a/0x20

As code paths that handle vmbus IRQs use sleepy locks under PREEMPT_RT,
the vmbus_isr execution needs to be moved into thread context. Open-
coding this allows to skip the IPI that irq_work would additionally
bring and which we do not need, being an IRQ, never an NMI.

This affects both x86 and arm64, therefore hook into the common driver
logic.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---

Changes in v3:
 - move logic to generic vmbus driver, targeting arm64 as well
 - annotate non-RT path with lockdep_hardirq_threaded
 - only teardown if setup ran

Changes in v2:
 - reorder vmbus_irq_pending clearing to fix a race condition

 drivers/hv/vmbus_drv.c | 66 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 6785ad63a9cb..749a2e68af05 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -25,6 +25,7 @@
 #include <linux/cpu.h>
 #include <linux/sched/isolation.h>
 #include <linux/sched/task_stack.h>
+#include <linux/smpboot.h>
 
 #include <linux/delay.h>
 #include <linux/panic_notifier.h>
@@ -1350,7 +1351,7 @@ static void vmbus_message_sched(struct hv_per_cpu_context *hv_cpu, void *message
 	}
 }
 
-void vmbus_isr(void)
+static void __vmbus_isr(void)
 {
 	struct hv_per_cpu_context *hv_cpu
 		= this_cpu_ptr(hv_context.cpu_context);
@@ -1363,6 +1364,53 @@ void vmbus_isr(void)
 
 	add_interrupt_randomness(vmbus_interrupt);
 }
+
+static DEFINE_PER_CPU(bool, vmbus_irq_pending);
+static DEFINE_PER_CPU(struct task_struct *, vmbus_irqd);
+
+static void vmbus_irqd_wake(void)
+{
+	struct task_struct *tsk = __this_cpu_read(vmbus_irqd);
+
+	__this_cpu_write(vmbus_irq_pending, true);
+	wake_up_process(tsk);
+}
+
+static void vmbus_irqd_setup(unsigned int cpu)
+{
+	sched_set_fifo(current);
+}
+
+static int vmbus_irqd_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(vmbus_irq_pending);
+}
+
+static void run_vmbus_irqd(unsigned int cpu)
+{
+	__this_cpu_write(vmbus_irq_pending, false);
+	__vmbus_isr();
+}
+
+static bool vmbus_irq_initialized;
+
+static struct smp_hotplug_thread vmbus_irq_threads = {
+	.store                  = &vmbus_irqd,
+	.setup			= vmbus_irqd_setup,
+	.thread_should_run      = vmbus_irqd_should_run,
+	.thread_fn              = run_vmbus_irqd,
+	.thread_comm            = "vmbus_irq/%u",
+};
+
+void vmbus_isr(void)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		vmbus_irqd_wake();
+	} else {
+		lockdep_hardirq_threaded();
+		__vmbus_isr();
+	}
+}
 EXPORT_SYMBOL_FOR_MODULES(vmbus_isr, "mshv_vtl");
 
 static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
@@ -1462,6 +1510,13 @@ static int vmbus_bus_init(void)
 	 * the VMbus interrupt handler.
 	 */
 
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !vmbus_irq_initialized) {
+		ret = smpboot_register_percpu_thread(&vmbus_irq_threads);
+		if (ret)
+			goto err_kthread;
+		vmbus_irq_initialized = true;
+	}
+
 	if (vmbus_irq == -1) {
 		hv_setup_vmbus_handler(vmbus_isr);
 	} else {
@@ -1507,6 +1562,11 @@ static int vmbus_bus_init(void)
 		free_percpu(vmbus_evt);
 	}
 err_setup:
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
+		smpboot_unregister_percpu_thread(&vmbus_irq_threads);
+		vmbus_irq_initialized = false;
+	}
+err_kthread:
 	bus_unregister(&hv_bus);
 	return ret;
 }
@@ -2976,6 +3036,10 @@ static void __exit vmbus_exit(void)
 		free_percpu_irq(vmbus_irq, vmbus_evt);
 		free_percpu(vmbus_evt);
 	}
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) {
+		smpboot_unregister_percpu_thread(&vmbus_irq_threads);
+		vmbus_irq_initialized = false;
+	}
 	for_each_online_cpu(cpu) {
 		struct hv_per_cpu_context *hv_cpu
 			= per_cpu_ptr(hv_context.cpu_context, cpu);
-- 
2.47.3
Re: [PATCH v3] drivers: hv: vmbus: Use kthread for vmbus interrupts on PREEMPT_RT
Posted by Bezdeka, Florian 2 weeks, 1 day ago
On Mon, 2026-02-16 at 17:24 +0100, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> Resolves the following lockdep report when booting PREEMPT_RT on Hyper-V
> with related guest support enabled:
> 
> [    1.127941] hv_vmbus: registering driver hyperv_drm
> 
> [    1.132518] =============================
> [    1.132519] [ BUG: Invalid wait context ]
> [    1.132521] 6.19.0-rc8+ #9 Not tainted
> [    1.132524] -----------------------------
> [    1.132525] swapper/0/0 is trying to lock:
> [    1.132526] ffff8b9381bb3c90 (&channel->sched_lock){....}-{3:3}, at: vmbus_chan_sched+0xc4/0x2b0
> [    1.132543] other info that might help us debug this:
> [    1.132544] context-{2:2}
> [    1.132545] 1 lock held by swapper/0/0:
> [    1.132547]  #0: ffffffffa010c4c0 (rcu_read_lock){....}-{1:3}, at: vmbus_chan_sched+0x31/0x2b0
> [    1.132557] stack backtrace:
> [    1.132560] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.19.0-rc8+ #9 PREEMPT_{RT,(lazy)}
> [    1.132565] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 09/25/2025
> [    1.132567] Call Trace:
> [    1.132570]  <IRQ>
> [    1.132573]  dump_stack_lvl+0x6e/0xa0
> [    1.132581]  __lock_acquire+0xee0/0x21b0
> [    1.132592]  lock_acquire+0xd5/0x2d0
> [    1.132598]  ? vmbus_chan_sched+0xc4/0x2b0
> [    1.132606]  ? lock_acquire+0xd5/0x2d0
> [    1.132613]  ? vmbus_chan_sched+0x31/0x2b0
> [    1.132619]  rt_spin_lock+0x3f/0x1f0
> [    1.132623]  ? vmbus_chan_sched+0xc4/0x2b0
> [    1.132629]  ? vmbus_chan_sched+0x31/0x2b0
> [    1.132634]  vmbus_chan_sched+0xc4/0x2b0
> [    1.132641]  vmbus_isr+0x2c/0x150
> [    1.132648]  __sysvec_hyperv_callback+0x5f/0xa0
> [    1.132654]  sysvec_hyperv_callback+0x88/0xb0
> [    1.132658]  </IRQ>
> [    1.132659]  <TASK>
> [    1.132660]  asm_sysvec_hyperv_callback+0x1a/0x20
> 
> As code paths that handle vmbus IRQs use sleepy locks under PREEMPT_RT,
> the vmbus_isr execution needs to be moved into thread context. Open-
> coding this allows to skip the IPI that irq_work would additionally
> bring and which we do not need, being an IRQ, never an NMI.
> 
> This affects both x86 and arm64, therefore hook into the common driver
> logic.

I tested this patch in combination with the related SCSI driver patch.
The tests were done on x86 with both VM generations provided by Hyper-v.

Lockdep was enabled and there were no splat reports within 24 hours of
massive load produced by stress-ng.

With that:

Reviewed-by: Florian Bezdeka <florian.bezdeka@siemens.com>
Tested-by: Florian Bezdeka <florian.bezdeka@siemens.com>


Side note: We did some backports down to 6.1 already, just in case
someone is interested. We recognized a massive network performance drop
in 6.1. The root cause has been identified and is not related to this
patch. It's simply another RT regression caused by a missing stable-rt
backport. Upstreaming in progress...

Best regards,
Florian
Re: [PATCH v3] drivers: hv: vmbus: Use kthread for vmbus interrupts on PREEMPT_RT
Posted by Jan Kiszka 2 weeks ago
On 18.02.26 00:03, Bezdeka, Florian (FT RPD CED OES-DE) wrote:
> On Mon, 2026-02-16 at 17:24 +0100, Jan Kiszka wrote:
>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>
>> Resolves the following lockdep report when booting PREEMPT_RT on Hyper-V
>> with related guest support enabled:
>>
>> [    1.127941] hv_vmbus: registering driver hyperv_drm
>>
>> [    1.132518] =============================
>> [    1.132519] [ BUG: Invalid wait context ]
>> [    1.132521] 6.19.0-rc8+ #9 Not tainted
>> [    1.132524] -----------------------------
>> [    1.132525] swapper/0/0 is trying to lock:
>> [    1.132526] ffff8b9381bb3c90 (&channel->sched_lock){....}-{3:3}, at: vmbus_chan_sched+0xc4/0x2b0
>> [    1.132543] other info that might help us debug this:
>> [    1.132544] context-{2:2}
>> [    1.132545] 1 lock held by swapper/0/0:
>> [    1.132547]  #0: ffffffffa010c4c0 (rcu_read_lock){....}-{1:3}, at: vmbus_chan_sched+0x31/0x2b0
>> [    1.132557] stack backtrace:
>> [    1.132560] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.19.0-rc8+ #9 PREEMPT_{RT,(lazy)}
>> [    1.132565] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 09/25/2025
>> [    1.132567] Call Trace:
>> [    1.132570]  <IRQ>
>> [    1.132573]  dump_stack_lvl+0x6e/0xa0
>> [    1.132581]  __lock_acquire+0xee0/0x21b0
>> [    1.132592]  lock_acquire+0xd5/0x2d0
>> [    1.132598]  ? vmbus_chan_sched+0xc4/0x2b0
>> [    1.132606]  ? lock_acquire+0xd5/0x2d0
>> [    1.132613]  ? vmbus_chan_sched+0x31/0x2b0
>> [    1.132619]  rt_spin_lock+0x3f/0x1f0
>> [    1.132623]  ? vmbus_chan_sched+0xc4/0x2b0
>> [    1.132629]  ? vmbus_chan_sched+0x31/0x2b0
>> [    1.132634]  vmbus_chan_sched+0xc4/0x2b0
>> [    1.132641]  vmbus_isr+0x2c/0x150
>> [    1.132648]  __sysvec_hyperv_callback+0x5f/0xa0
>> [    1.132654]  sysvec_hyperv_callback+0x88/0xb0
>> [    1.132658]  </IRQ>
>> [    1.132659]  <TASK>
>> [    1.132660]  asm_sysvec_hyperv_callback+0x1a/0x20
>>
>> As code paths that handle vmbus IRQs use sleepy locks under PREEMPT_RT,
>> the vmbus_isr execution needs to be moved into thread context. Open-
>> coding this allows to skip the IPI that irq_work would additionally
>> bring and which we do not need, being an IRQ, never an NMI.
>>
>> This affects both x86 and arm64, therefore hook into the common driver
>> logic.
> 
> I tested this patch in combination with the related SCSI driver patch.
> The tests were done on x86 with both VM generations provided by Hyper-v.
> 
> Lockdep was enabled and there were no splat reports within 24 hours of
> massive load produced by stress-ng.
> 
> With that:
> 
> Reviewed-by: Florian Bezdeka <florian.bezdeka@siemens.com>
> Tested-by: Florian Bezdeka <florian.bezdeka@siemens.com>
> 
> 
> Side note: We did some backports down to 6.1 already, just in case
> someone is interested. We recognized a massive network performance drop
> in 6.1. The root cause has been identified and is not related to this
> patch. It's simply another RT regression caused by a missing stable-rt
> backport. Upstreaming in progress...
> 

Submitted:
https://lore.kernel.org/stable/05ae6b87-0b53-4948-a1ed-2a3235a5f82b@siemens.com/T/#u

Jan

-- 
Siemens AG, Foundational Technologies
Linux Expert Center
Re: [PATCH v3] drivers: hv: vmbus: Use kthread for vmbus interrupts on PREEMPT_RT
Posted by Wei Liu 2 weeks ago
On Mon, Feb 16, 2026 at 05:24:56PM +0100, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> Resolves the following lockdep report when booting PREEMPT_RT on Hyper-V
> with related guest support enabled:
> 
> [    1.127941] hv_vmbus: registering driver hyperv_drm
> 
> [    1.132518] =============================
> [    1.132519] [ BUG: Invalid wait context ]
> [    1.132521] 6.19.0-rc8+ #9 Not tainted
> [    1.132524] -----------------------------
> [    1.132525] swapper/0/0 is trying to lock:
> [    1.132526] ffff8b9381bb3c90 (&channel->sched_lock){....}-{3:3}, at: vmbus_chan_sched+0xc4/0x2b0
> [    1.132543] other info that might help us debug this:
> [    1.132544] context-{2:2}
> [    1.132545] 1 lock held by swapper/0/0:
> [    1.132547]  #0: ffffffffa010c4c0 (rcu_read_lock){....}-{1:3}, at: vmbus_chan_sched+0x31/0x2b0
> [    1.132557] stack backtrace:
> [    1.132560] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.19.0-rc8+ #9 PREEMPT_{RT,(lazy)}
> [    1.132565] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 09/25/2025
> [    1.132567] Call Trace:
> [    1.132570]  <IRQ>
> [    1.132573]  dump_stack_lvl+0x6e/0xa0
> [    1.132581]  __lock_acquire+0xee0/0x21b0
> [    1.132592]  lock_acquire+0xd5/0x2d0
> [    1.132598]  ? vmbus_chan_sched+0xc4/0x2b0
> [    1.132606]  ? lock_acquire+0xd5/0x2d0
> [    1.132613]  ? vmbus_chan_sched+0x31/0x2b0
> [    1.132619]  rt_spin_lock+0x3f/0x1f0
> [    1.132623]  ? vmbus_chan_sched+0xc4/0x2b0
> [    1.132629]  ? vmbus_chan_sched+0x31/0x2b0
> [    1.132634]  vmbus_chan_sched+0xc4/0x2b0
> [    1.132641]  vmbus_isr+0x2c/0x150
> [    1.132648]  __sysvec_hyperv_callback+0x5f/0xa0
> [    1.132654]  sysvec_hyperv_callback+0x88/0xb0
> [    1.132658]  </IRQ>
> [    1.132659]  <TASK>
> [    1.132660]  asm_sysvec_hyperv_callback+0x1a/0x20
> 
> As code paths that handle vmbus IRQs use sleepy locks under PREEMPT_RT,
> the vmbus_isr execution needs to be moved into thread context. Open-
> coding this allows to skip the IPI that irq_work would additionally
> bring and which we do not need, being an IRQ, never an NMI.
> 
> This affects both x86 and arm64, therefore hook into the common driver
> logic.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>

Applied to hyperv-next. Thanks.

Saurabh and Naman, I want to get this submitted in this merge window. If
you find any more issues with this patch, we can address them in the RC
phase. In the worst case, we can revert this patch later.

Wei
Re: [PATCH v3] drivers: hv: vmbus: Use kthread for vmbus interrupts on PREEMPT_RT
Posted by Saurabh Singh Sengar 2 weeks ago
On Wed, Feb 18, 2026 at 07:05:57AM +0000, Wei Liu wrote:
> On Mon, Feb 16, 2026 at 05:24:56PM +0100, Jan Kiszka wrote:
> > From: Jan Kiszka <jan.kiszka@siemens.com>
> > 
> > Resolves the following lockdep report when booting PREEMPT_RT on Hyper-V
> > with related guest support enabled:
> > 
> > [    1.127941] hv_vmbus: registering driver hyperv_drm
> > 
> > [    1.132518] =============================
> > [    1.132519] [ BUG: Invalid wait context ]
> > [    1.132521] 6.19.0-rc8+ #9 Not tainted
> > [    1.132524] -----------------------------
> > [    1.132525] swapper/0/0 is trying to lock:
> > [    1.132526] ffff8b9381bb3c90 (&channel->sched_lock){....}-{3:3}, at: vmbus_chan_sched+0xc4/0x2b0
> > [    1.132543] other info that might help us debug this:
> > [    1.132544] context-{2:2}
> > [    1.132545] 1 lock held by swapper/0/0:
> > [    1.132547]  #0: ffffffffa010c4c0 (rcu_read_lock){....}-{1:3}, at: vmbus_chan_sched+0x31/0x2b0
> > [    1.132557] stack backtrace:
> > [    1.132560] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.19.0-rc8+ #9 PREEMPT_{RT,(lazy)}
> > [    1.132565] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 09/25/2025
> > [    1.132567] Call Trace:
> > [    1.132570]  <IRQ>
> > [    1.132573]  dump_stack_lvl+0x6e/0xa0
> > [    1.132581]  __lock_acquire+0xee0/0x21b0
> > [    1.132592]  lock_acquire+0xd5/0x2d0
> > [    1.132598]  ? vmbus_chan_sched+0xc4/0x2b0
> > [    1.132606]  ? lock_acquire+0xd5/0x2d0
> > [    1.132613]  ? vmbus_chan_sched+0x31/0x2b0
> > [    1.132619]  rt_spin_lock+0x3f/0x1f0
> > [    1.132623]  ? vmbus_chan_sched+0xc4/0x2b0
> > [    1.132629]  ? vmbus_chan_sched+0x31/0x2b0
> > [    1.132634]  vmbus_chan_sched+0xc4/0x2b0
> > [    1.132641]  vmbus_isr+0x2c/0x150
> > [    1.132648]  __sysvec_hyperv_callback+0x5f/0xa0
> > [    1.132654]  sysvec_hyperv_callback+0x88/0xb0
> > [    1.132658]  </IRQ>
> > [    1.132659]  <TASK>
> > [    1.132660]  asm_sysvec_hyperv_callback+0x1a/0x20
> > 
> > As code paths that handle vmbus IRQs use sleepy locks under PREEMPT_RT,
> > the vmbus_isr execution needs to be moved into thread context. Open-
> > coding this allows to skip the IPI that irq_work would additionally
> > bring and which we do not need, being an IRQ, never an NMI.
> > 
> > This affects both x86 and arm64, therefore hook into the common driver
> > logic.
> > 
> > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> 
> Applied to hyperv-next. Thanks.
> 
> Saurabh and Naman, I want to get this submitted in this merge window. If
> you find any more issues with this patch, we can address them in the RC
> phase. In the worst case, we can revert this patch later.
> 
> Wei

I was in the process of completing the final round of testing; however, since
the change has now been merged, it will receive broader coverage, I will rely
on that.

Overall, the patch looks good to me.

- Saurabh