Switch to using the generic infrastructure to check for and handle pending
work before transitioning into guest mode.
xfer_to_guest_mode_handle_work() does a few more things than the current
code does when deciding whether or not to exit the __vcpu_run() loop. In
my testing, the exittime tests from kvm-unit-tests were within +/-3% of
the results before this series, which is within noise tolerance.
Co-developed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
---
The way I've implemented this, I do the check between vcpu_pre_run() and
entering the guest, and bail out of the loop if
kvm_xfer_to_guest_mode_handle_work() returns nonzero, without calling
vcpu_post_run(). My impression is that this is safe, but it does mean
there is an sie_enter vcpu event and trace event which isn't matched with
corresponding exit events. Is this a problem?
---
arch/s390/kvm/Kconfig | 1 +
arch/s390/kvm/kvm-s390.c | 25 ++++++++++++++++++-------
arch/s390/kvm/vsie.c | 17 ++++++++++++-----
3 files changed, 31 insertions(+), 12 deletions(-)
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index cae908d645501ef7eb4edbe87b8431f6499370a4..0ca9d6587243c98034d086c0ebd4ef085e504faf 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -30,6 +30,7 @@ config KVM
select HAVE_KVM_NO_POLL
select KVM_VFIO
select MMU_NOTIFIER
+ select VIRT_XFER_TO_GUEST_WORK
help
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3cad08662b3d80aaf6f5f8891fc08b383c3c44d4..759158695bcdbb7c96c9708b2c7529d6e4484304 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -14,6 +14,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/compiler.h>
+#include <linux/entry-virt.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
@@ -4788,9 +4789,6 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
- if (need_resched())
- schedule();
-
if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);
if (rc || guestdbg_exit_pending(vcpu))
@@ -5095,12 +5093,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
*/
kvm_vcpu_srcu_read_lock(vcpu);
- do {
+ while (true) {
rc = vcpu_pre_run(vcpu);
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (rc || guestdbg_exit_pending(vcpu))
break;
- kvm_vcpu_srcu_read_unlock(vcpu);
/*
* As PF_VCPU will be used in fault handler, between
* guest_timing_enter_irqoff and guest_timing_exit_irqoff
@@ -5113,6 +5111,16 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
}
local_irq_disable();
+
+ xfer_to_guest_mode_prepare();
+ if (xfer_to_guest_mode_work_pending()) {
+ local_irq_enable();
+ rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
+ if (rc)
+ break;
+ local_irq_disable();
+ }
+
guest_timing_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
@@ -5142,9 +5150,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
kvm_vcpu_srcu_read_lock(vcpu);
rc = vcpu_post_run(vcpu, exit_reason);
- } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
+ if (rc || guestdbg_exit_pending(vcpu)) {
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ break;
+ }
+ };
- kvm_vcpu_srcu_read_unlock(vcpu);
return rc;
}
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 347268f89f2f186bea623a3adff7376cabc305b2..3a5219d0587343c2d0ea17adff356ad3284a5f33 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1181,11 +1181,21 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
barrier();
if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
local_irq_disable();
+ xfer_to_guest_mode_prepare();
+ if (xfer_to_guest_mode_work_pending()) {
+ local_irq_enable();
+ rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
+ if (rc)
+ goto skip_sie;
+ local_irq_disable();
+ }
guest_timing_enter_irqoff();
rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
guest_timing_exit_irqoff();
local_irq_enable();
}
+
+skip_sie:
barrier();
vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
@@ -1345,13 +1355,11 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
* but rewind the PSW to re-enter SIE once that's completed
* instead of passing a "no action" intercept to the guest.
*/
- if (signal_pending(current) ||
- kvm_s390_vcpu_has_irq(vcpu, 0) ||
+ if (kvm_s390_vcpu_has_irq(vcpu, 0) ||
kvm_s390_vcpu_sie_inhibited(vcpu)) {
kvm_s390_rewind_psw(vcpu, 4);
break;
}
- cond_resched();
}
if (rc == -EFAULT) {
@@ -1483,8 +1491,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
if (unlikely(scb_addr & 0x1ffUL))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
- kvm_s390_vcpu_sie_inhibited(vcpu)) {
+ if (kvm_s390_vcpu_has_irq(vcpu, 0) || kvm_s390_vcpu_sie_inhibited(vcpu)) {
kvm_s390_rewind_psw(vcpu, 4);
return 0;
}
--
2.52.0
On Tue, Nov 25, 2025 at 06:45:54PM +1100, Andrew Donnellan wrote:
> Switch to using the generic infrastructure to check for and handle pending
> work before transitioning into guest mode.
>
> xfer_to_guest_mode_handle_work() does a few more things than the current
> code does when deciding whether or not to exit the __vcpu_run() loop. The
> exittime tests from kvm-unit-tests, in my tests, were +/-3% compared to
> before this series, which is within noise tolerance.
...
> local_irq_disable();
> +
> + xfer_to_guest_mode_prepare();
> + if (xfer_to_guest_mode_work_pending()) {
> + local_irq_enable();
> + rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
> + if (rc)
> + break;
> + local_irq_disable();
> + }
> +
> guest_timing_enter_irqoff();
> __disable_cpu_timer_accounting(vcpu);
This looks racy: kvm_xfer_to_guest_mode_handle_work() returns with
interrupts enabled, and before interrupts are disabled again more work
might have become pending. But that is ignored and guest state is
entered instead. Why not simply change the above to something like
this to avoid it:
again:
local_irq_disable();
xfer_to_guest_mode_prepare();
if (xfer_to_guest_mode_work_pending()) {
local_irq_enable();
rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
if (rc)
break;
goto again;
}
guest_timing_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
But maybe I'm missing something?
> @@ -1181,11 +1181,21 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> barrier();
> if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
> local_irq_disable();
> + xfer_to_guest_mode_prepare();
> + if (xfer_to_guest_mode_work_pending()) {
> + local_irq_enable();
> + rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
> + if (rc)
> + goto skip_sie;
> + local_irq_disable();
> + }
> guest_timing_enter_irqoff();
> rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, vsie_page->gmap->asce);
Same here.
On Tue, 2025-11-25 at 12:16 +0100, Heiko Carstens wrote:
> On Tue, Nov 25, 2025 at 06:45:54PM +1100, Andrew Donnellan wrote:
> > Switch to using the generic infrastructure to check for and handle pending
> > work before transitioning into guest mode.
> >
> > xfer_to_guest_mode_handle_work() does a few more things than the current
> > code does when deciding whether or not to exit the __vcpu_run() loop. The
> > exittime tests from kvm-unit-tests, in my tests, were +/-3% compared to
> > before this series, which is within noise tolerance.
>
> ...
>
> > local_irq_disable();
> > +
> > + xfer_to_guest_mode_prepare();
> > + if (xfer_to_guest_mode_work_pending()) {
> > + local_irq_enable();
> > + rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
> > + if (rc)
> > + break;
> > + local_irq_disable();
> > + }
> > +
> > guest_timing_enter_irqoff();
> > __disable_cpu_timer_accounting(vcpu);
>
> This looks racy: kvm_xfer_to_guest_mode_handle_work() returns with
> interrupts enabled, and before interrupts are disabled again more work
> might have become pending. But that is ignored and guest state is
> entered instead. Why not simply change the above to something like
> this to avoid it:
>
> again:
> local_irq_disable();
> xfer_to_guest_mode_prepare();
> if (xfer_to_guest_mode_work_pending()) {
> local_irq_enable();
> rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
> if (rc)
> break;
> goto again;
> }
>
> guest_timing_enter_irqoff();
> __disable_cpu_timer_accounting(vcpu);
>
> But maybe I'm missing something?
Agreed, I'll restructure this and respin.
--
Andrew Donnellan OzLabs, ADL Canberra
ajd@linux.ibm.com IBM Australia Limited
© 2016 - 2025 Red Hat, Inc.