S390 needs a fast path for irq injection, and along those lines we
introduce kvm_arch_set_irq_inatomic. Instead of placing all interrupts on
the global work queue as it does today, this patch provides a fast path for
irq injection.
The inatomic fast path cannot lose control since it is running with
interrupts disabled. This meant making the following changes that exist on
the slow path today. First, the adapter_indicators page needs to be mapped
since it is accessed with interrupts disabled, so we added map/unmap
functions. Second, access to shared resources between the fast and slow
paths needed to be changed from mutexes and semaphores to spin_locks.
Finally, the memory allocation on the slow path utilizes GFP_KERNEL_ACCOUNT
but we had to implement the fast path with GFP_ATOMIC allocation. Each of
these enhancements was required to prevent blocking on the fast inject
path.
Fencing of Fast Inject in Secure Execution environments is enabled in the
patch series by not mapping adapter indicator pages. In Secure Execution
environments the path of execution available before this patch is followed.
Statistical counters have been added to enable analysis of irq injection on
the fast path and slow path including io_390_inatomic, io_flic_inject_airq,
io_set_adapter_int and io_390_inatomic_adapter_masked.
Signed-off-by: Douglas Freimuth <freimuth@linux.ibm.com>
---
arch/s390/include/asm/kvm_host.h | 6 +-
arch/s390/kvm/interrupt.c | 160 +++++++++++++++++++++++++++----
arch/s390/kvm/kvm-s390.c | 24 ++++-
arch/s390/kvm/kvm-s390.h | 3 +-
4 files changed, 169 insertions(+), 24 deletions(-)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a078420751a1..90b1a19074ce 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -359,7 +359,7 @@ struct kvm_s390_float_interrupt {
struct kvm_s390_mchk_info mchk;
struct kvm_s390_ext_info srv_signal;
int last_sleep_cpu;
- struct mutex ais_lock;
+ spinlock_t ais_lock;
u8 simm;
u8 nimm;
};
@@ -450,6 +450,10 @@ struct kvm_vm_stat {
u64 inject_io;
u64 io_390_adapter_map;
u64 io_390_adapter_unmap;
+ u64 io_390_inatomic;
+ u64 io_flic_inject_airq;
+ u64 io_set_adapter_int;
+ u64 io_390_inatomic_adapter_masked;
u64 inject_float_mchk;
u64 inject_pfault_done;
u64 inject_service_signal;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f3183c9ec7f1..ead54f968a79 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1963,15 +1963,10 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
}
int kvm_s390_inject_vm(struct kvm *kvm,
- struct kvm_s390_interrupt *s390int)
+ struct kvm_s390_interrupt *s390int, struct kvm_s390_interrupt_info *inti)
{
- struct kvm_s390_interrupt_info *inti;
int rc;
- inti = kzalloc_obj(*inti, GFP_KERNEL_ACCOUNT);
- if (!inti)
- return -ENOMEM;
-
inti->type = s390int->type;
switch (inti->type) {
case KVM_S390_INT_VIRTIO:
@@ -2007,6 +2002,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
2);
rc = __inject_vm(kvm, inti);
+ /* memory allocation is done by the caller and inti is passed in, we free it here */
if (rc)
kfree(inti);
return rc;
@@ -2284,6 +2280,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
{
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
struct kvm_s390_ais_all ais;
+ unsigned long flags;
if (attr->attr < sizeof(ais))
return -EINVAL;
@@ -2291,10 +2288,10 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
if (!test_kvm_facility(kvm, 72))
return -EOPNOTSUPP;
- mutex_lock(&fi->ais_lock);
+ spin_lock_irqsave(&fi->ais_lock, flags);
ais.simm = fi->simm;
ais.nimm = fi->nimm;
- mutex_unlock(&fi->ais_lock);
+ spin_unlock_irqrestore(&fi->ais_lock, flags);
if (copy_to_user((void __user *)attr->addr, &ais, sizeof(ais)))
return -EFAULT;
@@ -2638,6 +2635,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
struct kvm_s390_ais_req req;
int ret = 0;
+ unsigned long flags;
if (!test_kvm_facility(kvm, 72))
return -EOPNOTSUPP;
@@ -2654,7 +2652,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
2 : KVM_S390_AIS_MODE_SINGLE :
KVM_S390_AIS_MODE_ALL, req.mode);
- mutex_lock(&fi->ais_lock);
+ spin_lock_irqsave(&fi->ais_lock, flags);
switch (req.mode) {
case KVM_S390_AIS_MODE_ALL:
fi->simm &= ~AIS_MODE_MASK(req.isc);
@@ -2667,7 +2665,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
default:
ret = -EINVAL;
}
- mutex_unlock(&fi->ais_lock);
+ spin_unlock_irqrestore(&fi->ais_lock, flags);
return ret;
}
@@ -2681,25 +2679,33 @@ static int kvm_s390_inject_airq(struct kvm *kvm,
.parm = 0,
.parm64 = isc_to_int_word(adapter->isc),
};
+ struct kvm_s390_interrupt_info *inti;
+ unsigned long flags;
+
int ret = 0;
+ inti = kzalloc_obj(*inti, GFP_KERNEL_ACCOUNT);
+ if (!inti)
+ return -ENOMEM;
+
if (!test_kvm_facility(kvm, 72) || !adapter->suppressible)
- return kvm_s390_inject_vm(kvm, &s390int);
+ return kvm_s390_inject_vm(kvm, &s390int, inti);
- mutex_lock(&fi->ais_lock);
+ spin_lock_irqsave(&fi->ais_lock, flags);
if (fi->nimm & AIS_MODE_MASK(adapter->isc)) {
trace_kvm_s390_airq_suppressed(adapter->id, adapter->isc);
+ kfree(inti);
goto out;
}
- ret = kvm_s390_inject_vm(kvm, &s390int);
+ ret = kvm_s390_inject_vm(kvm, &s390int, inti);
if (!ret && (fi->simm & AIS_MODE_MASK(adapter->isc))) {
fi->nimm |= AIS_MODE_MASK(adapter->isc);
trace_kvm_s390_modify_ais_mode(adapter->isc,
KVM_S390_AIS_MODE_SINGLE, 2);
}
out:
- mutex_unlock(&fi->ais_lock);
+ spin_unlock_irqrestore(&fi->ais_lock, flags);
return ret;
}
@@ -2708,6 +2714,8 @@ static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr)
unsigned int id = attr->attr;
struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+ kvm->stat.io_flic_inject_airq++;
+
if (!adapter)
return -EINVAL;
@@ -2718,6 +2726,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
{
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
struct kvm_s390_ais_all ais;
+ unsigned long flags;
if (!test_kvm_facility(kvm, 72))
return -EOPNOTSUPP;
@@ -2725,10 +2734,10 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
return -EFAULT;
- mutex_lock(&fi->ais_lock);
+ spin_lock_irqsave(&fi->ais_lock, flags);
fi->simm = ais.simm;
fi->nimm = ais.nimm;
- mutex_unlock(&fi->ais_lock);
+ spin_unlock_irqrestore(&fi->ais_lock, flags);
return 0;
}
@@ -2894,6 +2903,7 @@ static int adapter_indicators_set(struct kvm *kvm,
set_bit(bit, map);
spin_unlock_irqrestore(&adapter->maps_lock, flags);
}
+
spin_lock_irqsave(&adapter->maps_lock, flags);
summary_info = get_map_info(adapter, adapter_int->summary_addr);
if (!summary_info) {
@@ -2926,6 +2936,44 @@ static int adapter_indicators_set(struct kvm *kvm,
return summary_set ? 0 : 1;
}
+static int adapter_indicators_set_fast(struct kvm *kvm,
+ struct s390_io_adapter *adapter,
+ struct kvm_s390_adapter_int *adapter_int,
+ int setbit)
+{
+ unsigned long bit;
+ int summary_set;
+ struct s390_map_info *ind_info, *summary_info;
+ void *map;
+
+ spin_lock(&adapter->maps_lock);
+ ind_info = get_map_info(adapter, adapter_int->ind_addr);
+ if (!ind_info) {
+ spin_unlock(&adapter->maps_lock);
+ return -EWOULDBLOCK;
+ }
+ map = page_address(ind_info->page);
+ bit = get_ind_bit(ind_info->addr, adapter_int->ind_offset, adapter->swap);
+ if (setbit)
+ set_bit(bit, map);
+ else
+ clear_bit(bit, map);
+ summary_info = get_map_info(adapter, adapter_int->summary_addr);
+ if (!summary_info) {
+ spin_unlock(&adapter->maps_lock);
+ return -EWOULDBLOCK;
+ }
+ map = page_address(summary_info->page);
+ bit = get_ind_bit(summary_info->addr, adapter_int->summary_offset,
+ adapter->swap);
+ if (setbit)
+ summary_set = test_and_set_bit(bit, map);
+ else
+ summary_set = test_and_clear_bit(bit, map);
+ spin_unlock(&adapter->maps_lock);
+ return summary_set ? 0 : 1;
+}
+
/*
* < 0 - not injected due to error
* = 0 - coalesced, summary indicator already active
@@ -2938,6 +2986,8 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
int ret;
struct s390_io_adapter *adapter;
+ kvm->stat.io_set_adapter_int++;
+
/* We're only interested in the 0->1 transition. */
if (!level)
return 0;
@@ -3006,7 +3056,6 @@ int kvm_set_routing_entry(struct kvm *kvm,
int idx;
switch (ue->type) {
- /* we store the userspace addresses instead of the guest addresses */
case KVM_IRQ_ROUTING_S390_ADAPTER:
if (kvm_is_ucontrol(kvm))
return -EINVAL;
@@ -3597,3 +3646,80 @@ int __init kvm_s390_gib_init(u8 nisc)
out:
return rc;
}
+
+/*
+ * kvm_arch_set_irq_inatomic: fast-path for irqfd injection
+ */
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
+{
+ int ret, setbit;
+ struct s390_io_adapter *adapter;
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_interrupt_info *inti;
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_IO(1, 0, 0, 0),
+ .parm = 0,
+ };
+
+ kvm->stat.io_390_inatomic++;
+
+ /* We're only interested in the 0->1 transition. */
+ if (!level)
+ return -EWOULDBLOCK;
+ if (e->type != KVM_IRQ_ROUTING_S390_ADAPTER)
+ return -EWOULDBLOCK;
+
+ adapter = get_io_adapter(kvm, e->adapter.adapter_id);
+ if (!adapter)
+ return -EWOULDBLOCK;
+
+ s390int.parm64 = isc_to_int_word(adapter->isc);
+ setbit = 1;
+ ret = adapter_indicators_set_fast(kvm, adapter, &e->adapter, setbit);
+ if (ret < 0)
+ return -EWOULDBLOCK;
+ if (!ret || adapter->masked) {
+ kvm->stat.io_390_inatomic_adapter_masked++;
+ return 0;
+ }
+
+ inti = kzalloc_obj(*inti, GFP_ATOMIC);
+ if (!inti)
+ return -EWOULDBLOCK;
+
+ if (!test_kvm_facility(kvm, 72) || !adapter->suppressible) {
+ ret = kvm_s390_inject_vm(kvm, &s390int, inti);
+ if (ret == 0) {
+ return ret;
+ } else {
+ setbit = 0;
+ adapter_indicators_set_fast(kvm, adapter, &e->adapter, setbit);
+ return -EWOULDBLOCK;
+ }
+ }
+
+ spin_lock(&fi->ais_lock);
+ if (fi->nimm & AIS_MODE_MASK(adapter->isc)) {
+ trace_kvm_s390_airq_suppressed(adapter->id, adapter->isc);
+ kfree(inti);
+ goto out;
+ }
+
+ ret = kvm_s390_inject_vm(kvm, &s390int, inti);
+ if (!ret && (fi->simm & AIS_MODE_MASK(adapter->isc))) {
+ fi->nimm |= AIS_MODE_MASK(adapter->isc);
+ trace_kvm_s390_modify_ais_mode(adapter->isc,
+ KVM_S390_AIS_MODE_SINGLE, 2);
+ } else if (ret) {
+ spin_unlock(&fi->ais_lock);
+ setbit = 0;
+ adapter_indicators_set_fast(kvm, adapter, &e->adapter, setbit);
+ return -EWOULDBLOCK;
+ }
+
+out:
+ spin_unlock(&fi->ais_lock);
+ return 0;
+}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 4eada48c6e27..72d083e9afa8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -70,6 +70,10 @@ const struct kvm_stats_desc kvm_vm_stats_desc[] = {
STATS_DESC_COUNTER(VM, inject_io),
STATS_DESC_COUNTER(VM, io_390_adapter_map),
STATS_DESC_COUNTER(VM, io_390_adapter_unmap),
+ STATS_DESC_COUNTER(VM, io_390_inatomic),
+ STATS_DESC_COUNTER(VM, io_flic_inject_airq),
+ STATS_DESC_COUNTER(VM, io_set_adapter_int),
+ STATS_DESC_COUNTER(VM, io_390_inatomic_adapter_masked),
STATS_DESC_COUNTER(VM, inject_float_mchk),
STATS_DESC_COUNTER(VM, inject_pfault_done),
STATS_DESC_COUNTER(VM, inject_service_signal),
@@ -2869,6 +2873,7 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
void __user *argp = (void __user *)arg;
struct kvm_device_attr attr;
int r;
+ struct kvm_s390_interrupt_info *inti;
switch (ioctl) {
case KVM_S390_INTERRUPT: {
@@ -2877,7 +2882,10 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
r = -EFAULT;
if (copy_from_user(&s390int, argp, sizeof(s390int)))
break;
- r = kvm_s390_inject_vm(kvm, &s390int);
+ inti = kzalloc_obj(*inti, GFP_KERNEL_ACCOUNT);
+ if (!inti)
+ return -ENOMEM;
+ r = kvm_s390_inject_vm(kvm, &s390int, inti);
break;
}
case KVM_CREATE_IRQCHIP: {
@@ -3275,7 +3283,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
mutex_unlock(&kvm->lock);
}
- mutex_init(&kvm->arch.float_int.ais_lock);
+ spin_lock_init(&kvm->arch.float_int.ais_lock);
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
@@ -4396,11 +4404,16 @@ int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clo
return 1;
}
-static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
- unsigned long token)
+static int __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+ unsigned long token)
{
struct kvm_s390_interrupt inti;
struct kvm_s390_irq irq;
+ struct kvm_s390_interrupt_info *inti_mem;
+
+ inti_mem = kzalloc_obj(*inti_mem, GFP_KERNEL_ACCOUNT);
+ if (!inti_mem)
+ return -ENOMEM;
if (start_token) {
irq.u.ext.ext_params2 = token;
@@ -4409,8 +4422,9 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
} else {
inti.type = KVM_S390_INT_PFAULT_DONE;
inti.parm64 = token;
- WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+ WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti, inti_mem));
}
+ return true;
}
bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index bf1d7798c1af..2f2da868a040 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -373,7 +373,8 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
void kvm_s390_clear_float_irqs(struct kvm *kvm);
int __must_check kvm_s390_inject_vm(struct kvm *kvm,
- struct kvm_s390_interrupt *s390int);
+ struct kvm_s390_interrupt *s390int,
+ struct kvm_s390_interrupt_info *inti);
int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
struct kvm_s390_irq *irq);
static inline int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
--
2.52.0
>
> -static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
> - unsigned long token)
> +static int __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
> + unsigned long token)
> {
> struct kvm_s390_interrupt inti;
> struct kvm_s390_irq irq;
> + struct kvm_s390_interrupt_info *inti_mem;
> +
> + inti_mem = kzalloc_obj(*inti_mem, GFP_KERNEL_ACCOUNT);
> + if (!inti_mem)
> + return -ENOMEM;
You change this function to possibly return this value but you do not
change the callers of this routine to actually look at the new return value?
AFAICT there are 2 callers of this today in arch/s390/kvm/kvm-s390.c - I
assume one or both need updating, otherwise why do we need this change?
>
> if (start_token) {
> irq.u.ext.ext_params2 = token;
> @@ -4409,8 +4422,9 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
> } else {
> inti.type = KVM_S390_INT_PFAULT_DONE;
> inti.parm64 = token;
> - WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
> + WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti, inti_mem));
> }
> + return true;
Since return value is an integer, return 0?
On 4/6/26 2:44 AM, Douglas Freimuth wrote: > S390 needs a fast path for irq injection, and along those lines we > introduce kvm_arch_set_irq_inatomic. Instead of placing all interrupts on > the global work queue as it does today, this patch provides a fast path for > irq injection. > > The inatomic fast path cannot lose control since it is running with > interrupts disabled. This meant making the following changes that exist on > the slow path today. First, the adapter_indicators page needs to be mapped > since it is accessed with interrupts disabled, so we added map/unmap > functions. Second, access to shared resources between the fast and slow > paths needed to be changed from mutex and semaphores to spin_lock's. > Finally, the memory allocation on the slow path utilizes GFP_KERNEL_ACCOUNT > but we had to implement the fast path with GFP_ATOMIC allocation. Each of > these enhancements were required to prevent blocking on the fast inject > path. > > Fencing of Fast Inject in Secure Execution environments is enabled in the > patch series by not mapping adapter indicator pages. In Secure Execution > environments the path of execution available before this patch is followed. > > Statistical counters have been added to enable analysis of irq injection on > the fast path and slow path including io_390_inatomic, io_flic_inject_airq, > io_set_adapter_int and io_390_inatomic_adapter_masked. > > Signed-off-by: Douglas Freimuth <freimuth@linux.ibm.com> Sashiko complains about PREEMPT_RT kernels and spinlocks being sleepable in this case which would break the whole point of kvm_arch_set_irq_inatomic. I suspect actually the kvm_arch_set_irq_inatomic() call itself shouldn't be used in this case, or in other words it wouldn't be an issue with just this s390 implementation but rather all of arch implementations? 
I did not try enabling it and running a test, but I did do some searching of the codebase and I found at least 1 spinlock acquired somewhere along the inatomic path for the existing implementations... loongarch (pch_pic_set_irq) arm64 (vgic_its_inject_cached_translation) powerpc (icp_deliver_irq) riscv (kvm_riscv_aia_aplic_inject) For x86 I didn't find a spinlock -- maybe I didn't look hard enough! -- but I did find a path that uses RCU (kvm_irq_delivery_to_apic_fast) which AFAIU would also become preemptible under PREEMPT_RT. So for this series it seems reasonable to me to proceed as-is, with an open question whether there should be a KVM-wide avoidance of kvm_arch_set_irq_inatomic() under PREEMPT_RT?
On Mon, Apr 06, 2026, Matthew Rosato wrote: > On 4/6/26 2:44 AM, Douglas Freimuth wrote: > > S390 needs a fast path for irq injection, and along those lines we > > introduce kvm_arch_set_irq_inatomic. Instead of placing all interrupts on > > the global work queue as it does today, this patch provides a fast path for > > irq injection. > > > > The inatomic fast path cannot lose control since it is running with > > interrupts disabled. This meant making the following changes that exist on > > the slow path today. First, the adapter_indicators page needs to be mapped > > since it is accessed with interrupts disabled, so we added map/unmap > > functions. Second, access to shared resources between the fast and slow > > paths needed to be changed from mutex and semaphores to spin_lock's. > > Finally, the memory allocation on the slow path utilizes GFP_KERNEL_ACCOUNT > > but we had to implement the fast path with GFP_ATOMIC allocation. Each of > > these enhancements were required to prevent blocking on the fast inject > > path. > > > > Fencing of Fast Inject in Secure Execution environments is enabled in the > > patch series by not mapping adapter indicator pages. In Secure Execution > > environments the path of execution available before this patch is followed. > > > > Statistical counters have been added to enable analysis of irq injection on > > the fast path and slow path including io_390_inatomic, io_flic_inject_airq, > > io_set_adapter_int and io_390_inatomic_adapter_masked. > > > > Signed-off-by: Douglas Freimuth <freimuth@linux.ibm.com> > > > Sashiko complains about PREEMPT_RT kernels and spinlocks being sleepable > in this case which would break the whole point of kvm_arch_set_irq_inatomic. Just make it a raw spinlock so that it stays an actual spinlock. 
> I suspect actually the kvm_arch_set_irq_inatomic() call itself shouldn't > be used in this case, or in other words it wouldn't be an issue with > just this s390 implementation but rather all of arch implementations? > > I did not try enabling it and running a test, but I did do some > searching of the codebase and I can found at least 1 spinlock acquired > somewhere along the inatomic path for the existing implementations... > > longarch (pch_pic_set_irq) I doubt anyone runs PREEMPT_RT VMs on LoongArch at this point. > arm64 (vgic_its_inject_cached_translation) Uses raw. > powerpc (icp_deliver_irq) Presumably arch_spin_lock() is also a "raw" version? PPC KVM is barely maintained at this point, so I wouldn't worry much about it. > riscv (kvm_riscv_aia_aplic_inject) Uses "raw". > For x86 I didn't find a spinlock -- maybe I didn't look hard enough! -- > but I did find a path that uses RCU (kvm_irq_delivery_to_apic_fast) > which AFAIU would also become preemptible under PREEMPT_RT. This isn't about becoming preemptible per se, it's about non-raw spinlocks becoming sleepable locks. RCU can be made preemptible, but rcu_read_lock() doesn't become sleepable. > So for this series it seems reasonable to me to proceed as-is, with an > open question whether there should be a KVM-wide avoidance of > kvm_arch_set_irq_inatomic() under PREEMPT_RT? s390 should use a raw spinlock, same as arm64 and RISC-V.
>> >> Sashiko complains about PREEMPT_RT kernels and spinlocks being sleepable >> in this case which would break the whole point of kvm_arch_set_irq_inatomic. > > Just make it a raw spinlock so that it stays an actual spinlock. [...] > > s390 should use a raw spinlock, same as arm64 and RISC-V. Ahh, I missed that subtlety. Thanks for the explanation!
© 2016 - 2026 Red Hat, Inc.