arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/avic.c | 3 +-- arch/x86/kvm/vmx/posted_intr.c | 7 +++---- arch/x86/kvm/x86.c | 9 ++++++++- 4 files changed, 13 insertions(+), 7 deletions(-)
Add a module param to allow disabling device posted interrupts without
having to sacrifice all of APICv/AVIC, and to also effectively enumerate
to userspace whether or not KVM may be utilizing device posted IRQs.
Disabling device posted interrupts is very desirable for testing, and can
even be desirable for production environments, e.g. if the host kernel
wants to interpose on device interrupts.
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/svm/avic.c | 3 +--
arch/x86/kvm/vmx/posted_intr.c | 7 +++----
arch/x86/kvm/x86.c | 9 ++++++++-
4 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d881e7d276b1..bf11c5ee50cb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1922,6 +1922,7 @@ struct kvm_arch_async_pf {
extern u32 __read_mostly kvm_nr_uret_msrs;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern bool __read_mostly enable_apicv;
+extern bool __read_mostly enable_device_posted_irqs;
extern struct kvm_x86_ops kvm_x86_ops;
#define kvm_x86_call(func) static_call(kvm_x86_##func)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 65fd245a9953..e0f519565393 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -898,8 +898,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
struct kvm_irq_routing_table *irq_rt;
int idx, ret = 0;
- if (!kvm_arch_has_assigned_device(kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP))
+ if (!kvm_arch_has_assigned_device(kvm) || !enable_device_posted_irqs)
return 0;
pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index ec08fa3caf43..a03988a138c5 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -134,9 +134,8 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
static bool vmx_can_use_vtd_pi(struct kvm *kvm)
{
- return irqchip_in_kernel(kvm) && enable_apicv &&
- kvm_arch_has_assigned_device(kvm) &&
- irq_remapping_cap(IRQ_POSTING_CAP);
+ return irqchip_in_kernel(kvm) && enable_device_posted_irqs &&
+ kvm_arch_has_assigned_device(kvm);
}
/*
@@ -254,7 +253,7 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
*/
void vmx_pi_start_assignment(struct kvm *kvm)
{
- if (!irq_remapping_cap(IRQ_POSTING_CAP))
+ if (!enable_device_posted_irqs)
return;
kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 69c20a68a3f0..1b14319975b7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -227,6 +227,10 @@ EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
bool __read_mostly enable_apicv = true;
EXPORT_SYMBOL_GPL(enable_apicv);
+bool __read_mostly enable_device_posted_irqs = true;
+module_param(enable_device_posted_irqs, bool, 0444);
+EXPORT_SYMBOL_GPL(enable_device_posted_irqs);
+
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
@@ -9772,6 +9776,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (r != 0)
goto out_mmu_exit;
+ enable_device_posted_irqs = enable_device_posted_irqs && enable_apicv &&
+ irq_remapping_cap(IRQ_POSTING_CAP);
+
kvm_ops_update(ops);
for_each_online_cpu(cpu) {
@@ -13552,7 +13559,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
bool kvm_arch_has_irq_bypass(void)
{
- return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
+ return enable_device_posted_irqs;
}
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
base-commit: c9ea48bb6ee6b28bbc956c1e8af98044618fed5e
--
2.49.0.rc1.451.g8f38331e32-goog
On Fri, Mar 14, 2025 at 07:56:15PM -0700, Sean Christopherson wrote:
> Add a module param to allow disabling device posted interrupts without
> having to sacrifice all of APICv/AVIC, and to also effectively enumerate
> to userspace whether or not KVM may be utilizing device posted IRQs.
> Disabling device posted interrupts is very desirable for testing, and can
> even be desirable for production environments, e.g. if the host kernel
> wants to interpose on device interrupts.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/svm/avic.c | 3 +--
> arch/x86/kvm/vmx/posted_intr.c | 7 +++----
> arch/x86/kvm/x86.c | 9 ++++++++-
> 4 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d881e7d276b1..bf11c5ee50cb 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1922,6 +1922,7 @@ struct kvm_arch_async_pf {
> extern u32 __read_mostly kvm_nr_uret_msrs;
> extern bool __read_mostly allow_smaller_maxphyaddr;
> extern bool __read_mostly enable_apicv;
> +extern bool __read_mostly enable_device_posted_irqs;
> extern struct kvm_x86_ops kvm_x86_ops;
>
> #define kvm_x86_call(func) static_call(kvm_x86_##func)
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 65fd245a9953..e0f519565393 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -898,8 +898,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> struct kvm_irq_routing_table *irq_rt;
> int idx, ret = 0;
>
> - if (!kvm_arch_has_assigned_device(kvm) ||
> - !irq_remapping_cap(IRQ_POSTING_CAP))
> + if (!kvm_arch_has_assigned_device(kvm) || !enable_device_posted_irqs)
This function will now also be skipped if enable_apicv is false. Is this
always the case here for some reason? Sorry if I missed something
obvious.
> return 0;
>
> pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
> diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> index ec08fa3caf43..a03988a138c5 100644
> --- a/arch/x86/kvm/vmx/posted_intr.c
> +++ b/arch/x86/kvm/vmx/posted_intr.c
> @@ -134,9 +134,8 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
>
> static bool vmx_can_use_vtd_pi(struct kvm *kvm)
> {
> - return irqchip_in_kernel(kvm) && enable_apicv &&
> - kvm_arch_has_assigned_device(kvm) &&
> - irq_remapping_cap(IRQ_POSTING_CAP);
> + return irqchip_in_kernel(kvm) && enable_device_posted_irqs &&
> + kvm_arch_has_assigned_device(kvm);
> }
>
> /*
> @@ -254,7 +253,7 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
> */
> void vmx_pi_start_assignment(struct kvm *kvm)
> {
> - if (!irq_remapping_cap(IRQ_POSTING_CAP))
> + if (!enable_device_posted_irqs)
Ditto here.
> return;
>
> kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 69c20a68a3f0..1b14319975b7 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -227,6 +227,10 @@ EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
> bool __read_mostly enable_apicv = true;
> EXPORT_SYMBOL_GPL(enable_apicv);
>
> +bool __read_mostly enable_device_posted_irqs = true;
> +module_param(enable_device_posted_irqs, bool, 0444);
> +EXPORT_SYMBOL_GPL(enable_device_posted_irqs);
> +
> const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
> KVM_GENERIC_VM_STATS(),
> STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
> @@ -9772,6 +9776,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
> if (r != 0)
> goto out_mmu_exit;
>
> + enable_device_posted_irqs = enable_device_posted_irqs && enable_apicv &&
> + irq_remapping_cap(IRQ_POSTING_CAP);
Maybe this is clearer:
enable_device_posted_irqs &= enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
> +
> kvm_ops_update(ops);
>
> for_each_online_cpu(cpu) {
> @@ -13552,7 +13559,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
>
> bool kvm_arch_has_irq_bypass(void)
> {
> - return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
> + return enable_device_posted_irqs;
> }
>
> int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
>
> base-commit: c9ea48bb6ee6b28bbc956c1e8af98044618fed5e
> --
> 2.49.0.rc1.451.g8f38331e32-goog
>
>
On Mon, Mar 17, 2025, Yosry Ahmed wrote:
> On Fri, Mar 14, 2025 at 07:56:15PM -0700, Sean Christopherson wrote:
> > Add a module param to allow disabling device posted interrupts without
> > having to sacrifice all of APICv/AVIC, and to also effectively enumerate
> > to userspace whether or not KVM may be utilizing device posted IRQs.
> > Disabling device posted interrupts is very desirable for testing, and can
> > even be desirable for production environments, e.g. if the host kernel
> > wants to interpose on device interrupts.
> >
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> > arch/x86/include/asm/kvm_host.h | 1 +
> > arch/x86/kvm/svm/avic.c | 3 +--
> > arch/x86/kvm/vmx/posted_intr.c | 7 +++----
> > arch/x86/kvm/x86.c | 9 ++++++++-
> > 4 files changed, 13 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index d881e7d276b1..bf11c5ee50cb 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1922,6 +1922,7 @@ struct kvm_arch_async_pf {
> > extern u32 __read_mostly kvm_nr_uret_msrs;
> > extern bool __read_mostly allow_smaller_maxphyaddr;
> > extern bool __read_mostly enable_apicv;
> > +extern bool __read_mostly enable_device_posted_irqs;
> > extern struct kvm_x86_ops kvm_x86_ops;
> >
> > #define kvm_x86_call(func) static_call(kvm_x86_##func)
> > diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> > index 65fd245a9953..e0f519565393 100644
> > --- a/arch/x86/kvm/svm/avic.c
> > +++ b/arch/x86/kvm/svm/avic.c
> > @@ -898,8 +898,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> > struct kvm_irq_routing_table *irq_rt;
> > int idx, ret = 0;
> >
> > - if (!kvm_arch_has_assigned_device(kvm) ||
> > - !irq_remapping_cap(IRQ_POSTING_CAP))
> > + if (!kvm_arch_has_assigned_device(kvm) || !enable_device_posted_irqs)
>
> This function will now also be skipped if enable_apicv is false. Is this
> always the case here for some reason? Sorry if I missed something
> obvious.
Working as intended, though I failed to document it. Hrm, but I wasn't expecting
this to be a functional change. Oh, I know what happened. I had originally
tacked this on to a big series to clean up the IRTE stuff (spoiler alert), and in
that series common code checked kvm_arch_has_irq_bypass() (which incorporates
enable_apicv) before calling pi_update_irte().
I'll prepend a patch or three to do minimal cleanup before introducing the new
module param.
> > @@ -9772,6 +9776,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
> > if (r != 0)
> > goto out_mmu_exit;
> >
> > + enable_device_posted_irqs = enable_device_posted_irqs && enable_apicv &&
> > + irq_remapping_cap(IRQ_POSTING_CAP);
>
> Maybe this is clearer:
>
> > enable_device_posted_irqs &= enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
I don't have a strong opinion. I went with the "self check" approach purely
because SVM does so for a few params, e.g.
nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
Anyone else care either way? If not, I'll go with Yosry's suggestion.
On Mon, Mar 17, 2025 at 12:43:53PM -0700, Sean Christopherson wrote:
> On Mon, Mar 17, 2025, Yosry Ahmed wrote:
> > On Fri, Mar 14, 2025 at 07:56:15PM -0700, Sean Christopherson wrote:
> > > Add a module param to allow disabling device posted interrupts without
> > > having to sacrifice all of APICv/AVIC, and to also effectively enumerate
> > > to userspace whether or not KVM may be utilizing device posted IRQs.
> > > Disabling device posted interrupts is very desirable for testing, and can
> > > even be desirable for production environments, e.g. if the host kernel
> > > wants to interpose on device interrupts.
> > >
> > > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > > ---
> > > arch/x86/include/asm/kvm_host.h | 1 +
> > > arch/x86/kvm/svm/avic.c | 3 +--
> > > arch/x86/kvm/vmx/posted_intr.c | 7 +++----
> > > arch/x86/kvm/x86.c | 9 ++++++++-
> > > 4 files changed, 13 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > index d881e7d276b1..bf11c5ee50cb 100644
> > > --- a/arch/x86/include/asm/kvm_host.h
> > > +++ b/arch/x86/include/asm/kvm_host.h
> > > @@ -1922,6 +1922,7 @@ struct kvm_arch_async_pf {
> > > extern u32 __read_mostly kvm_nr_uret_msrs;
> > > extern bool __read_mostly allow_smaller_maxphyaddr;
> > > extern bool __read_mostly enable_apicv;
> > > +extern bool __read_mostly enable_device_posted_irqs;
> > > extern struct kvm_x86_ops kvm_x86_ops;
> > >
> > > #define kvm_x86_call(func) static_call(kvm_x86_##func)
> > > diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> > > index 65fd245a9953..e0f519565393 100644
> > > --- a/arch/x86/kvm/svm/avic.c
> > > +++ b/arch/x86/kvm/svm/avic.c
> > > @@ -898,8 +898,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> > > struct kvm_irq_routing_table *irq_rt;
> > > int idx, ret = 0;
> > >
> > > - if (!kvm_arch_has_assigned_device(kvm) ||
> > > - !irq_remapping_cap(IRQ_POSTING_CAP))
> > > + if (!kvm_arch_has_assigned_device(kvm) || !enable_device_posted_irqs)
> >
> > This function will now also be skipped if enable_apicv is false. Is this
> > always the case here for some reason? Sorry if I missed something
> > obvious.
>
> Working as intended, though I failed to document it. Hrm, but I wasn't expecting
> this to be a functional change. Oh, I know what happened. I had originally
> tacked this on to a big series to clean up the IRTE stuff (spoiler alert), and in
> that series common code checked kvm_arch_has_irq_bypass() (which incorporates
> enable_apicv) before calling pi_update_irte().
>
> I'll prepend a patch or three to do minimal cleanup before introducing the new
> module param.
>
> > > @@ -9772,6 +9776,9 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
> > > if (r != 0)
> > > goto out_mmu_exit;
> > >
> > > + enable_device_posted_irqs = enable_device_posted_irqs && enable_apicv &&
> > > + irq_remapping_cap(IRQ_POSTING_CAP);
> >
> > Maybe this is clearer:
> >
> > > enable_device_posted_irqs &= enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
>
> I don't have a strong opinion. I went with the "self check" approach purely
> because SVM does so for a few params, e.g.
>
> nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
>
> Anyone else care either way? If not, I'll go with Yosry's suggestion.
I can understand a consistency argument, so I am fine either way too.
The main reason I suggested this is that it took me a second to realize
this is the same thing on both sides of the assignment.
© 2016 - 2025 Red Hat, Inc.