[PATCH v5 4/3] KVM: selftests: Add test cases for EOI suppression modes

Khushit Shah posted 3 patches 1 month, 1 week ago
[PATCH v5 4/3] KVM: selftests: Add test cases for EOI suppression modes
Posted by David Woodhouse 1 week, 4 days ago
From: David Woodhouse <dwmw@amazon.co.uk>

Rather than being frightened of doing the right thing for the in-kernel
I/O APIC because "there might be bugs", let's add selftests for it to
make sure it behaves correctly. For both in-kernel I/O APIC and
userspace, exercise the following modes:

 • Legacy "quirk" behaviour (this test shows the same results on both
   old kernels and on kernels with this patch series in default mode).
 • KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST mode, both with the guest
   enabling APIC_SPIV_DIRECTED_EOI and without.
 • KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST mode.

Testing quirk mode (no flags)...
  IOAPIC v0x11, LVR directed_eoi=0, SPIV directed_eoi=0, remote_irr=0
Testing explicit enable...
  IOAPIC v0x20, LVR directed_eoi=1, SPIV directed_eoi=1, remote_irr=1
Testing explicit enable (guest doesn't use)...
  IOAPIC v0x20, LVR directed_eoi=1, SPIV directed_eoi=0, remote_irr=0
Testing explicit disable...
  IOAPIC v0x11, LVR directed_eoi=0, SPIV directed_eoi=0, remote_irr=0
All tests passed

=== Testing split IRQCHIP mode ===
Testing quirk mode (no flags)...
  Split IRQCHIP: LVR directed_eoi=1, SPIV directed_eoi=0, got_eoi_exit=1
Testing explicit enable...
  Split IRQCHIP: LVR directed_eoi=1, SPIV directed_eoi=1, got_eoi_exit=0
Testing explicit enable (guest doesn't use)...
  Split IRQCHIP: LVR directed_eoi=1, SPIV directed_eoi=0, got_eoi_exit=1
Testing explicit disable...
  Split IRQCHIP: LVR directed_eoi=0, SPIV directed_eoi=0, got_eoi_exit=1
All tests passed

There didn't seem to be a way for selftests to use split irqchip mode
until now, so this adds vm_irqchip_mode modelled on the existing
vm_guest_mode enum.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---

diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 148d427ff24b..01c59bf8b79f 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -122,6 +122,7 @@ TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
 TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
 TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
 TEST_GEN_PROGS_x86 += x86/xapic_state_test
+TEST_GEN_PROGS_x86 += x86/suppress_eoi_test
 TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test
 TEST_GEN_PROGS_x86 += x86/xss_msr_test
 TEST_GEN_PROGS_x86 += x86/debug_regs
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index d3f3e455c031..c4eb0e95bae9 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -209,6 +209,14 @@ kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
 	shape;					\
 })
 
+enum vm_irqchip_mode {
+	VM_IRQCHIP_AUTO,
+	VM_IRQCHIP_KERNEL,
+	VM_IRQCHIP_SPLIT,
+};
+
+extern enum vm_irqchip_mode vm_irqchip_mode;
+
 #if defined(__aarch64__)
 
 extern enum vm_guest_mode vm_mode_default;
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 1a93d6361671..4858c10f7530 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1687,21 +1687,34 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
 	return (void *) ((uintptr_t) region->host_alias + offset);
 }
 
+enum vm_irqchip_mode vm_irqchip_mode = VM_IRQCHIP_AUTO;
+
 /* Create an interrupt controller chip for the specified VM. */
 void vm_create_irqchip(struct kvm_vm *vm)
 {
 	int r;
 
 	/*
-	 * Allocate a fully in-kernel IRQ chip by default, but fall back to a
-	 * split model (x86 only) if that fails (KVM x86 allows compiling out
-	 * support for KVM_CREATE_IRQCHIP).
+	 * Create IRQ chip based on vm_irqchip_mode:
+	 * - VM_IRQCHIP_AUTO: Try in-kernel, fall back to split if not supported
+	 * - VM_IRQCHIP_KERNEL: Force in-kernel IRQ chip
+	 * - VM_IRQCHIP_SPLIT: Force split IRQ chip (x86 only)
 	 */
-	r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
-	if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP))
+	if (vm_irqchip_mode == VM_IRQCHIP_SPLIT) {
+		TEST_ASSERT(kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP),
+			    "Split IRQ chip not supported");
 		vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
-	else
+	} else if (vm_irqchip_mode == VM_IRQCHIP_KERNEL) {
+		r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
 		TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm);
+	} else {
+		/* VM_IRQCHIP_AUTO */
+		r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+		if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP))
+			vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
+		else
+			TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm);
+	}
 
 	vm->has_irqchip = true;
 }
diff --git a/tools/testing/selftests/kvm/x86/suppress_eoi_test.c b/tools/testing/selftests/kvm/x86/suppress_eoi_test.c
new file mode 100644
index 000000000000..ea14690b3116
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/suppress_eoi_test.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test KVM's handling of Suppress EOI Broadcast in x2APIC mode.
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "apic.h"
+
+#define TEST_VECTOR		0xa2
+#define TEST_IRQ		5
+#define APIC_LVR_DIRECTED_EOI	(1 << 24)
+#define APIC_SPIV_DIRECTED_EOI	(1 << 12)
+
+#define APIC_ISR	0x100
+#define APIC_LVTPC	0x340
+#define APIC_LVT0	0x350
+#define APIC_LVT1	0x360
+#define APIC_LVTERR	0x370
+
+#define IOAPIC_BASE_GPA		0xfec00000
+#define IOAPIC_REG_SELECT	0x00
+#define IOAPIC_REG_WINDOW	0x10
+#define IOAPIC_REG_VERSION	0x01
+#define IOAPIC_REG_EOI		0x40
+
+static uint32_t ioapic_version;
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+}
+
+static uint32_t ioapic_read_reg(uint32_t reg)
+{
+	volatile uint32_t *ioapic = (volatile uint32_t *)IOAPIC_BASE_GPA;
+	ioapic[0] = reg;
+	return ioapic[4];
+}
+
+static void ioapic_write_reg(uint32_t reg, uint32_t val)
+{
+	volatile uint32_t *ioapic = (volatile uint32_t *)IOAPIC_BASE_GPA;
+	ioapic[0] = reg;
+	ioapic[4] = val;
+}
+
+static void guest_code(uint64_t use_directed)
+{
+	uint64_t apic_base;
+	uint32_t spiv;
+
+	/* Enable x2APIC mode */
+	apic_base = rdmsr(MSR_IA32_APICBASE);
+	wrmsr(MSR_IA32_APICBASE, apic_base | X2APIC_ENABLE);
+
+	/* Mask all LVT entries to prevent spurious interrupts/NMIs */
+	x2apic_write_reg(APIC_LVTT, APIC_LVT_MASKED);
+	x2apic_write_reg(APIC_LVTPC, APIC_LVT_MASKED);
+	x2apic_write_reg(APIC_LVT0, APIC_LVT_MASKED);
+	x2apic_write_reg(APIC_LVT1, APIC_LVT_MASKED);
+	x2apic_write_reg(APIC_LVTERR, APIC_LVT_MASKED);
+
+	/* Enable APIC */
+	x2apic_write_reg(APIC_SPIV, APIC_SPIV_APIC_ENABLED | TEST_VECTOR);
+
+	/* Read IOAPIC version */
+	ioapic_version = ioapic_read_reg(IOAPIC_REG_VERSION);
+
+	/* Conditionally set APIC_SPIV_DIRECTED_EOI based on flag */
+	if (use_directed) {
+		spiv = x2apic_read_reg(APIC_SPIV);
+		x2apic_write_reg(APIC_SPIV, spiv | APIC_SPIV_DIRECTED_EOI);
+	}
+	spiv = x2apic_read_reg(APIC_SPIV);
+
+	GUEST_SYNC(ioapic_version | (spiv << 16));
+
+	/* Enable interrupts and wait for interrupt to be delivered */
+	asm volatile("sti; hlt");
+
+	/* Write EOI to trigger broadcast (or not) */
+	x2apic_write_reg(APIC_EOI, 0);
+
+	GUEST_SYNC(0);
+
+	/* If IOAPIC v0x20, write directed EOI to clear remote_irr */
+	if ((ioapic_version & 0xff) == 0x20) {
+		ioapic_write_reg(IOAPIC_REG_EOI, TEST_VECTOR);
+		GUEST_SYNC(1);
+	}
+
+	GUEST_DONE();
+}
+
+static void test_suppress_eoi(uint64_t x2apic_flags, bool expect_advertised, bool expect_implemented,
+			      bool use_directed)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct kvm_lapic_state lapic;
+	struct kvm_irqchip chip;
+	struct kvm_irq_level irq_level;
+	struct ucall uc;
+	uint32_t lvr, ioapic_ver, spiv_after;
+	bool remote_irr_set;
+
+	use_directed = use_directed;
+
+	vm = vm_create(1);
+
+	if (x2apic_flags)
+		vm_enable_cap(vm, KVM_CAP_X2APIC_API, x2apic_flags);
+
+	if (!vm->has_irqchip)
+		vm_create_irqchip(vm);
+
+	vcpu = vm_vcpu_add(vm, 0, guest_code);
+	vcpu_args_set(vcpu, 1, use_directed);
+
+	vm_install_exception_handler(vm, TEST_VECTOR, guest_irq_handler);
+
+	/* Map IOAPIC for guest access */
+	virt_map(vm, IOAPIC_BASE_GPA, IOAPIC_BASE_GPA, 1);
+
+	/* Configure level-triggered interrupt in IOAPIC */
+	chip.chip_id = KVM_IRQCHIP_IOAPIC;
+	vm_ioctl(vm, KVM_GET_IRQCHIP, &chip);
+
+	chip.chip.ioapic.redirtbl[TEST_IRQ].fields.vector = TEST_VECTOR;
+	chip.chip.ioapic.redirtbl[TEST_IRQ].fields.delivery_mode = 0; /* fixed */
+	chip.chip.ioapic.redirtbl[TEST_IRQ].fields.dest_mode = 0; /* physical */
+	chip.chip.ioapic.redirtbl[TEST_IRQ].fields.mask = 0; /* unmasked */
+	chip.chip.ioapic.redirtbl[TEST_IRQ].fields.trig_mode = 1; /* level */
+	chip.chip.ioapic.redirtbl[TEST_IRQ].fields.dest_id = vcpu->id;
+
+	vm_ioctl(vm, KVM_SET_IRQCHIP, &chip);
+
+	vcpu_run(vcpu);
+	TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+	ioapic_ver = uc.args[1] & 0xff;
+	spiv_after = (uc.args[1] >> 16) & 0xffff;
+
+	/* Inject level-triggered interrupt */
+	irq_level.irq = TEST_IRQ;
+	irq_level.level = 1;
+	vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
+
+	/* De-assert immediately so we only get one interrupt */
+	irq_level.level = 0;
+	vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
+
+	/* Guest receives interrupt and writes EOI */
+	vcpu_run(vcpu);
+
+	/* Check what ucall we got */
+	int ucall_type = get_ucall(vcpu, &uc);
+
+	/* Handle guest completion based on what it did */
+	if (ucall_type == UCALL_SYNC) {
+		/* Guest has more to do */
+		vcpu_run(vcpu);
+		ucall_type = get_ucall(vcpu, &uc);
+
+		if (ucall_type == UCALL_SYNC) {
+			/* Guest wrote EOIR */
+			vcpu_run(vcpu);
+			TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_DONE);
+		} else {
+			TEST_ASSERT_EQ(ucall_type, UCALL_DONE);
+		}
+	} else {
+		TEST_ASSERT_EQ(ucall_type, UCALL_DONE);
+	}
+
+	/* Check remote_irr after all guest EOI activity */
+	chip.chip_id = KVM_IRQCHIP_IOAPIC;
+	vm_ioctl(vm, KVM_GET_IRQCHIP, &chip);
+	remote_irr_set = chip.chip.ioapic.redirtbl[TEST_IRQ].fields.remote_irr;
+
+	/* De-assert IRQ line */
+	irq_level.level = 0;
+	vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
+
+	/* Check LAPIC LVR */
+	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);
+	lvr = *(u32 *)&lapic.regs[APIC_LVR];
+
+	printf("  IOAPIC v0x%x, LVR directed_eoi=%d, SPIV directed_eoi=%d, remote_irr=%d\n",
+	       ioapic_ver, !!(lvr & APIC_LVR_DIRECTED_EOI),
+	       !!(spiv_after & APIC_SPIV_DIRECTED_EOI), remote_irr_set);
+
+	if (expect_advertised) {
+		TEST_ASSERT(lvr & APIC_LVR_DIRECTED_EOI,
+			    "Expected APIC_LVR_DIRECTED_EOI, got LVR=0x%x", lvr);
+	} else {
+		TEST_ASSERT(!(lvr & APIC_LVR_DIRECTED_EOI),
+			    "Expected no APIC_LVR_DIRECTED_EOI, got LVR=0x%x", lvr);
+	}
+
+	/* Check IOAPIC version */
+	if (expect_implemented) {
+		TEST_ASSERT(ioapic_ver == 0x20,
+			    "Expected IOAPIC v0x20 (with EOIR), got 0x%x", ioapic_ver);
+	} else {
+		TEST_ASSERT(ioapic_ver == 0x11,
+			    "Expected IOAPIC v0x11 (no EOIR), got 0x%x", ioapic_ver);
+	}
+
+	/* Check SPIV and remote_irr based on whether guest used directed EOI */
+	if (use_directed) {
+		TEST_ASSERT(spiv_after & APIC_SPIV_DIRECTED_EOI,
+			    "Expected APIC_SPIV_DIRECTED_EOI set, got SPIV=0x%x", spiv_after);
+		TEST_ASSERT(remote_irr_set,
+			    "Expected remote_irr set (EOI suppressed), got cleared");
+	} else {
+		TEST_ASSERT(!(spiv_after & APIC_SPIV_DIRECTED_EOI),
+			    "Expected APIC_SPIV_DIRECTED_EOI clear, got SPIV=0x%x", spiv_after);
+		TEST_ASSERT(!remote_irr_set,
+			    "Expected remote_irr cleared (EOI broadcast), got set");
+	}
+
+	kvm_vm_free(vm);
+}
+
+static void guest_code_split(uint64_t use_directed)
+{
+	uint64_t apic_base;
+	uint32_t spiv;
+
+	/* Enable x2APIC mode */
+	apic_base = rdmsr(MSR_IA32_APICBASE);
+	wrmsr(MSR_IA32_APICBASE, apic_base | X2APIC_ENABLE);
+
+	/* Mask all LVT entries */
+	x2apic_write_reg(APIC_LVTT, APIC_LVT_MASKED);
+	x2apic_write_reg(APIC_LVTPC, APIC_LVT_MASKED);
+	x2apic_write_reg(APIC_LVT0, APIC_LVT_MASKED);
+	x2apic_write_reg(APIC_LVT1, APIC_LVT_MASKED);
+	x2apic_write_reg(APIC_LVTERR, APIC_LVT_MASKED);
+
+	/* Enable APIC */
+	x2apic_write_reg(APIC_SPIV, APIC_SPIV_APIC_ENABLED | TEST_VECTOR);
+
+	/* Conditionally set APIC_SPIV_DIRECTED_EOI */
+	if (use_directed) {
+		spiv = x2apic_read_reg(APIC_SPIV);
+		x2apic_write_reg(APIC_SPIV, spiv | APIC_SPIV_DIRECTED_EOI);
+	}
+	spiv = x2apic_read_reg(APIC_SPIV);
+
+	GUEST_SYNC(spiv);
+
+	/* Enable interrupts and wait for interrupt */
+	asm volatile("sti; hlt");
+
+	/* Write EOI */
+	x2apic_write_reg(APIC_EOI, 0);
+
+	GUEST_DONE();
+}
+
+static void test_suppress_eoi_split(uint64_t x2apic_flags, bool expect_advertised, bool use_directed)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct kvm_lapic_state lapic;
+	struct ucall uc;
+	uint32_t lvr, spiv_after;
+	bool got_eoi_exit;
+	enum vm_irqchip_mode saved_mode = vm_irqchip_mode;
+
+	vm_irqchip_mode = VM_IRQCHIP_SPLIT;
+	vm = vm_create(1);
+	vm_irqchip_mode = saved_mode;
+
+	if (x2apic_flags)
+		vm_enable_cap(vm, KVM_CAP_X2APIC_API, x2apic_flags);
+
+	vcpu = vm_vcpu_add(vm, 0, guest_code_split);
+	vcpu_args_set(vcpu, 1, use_directed);
+
+	/* Set up IRQ routing so kernel knows userspace IOAPIC handles TEST_IRQ */
+	struct kvm_irq_routing *routing = calloc(1, sizeof(*routing) + sizeof(routing->entries[0]));
+	routing->nr = 1;
+	routing->entries[0].gsi = TEST_IRQ;
+	routing->entries[0].type = KVM_IRQ_ROUTING_MSI;
+	routing->entries[0].u.msi.address_lo = 0xfee00000;  /* Dest ID 0 */
+	routing->entries[0].u.msi.address_hi = 0;
+	routing->entries[0].u.msi.data = TEST_VECTOR | (1 << 15);  /* Level-triggered */
+	__vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+	free(routing);
+
+	vm_install_exception_handler(vm, TEST_VECTOR, guest_irq_handler);
+
+	vcpu_run(vcpu);
+	TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+	spiv_after = uc.args[1];
+
+	/* Inject via GSI (which routes to MSI) */
+	struct kvm_irq_level irq_level = {
+		.irq = TEST_IRQ,
+		.level = 1,
+	};
+	vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
+	irq_level.level = 0;
+	vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
+
+	/* Guest receives interrupt and writes EOI */
+	vcpu_run(vcpu);
+
+
+	/* Check if we got KVM_EXIT_IOAPIC_EOI */
+	got_eoi_exit = (vcpu->run->exit_reason == KVM_EXIT_IOAPIC_EOI &&
+			vcpu->run->eoi.vector == TEST_VECTOR);
+
+	/* If we got EOI exit, continue guest to finish */
+	if (got_eoi_exit) {
+		vcpu_run(vcpu);
+	}
+
+	/* Let guest finish */
+	int ucall_type = get_ucall(vcpu, &uc);
+	if (ucall_type == UCALL_SYNC) {
+		vcpu_run(vcpu);
+		ucall_type = get_ucall(vcpu, &uc);
+		if (ucall_type == UCALL_SYNC) {
+			vcpu_run(vcpu);
+			TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_DONE);
+		} else {
+			TEST_ASSERT_EQ(ucall_type, UCALL_DONE);
+		}
+	} else {
+		TEST_ASSERT_EQ(ucall_type, UCALL_DONE);
+	}
+
+	/* Check LAPIC LVR */
+	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);
+	lvr = *(u32 *)&lapic.regs[APIC_LVR];
+
+	printf("  Split IRQCHIP: LVR directed_eoi=%d, SPIV directed_eoi=%d, got_eoi_exit=%d\n",
+	       !!(lvr & APIC_LVR_DIRECTED_EOI),
+	       !!(spiv_after & APIC_SPIV_DIRECTED_EOI), got_eoi_exit);
+
+	if (expect_advertised) {
+		TEST_ASSERT(lvr & APIC_LVR_DIRECTED_EOI,
+			    "Expected APIC_LVR_DIRECTED_EOI, got LVR=0x%x", lvr);
+	} else {
+		TEST_ASSERT(!(lvr & APIC_LVR_DIRECTED_EOI),
+			    "Expected no APIC_LVR_DIRECTED_EOI, got LVR=0x%x", lvr);
+	}
+
+	/* Check EOI exit based on whether guest used directed EOI */
+	if (use_directed) {
+		TEST_ASSERT(spiv_after & APIC_SPIV_DIRECTED_EOI,
+			    "Expected APIC_SPIV_DIRECTED_EOI set, got SPIV=0x%x", spiv_after);
+		TEST_ASSERT(!got_eoi_exit,
+			    "Expected no EOI exit (suppressed), but got one");
+	} else {
+		TEST_ASSERT(!(spiv_after & APIC_SPIV_DIRECTED_EOI),
+			    "Expected APIC_SPIV_DIRECTED_EOI clear, got SPIV=0x%x", spiv_after);
+		if (expect_advertised) {
+			/* Quirk mode: advertised but should still broadcast */
+			if (!got_eoi_exit) {
+				printf("  Note: No EOI exit in quirk mode (old kernel behavior)\n");
+			}
+		} else {
+			/* Feature not advertised, no EOI exits expected */
+		}
+	}
+
+	kvm_vm_free(vm);
+}
+
+int main(void)
+{
+	int cap;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_X2APIC_API));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_IRQCHIP));
+
+	cap = kvm_check_cap(KVM_CAP_X2APIC_API);
+
+	/*
+	 * Test that KVM correctly handles the suppress EOI broadcast flags.
+	 * Note: The actual behavior depends on the kernel implementation.
+	 * This test documents the expected behavior per the commit messages.
+	 *
+	 * Quirk mode: Don't advertise or implement (legacy behavior)
+	 * Explicit enable: Advertise and implement
+	 * Explicit disable: Don't advertise or implement
+	 */
+
+	printf("Testing quirk mode (no flags)...\n");
+	test_suppress_eoi(0, false, false, false);
+
+	if (cap & KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST) {
+		printf("Testing explicit enable...\n");
+		test_suppress_eoi(KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST, true, true, true);
+
+		printf("Testing explicit enable (guest doesn't use)...\n");
+		test_suppress_eoi(KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST, true, true, false);
+	} else {
+		printf("Skipping explicit enable (not supported)...\n");
+	}
+
+	if (cap & KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST) {
+		printf("Testing explicit disable...\n");
+		test_suppress_eoi(KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST, false, false, false);
+	} else {
+		printf("Skipping explicit disable (not supported)...\n");
+	}
+
+	printf("All tests passed\n");
+
+	/* Test split irqchip mode */
+	printf("\n=== Testing split IRQCHIP mode ===\n");
+
+	printf("Testing quirk mode (no flags)...\n");
+	test_suppress_eoi_split(0, true, false);  /* Quirk: advertised in split mode */
+
+	if (cap & KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST) {
+		printf("Testing explicit enable...\n");
+		test_suppress_eoi_split(KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST, true, true);
+
+		printf("Testing explicit enable (guest doesn't use)...\n");
+		test_suppress_eoi_split(KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST, true, false);
+	} else {
+		printf("Skipping explicit enable (not supported)...\n");
+	}
+
+	if (cap & KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST) {
+		printf("Testing explicit disable...\n");
+		test_suppress_eoi_split(KVM_X2APIC_DISABLE_SUPPRESS_EOI_BROADCAST, false, false);
+	} else {
+		printf("Skipping explicit disable (not supported)...\n");
+	}
+
+	printf("All tests passed\n");
+	return 0;
+}
+

Re: [PATCH v5 4/3] KVM: selftests: Add test cases for EOI suppression modes
Posted by Sean Christopherson 1 week, 4 days ago
On Wed, Jan 28, 2026, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> Rather than being frightened of doing the right thing for the in-kernel
> I/O APIC because "there might be bugs", 

I'm not worried about bugs per se, I'm worried about breaking existing guests.
Even if KVM is 100% perfect, changes in behavior can still break guests,
especially for a feature like this where it seems like everyone got it wrong.

And as I said before, I'm not opposed to supporting directed EOI in the in-kernel
I/O APIC, but (a) I don't want to do it in conjunction with the fixes for stable@,
and (b) I'd prefer to not bother unless there's an actual use case for doing so.
The in-kernel I/O APIC isn't being deprecated, but AFAIK it's being de-prioritized
by pretty much every VMM.  I.e. the risk vs. reward isn't there for me.
Re: [PATCH v5 4/3] KVM: selftests: Add test cases for EOI suppression modes
Posted by David Woodhouse 1 week, 4 days ago
On Thu, 2026-01-29 at 07:19 -0800, Sean Christopherson wrote:
> On Wed, Jan 28, 2026, David Woodhouse wrote:
> > From: David Woodhouse <dwmw@amazon.co.uk>
> > 
> > Rather than being frightened of doing the right thing for the in-kernel
> > I/O APIC because "there might be bugs", 
> 
> I'm not worried about bugs per se, I'm worried about breaking existing guests.
> Even if KVM is 100% perfect, changes in behavior can still break guests,
> especially for a feature like this where it seems like everyone got it wrong.

There's the potential for guest bugs when the local APIC actually
starts honouring the DIRECTED_EOI bit in the SPIV register, sure. At
that point, the guest *has* to do the direct EOI (and it has to work).

But that's why we kept the 'quirk' mode as the default unless userspace
explicitly opts in. And it's true for the split-irqchip too; fixing the
behaviour is the whole point of this exercise.

I don't see why supporting precisely the same behaviour in the kernel
irqchip is any different in that respect.

> And as I said before, I'm not opposed to supporting directed EOI in the in-kernel
> I/O APIC, but (a) I don't want to do it in conjunction with the fixes for stable@,
> and (b) I'd prefer to not bother unless there's an actual use case for doing so.
> The in-kernel I/O APIC isn't being deprecated, but AFAIK it's being de-prioritized
> by pretty much every VMM.  I.e. the risk vs. reward isn't there for me.

I tend to favour the simplicity, with _ENABLE and _DISABLE just quietly
doing what their name implies without any of that nonsense about
"except if you have a kernel irqchip".

But as you wish. Most of this test case should be fine on v6 of the
patch which dropped in-kernel I/O APIC support. All the tests are
conditional on the corresponding support being advertised, so it just
needs updating to correctly detect the in-kernel _ENABLE support in
case that does get added. How did we say we would advertise that?
Re: [PATCH v5 4/3] KVM: selftests: Add test cases for EOI suppression modes
Posted by Sean Christopherson 5 days, 19 hours ago
On Thu, Jan 29, 2026, David Woodhouse wrote:
> On Thu, 2026-01-29 at 07:19 -0800, Sean Christopherson wrote:
> > On Wed, Jan 28, 2026, David Woodhouse wrote:
> > > From: David Woodhouse <dwmw@amazon.co.uk>
> > > 
> > > Rather than being frightened of doing the right thing for the in-kernel
> > > I/O APIC because "there might be bugs", 
> > 
> > I'm not worried about bugs per se, I'm worried about breaking existing guests.
> > Even if KVM is 100% perfect, changes in behavior can still break guests,
> > especially for a feature like this where it seems like everyone got it wrong.
> 
> There's the potential for guest bugs when the local APIC actually
> starts honouring the DIRECTED_EOI bit in the SPIV register, sure. At
> that point, the guest *has* to do the direct EOI (and it has to work).
> 
> But that's why we kept the 'quirk' mode as the default unless userspace
> explicitly opts in. And it's true for the split-irqchip too; fixing the
> behaviour is the whole point of this exercise.
> 
> I don't see why supporting precisely the same behaviour in the kernel
> irqchip is any different in that respect.

Conceptually, nothing.  But fixing the in-kernel I/O APIC is more invasive, it's
not currently broken (KVM doesn't advertise DIRECTED_EOI or SUPPRESS_EOI_BROADCAST),
no one is lining up to actually utilizes the functionality, *and* there are some
historical warts in KVM that need to be addressed.

Add it all up, and for me at least, the risk vs. reward is very different for
split vs. fully in-kernel irqchips.

> > And as I said before, I'm not opposed to supporting directed EOI in the in-kernel
> > I/O APIC, but (a) I don't want to do it in conjunction with the fixes for stable@,
> > and (b) I'd prefer to not bother unless there's an actual use case for doing so.
> > The in-kernel I/O APIC isn't being deprecated, but AFAIK it's being de-prioritized
> > by pretty much every VMM.  I.e. the risk vs. reward isn't there for me.
> 
> I tend to favour the simplicity, with _ENABLE and _DISABLE just quietly
> doing what their name implies without any of that nonsense about
> "except if you have a kernel irqchip".

But they _don't_ do what their name implies if there's no in-kernel local APIC.
I.e. userspace needs to read the docs and do the right thing anyways.

> But as you wish. Most of this test case should be fine on v6 of the
> patch which dropped in-kernel I/O APIC support. All the tests are
> conditional on the corresponding support being advertised, so it just
> needs updating to correctly detect the in-kernel _ENABLE support in
> case that does get added. How did we say we would advertise that?

A doc update plus this:

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 67e666921a12..d711493f9c69 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4934,7 +4934,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                break;
        case KVM_CAP_X2APIC_API:
                r = KVM_X2APIC_API_VALID_FLAGS;
-               if (kvm && !irqchip_split(kvm))
+               if (kvm && !irqchip_in_kernel(kvm))
                        r &= ~KVM_X2APIC_ENABLE_SUPPRESS_EOI_BROADCAST;
                break;
        case KVM_CAP_NESTED_STATE: