Add read() and write() APIC callback functions to read and write x2APIC
registers directly from the guest APIC backing page of a vCPU.
The x2APIC registers are mapped at an offset within the guest APIC
backing page which is same as their x2APIC MMIO offset. Secure AVIC
adds new registers such as ALLOWED_IRRs (which are at 4-byte offset
within the IRR register offset range) and NMI_REQ to the APIC register
space.
When Secure AVIC is enabled, guest's rdmsr/wrmsr of APIC registers
result in VC exception (for non-accelerated register accesses) with
error code VMEXIT_AVIC_NOACCEL. The VC exception handler can read/write
the x2APIC register in the guest APIC backing page to complete the
rdmsr/wrmsr. Since doing this would increase the latency of accessing
x2APIC registers, instead of doing rdmsr/wrmsr based reg accesses
and handling reads/writes in VC exception, directly read/write APIC
registers from/to the guest APIC backing page of the vCPU in read()
and write() callbacks of the Secure AVIC APIC driver.
Co-developed-by: Kishon Vijay Abraham I <kvijayab@amd.com>
Signed-off-by: Kishon Vijay Abraham I <kvijayab@amd.com>
Reviewed-by: Tianyu Lan <tiala@microsoft.com>
Signed-off-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
Changes since v8:
- Added Tianyu's Reviewed-by.
- Code cleanup to use struct secure_avic_page pointer.
arch/x86/include/asm/apicdef.h | 2 +
arch/x86/kernel/apic/x2apic_savic.c | 113 +++++++++++++++++++++++++++-
2 files changed, 113 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 094106b6a538..be39a543fbe5 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -135,6 +135,8 @@
#define APIC_TDR_DIV_128 0xA
#define APIC_EFEAT 0x400
#define APIC_ECTRL 0x410
+#define APIC_SEOI 0x420
+#define APIC_IER 0x480
#define APIC_EILVTn(n) (0x500 + 0x10 * n)
#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
#define APIC_EILVT_NR_AMD_10H 4
diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c
index 1c70b7c111f0..86a522685230 100644
--- a/arch/x86/kernel/apic/x2apic_savic.c
+++ b/arch/x86/kernel/apic/x2apic_savic.c
@@ -9,6 +9,7 @@
#include <linux/cc_platform.h>
#include <linux/percpu-defs.h>
+#include <linux/align.h>
#include <asm/apic.h>
#include <asm/sev.h>
@@ -26,6 +27,114 @@ static int savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC);
}
+#define SAVIC_ALLOWED_IRR 0x204
+
+static u32 savic_read(u32 reg)
+{
+ void *ap = this_cpu_ptr(secure_avic_page);
+
+ /*
+ * When Secure AVIC is enabled, rdmsr/wrmsr of APIC registers
+ * result in VC exception (for non-accelerated register accesses)
+ * with VMEXIT_AVIC_NOACCEL error code. The VC exception handler
+ * can read/write the x2APIC register in the guest APIC backing page.
+ * Since doing this would increase the latency of accessing x2APIC
+ * registers, instead of doing rdmsr/wrmsr based accesses and
+ * handling apic register reads/writes in VC exception, the read()
+ * and write() callbacks directly read/write APIC register from/to
+ * the vCPU APIC backing page.
+ */
+ switch (reg) {
+ case APIC_LVTT:
+ case APIC_TMICT:
+ case APIC_TMCCT:
+ case APIC_TDCR:
+ case APIC_ID:
+ case APIC_LVR:
+ case APIC_TASKPRI:
+ case APIC_ARBPRI:
+ case APIC_PROCPRI:
+ case APIC_LDR:
+ case APIC_SPIV:
+ case APIC_ESR:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTERR:
+ case APIC_EFEAT:
+ case APIC_ECTRL:
+ case APIC_SEOI:
+ case APIC_IER:
+ case APIC_EILVTn(0) ... APIC_EILVTn(3):
+ return apic_get_reg(ap, reg);
+ case APIC_ICR:
+ return (u32) apic_get_reg64(ap, reg);
+ case APIC_ISR ... APIC_ISR + 0x70:
+ case APIC_TMR ... APIC_TMR + 0x70:
+ if (WARN_ONCE(!IS_ALIGNED(reg, 16),
+ "APIC reg read offset 0x%x not aligned at 16 bytes", reg))
+ return 0;
+ return apic_get_reg(ap, reg);
+ /* IRR and ALLOWED_IRR offset range */
+ case APIC_IRR ... APIC_IRR + 0x74:
+ /*
+ * Either aligned at 16 bytes for valid IRR reg offset or a
+ * valid Secure AVIC ALLOWED_IRR offset.
+ */
+ if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
+ IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
+ "Misaligned IRR/ALLOWED_IRR APIC reg read offset 0x%x", reg))
+ return 0;
+ return apic_get_reg(ap, reg);
+ default:
+ pr_err("Permission denied: read of Secure AVIC reg offset 0x%x\n", reg);
+ return 0;
+ }
+}
+
+#define SAVIC_NMI_REQ 0x278
+
+static void savic_write(u32 reg, u32 data)
+{
+ void *ap = this_cpu_ptr(secure_avic_page);
+
+ switch (reg) {
+ case APIC_LVTT:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_TMICT:
+ case APIC_TDCR:
+ case APIC_SELF_IPI:
+ case APIC_TASKPRI:
+ case APIC_EOI:
+ case APIC_SPIV:
+ case SAVIC_NMI_REQ:
+ case APIC_ESR:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVTERR:
+ case APIC_ECTRL:
+ case APIC_SEOI:
+ case APIC_IER:
+ case APIC_EILVTn(0) ... APIC_EILVTn(3):
+ apic_set_reg(ap, reg, data);
+ break;
+ case APIC_ICR:
+ apic_set_reg64(ap, reg, (u64) data);
+ break;
+ /* ALLOWED_IRR offsets are writable */
+ case SAVIC_ALLOWED_IRR ... SAVIC_ALLOWED_IRR + 0x70:
+ if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)) {
+ apic_set_reg(ap, reg, data);
+ break;
+ }
+ fallthrough;
+ default:
+ pr_err("Permission denied: write to Secure AVIC reg offset 0x%x\n", reg);
+ }
+}
+
static void savic_setup(void)
{
void *ap = this_cpu_ptr(secure_avic_page);
@@ -88,8 +197,8 @@ static struct apic apic_x2apic_savic __ro_after_init = {
.nmi_to_offline_cpu = true,
- .read = native_apic_msr_read,
- .write = native_apic_msr_write,
+ .read = savic_read,
+ .write = savic_write,
.eoi = native_apic_msr_eoi,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
--
2.34.1
On Mon, Aug 11, 2025 at 03:14:29PM +0530, Neeraj Upadhyay wrote: > Add read() and write() APIC callback functions to read and write x2APIC > registers directly from the guest APIC backing page of a vCPU. > > The x2APIC registers are mapped at an offset within the guest APIC > backing page which is same as their x2APIC MMIO offset. Secure AVIC > adds new registers such as ALLOWED_IRRs (which are at 4-byte offset > within the IRR register offset range) and NMI_REQ to the APIC register > space. > > When Secure AVIC is enabled, guest's rdmsr/wrmsr of APIC registers > result in VC exception (for non-accelerated register accesses) with s/VC/#VC/g since you're talking about an exception vector. > error code VMEXIT_AVIC_NOACCEL. The VC exception handler can read/write > the x2APIC register in the guest APIC backing page to complete the > rdmsr/wrmsr. All x86 insns in caps pls: RDMSR/WRMSR. > +static u32 savic_read(u32 reg) > +{ > + void *ap = this_cpu_ptr(secure_avic_page); > + > + /* > + * When Secure AVIC is enabled, rdmsr/wrmsr of APIC registers > + * result in VC exception (for non-accelerated register accesses) > + * with VMEXIT_AVIC_NOACCEL error code. The VC exception handler > + * can read/write the x2APIC register in the guest APIC backing page. > + * Since doing this would increase the latency of accessing x2APIC > + * registers, instead of doing rdmsr/wrmsr based accesses and > + * handling apic register reads/writes in VC exception, the read() s/apic/APIC/g Please be consistent across the whole set. Acronyms are in all caps. Insn names too. > + * and write() callbacks directly read/write APIC register from/to > + * the vCPU APIC backing page. > + */ Move that comment above the function. And also split it in paragraphs: when it becomes more than 4-5 lines, split the next one with a new line. > + switch (reg) { > + case APIC_LVTT: > + case APIC_TMICT: > + case APIC_TMCCT: > + case APIC_TDCR: > + case APIC_ID: > + case APIC_LVR: > + case APIC_TASKPRI: > + case APIC_ARBPRI: > + case APIC_PROCPRI: > + case APIC_LDR: > + case APIC_SPIV: > + case APIC_ESR: > + case APIC_LVTTHMR: > + case APIC_LVTPC: > + case APIC_LVT0: > + case APIC_LVT1: > + case APIC_LVTERR: > + case APIC_EFEAT: > + case APIC_ECTRL: > + case APIC_SEOI: > + case APIC_IER: > + case APIC_EILVTn(0) ... APIC_EILVTn(3): > + return apic_get_reg(ap, reg); > + case APIC_ICR: > + return (u32) apic_get_reg64(ap, reg); ^ no need for that space. > + case APIC_ISR ... APIC_ISR + 0x70: > + case APIC_TMR ... APIC_TMR + 0x70: > + if (WARN_ONCE(!IS_ALIGNED(reg, 16), > + "APIC reg read offset 0x%x not aligned at 16 bytes", reg)) > + return 0; > + return apic_get_reg(ap, reg); > + /* IRR and ALLOWED_IRR offset range */ > + case APIC_IRR ... APIC_IRR + 0x74: > + /* > + * Either aligned at 16 bytes for valid IRR reg offset or a > + * valid Secure AVIC ALLOWED_IRR offset. > + */ > + if (WARN_ONCE(!(IS_ALIGNED(reg, 16) || > + IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)), > + "Misaligned IRR/ALLOWED_IRR APIC reg read offset 0x%x", reg)) What is that second thing supposed to catch? reg can be aligned to 16 but reg - SAVIC_ALLOWED_IRR cannot be? I can't follow the comment... perhaps write it out and not try to be clever. > + return 0; > + return apic_get_reg(ap, reg); > + default: > + pr_err("Permission denied: read of Secure AVIC reg offset 0x%x\n", reg); Permission denied? "Error reading unknown Secure AVIC reg offset..." I'd say. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 8/18/2025 4:56 PM, Borislav Petkov wrote: > On Mon, Aug 11, 2025 at 03:14:29PM +0530, Neeraj Upadhyay wrote: >> Add read() and write() APIC callback functions to read and write x2APIC >> registers directly from the guest APIC backing page of a vCPU. >> >> The x2APIC registers are mapped at an offset within the guest APIC >> backing page which is same as their x2APIC MMIO offset. Secure AVIC >> adds new registers such as ALLOWED_IRRs (which are at 4-byte offset >> within the IRR register offset range) and NMI_REQ to the APIC register >> space. >> >> When Secure AVIC is enabled, guest's rdmsr/wrmsr of APIC registers >> result in VC exception (for non-accelerated register accesses) with > > s/VC/#VC/g since you're talking about an exception vector. > Ok >> error code VMEXIT_AVIC_NOACCEL. The VC exception handler can read/write >> the x2APIC register in the guest APIC backing page to complete the >> rdmsr/wrmsr. > > All x86 insns in caps pls: RDMSR/WRMSR. > Ok >> +static u32 savic_read(u32 reg) >> +{ >> + void *ap = this_cpu_ptr(secure_avic_page); >> + >> + /* >> + * When Secure AVIC is enabled, rdmsr/wrmsr of APIC registers >> + * result in VC exception (for non-accelerated register accesses) >> + * with VMEXIT_AVIC_NOACCEL error code. The VC exception handler >> + * can read/write the x2APIC register in the guest APIC backing page. >> + * Since doing this would increase the latency of accessing x2APIC >> + * registers, instead of doing rdmsr/wrmsr based accesses and >> + * handling apic register reads/writes in VC exception, the read() > > s/apic/APIC/g > > Please be consistent across the whole set. Acronyms are in all caps. Insn > names too. > Ok >> + * and write() callbacks directly read/write APIC register from/to >> + * the vCPU APIC backing page. >> + */ > > Move that comment above the function. And also split it in paragraphs: when it > becomes more than 4-5 lines, split the next one with a new line. > Ok >> + switch (reg) { >> + case APIC_LVTT: >> + case APIC_TMICT: >> + case APIC_TMCCT: >> + case APIC_TDCR: >> + case APIC_ID: >> + case APIC_LVR: >> + case APIC_TASKPRI: >> + case APIC_ARBPRI: >> + case APIC_PROCPRI: >> + case APIC_LDR: >> + case APIC_SPIV: >> + case APIC_ESR: >> + case APIC_LVTTHMR: >> + case APIC_LVTPC: >> + case APIC_LVT0: >> + case APIC_LVT1: >> + case APIC_LVTERR: >> + case APIC_EFEAT: >> + case APIC_ECTRL: >> + case APIC_SEOI: >> + case APIC_IER: >> + case APIC_EILVTn(0) ... APIC_EILVTn(3): >> + return apic_get_reg(ap, reg); >> + case APIC_ICR: >> + return (u32) apic_get_reg64(ap, reg); > ^ > > no need for that space. > Ok, will remove. >> + case APIC_ISR ... APIC_ISR + 0x70: >> + case APIC_TMR ... APIC_TMR + 0x70: >> + if (WARN_ONCE(!IS_ALIGNED(reg, 16), >> + "APIC reg read offset 0x%x not aligned at 16 bytes", reg)) >> + return 0; >> + return apic_get_reg(ap, reg); >> + /* IRR and ALLOWED_IRR offset range */ >> + case APIC_IRR ... APIC_IRR + 0x74: >> + /* >> + * Either aligned at 16 bytes for valid IRR reg offset or a >> + * valid Secure AVIC ALLOWED_IRR offset. >> + */ >> + if (WARN_ONCE(!(IS_ALIGNED(reg, 16) || >> + IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)), >> + "Misaligned IRR/ALLOWED_IRR APIC reg read offset 0x%x", reg)) > > What is that second thing supposed to catch? > > reg can be aligned to 16 but reg - SAVIC_ALLOWED_IRR cannot be? > APIC_IRR register offsets are: #Offset #bits Description 0x200 31:0 vectors 0-31 0x210 31:0 vectors 32-63 ... 0x270 31:0 vectors 224-255 ALLOWED_IRR register offsets are: #Offset #bits Description 0x204 31:0 vectors 0-31 0x214 31:0 vectors 32-63 ... 0x274 31:0 vectors 224-255 IS_ALIGNED(reg, 16) is when 'reg' is an APIC_IRR register, which are 16 byte aligned. IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16) is for the case when 'reg' is a SAVIC_ALLOWED_IRR register. which are at 16 byte strides from the SAVIC_ALLOWED_IRR base offset. Expected values of (reg - SAVIC_ALLOWED_IRR) are 0, 0x10, 0x20, ..., 0x70. If both checks fail, that is a invalid offset ('!' is on the final ORed value). !(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)) > I can't follow the comment... perhaps write it out and not try to be clever. > Maybe change it to below? /* * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides * from their respective base offset. */ if (WARN_ONCE(!(IS_ALIGNED(reg - APIC_IRR, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)), "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg)) >> + return 0; >> + return apic_get_reg(ap, reg); >> + default: >> + pr_err("Permission denied: read of Secure AVIC reg offset 0x%x\n", reg); > > Permission denied? > > "Error reading unknown Secure AVIC reg offset..." > > I'd say. > Ok, yes it is clearer. - Neeraj
On Tue, Aug 19, 2025 at 09:45:02AM +0530, Upadhyay, Neeraj wrote: > Maybe change it to below? > > /* > * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides > * from their respective base offset. > */ > > if (WARN_ONCE(!(IS_ALIGNED(reg - APIC_IRR, 16) || > IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)), > "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset > 0x%x", > reg)) Let's beef that up some more with a crystal-clear explanation what is going on here so that readers don't have to stop and stare for 5 mins before they grok what this is doing: /* * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from * their respective base offset. APIC_IRRs are in the range * * (0x200, 0x210, ..., 0x270) * * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the rangea * * (0x204, 0x214, ..., 0x274). * * Filter out everything else. */ if (WARN_ONCE(!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - 4, 16)), "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg)); Thx. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 8/19/2025 8:02 PM, Borislav Petkov wrote: > On Tue, Aug 19, 2025 at 09:45:02AM +0530, Upadhyay, Neeraj wrote: >> Maybe change it to below? >> >> /* >> * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides >> * from their respective base offset. >> */ >> >> if (WARN_ONCE(!(IS_ALIGNED(reg - APIC_IRR, 16) || >> IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)), >> "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset >> 0x%x", >> reg)) > > Let's beef that up some more with a crystal-clear explanation what is going on > here so that readers don't have to stop and stare for 5 mins before they grok > what this is doing: > > /* > * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from > * their respective base offset. APIC_IRRs are in the range > * > * (0x200, 0x210, ..., 0x270) > * > * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the rangea > * > * (0x204, 0x214, ..., 0x274). > * > * Filter out everything else. > */ > if (WARN_ONCE(!(IS_ALIGNED(reg, 16) || > IS_ALIGNED(reg - 4, 16)), > "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg)); > Ok, looks good. Thanks! - Neeraj
© 2016 - 2025 Red Hat, Inc.