Add read() and write() APIC callback functions to read and write x2APIC
registers directly from the guest APIC backing page of a vCPU.
The x2APIC registers are mapped at an offset within the guest APIC
backing page which is same as their x2APIC MMIO offset. Secure AVIC
adds new registers such as ALLOWED_IRRs (which are at 4-byte offset
within the IRR register offset range) and NMI_REQ to the APIC register
space.
When Secure AVIC is enabled, guest's rdmsr/wrmsr of APIC registers
result in VC exception (for non-accelerated register accesses) with
error code VMEXIT_AVIC_NOACCEL. The VC exception handler can read/write
the x2APIC register in the guest APIC backing page to complete the
rdmsr/wrmsr. Since doing this would increase the latency of accessing
x2APIC registers, instead of doing rdmsr/wrmsr based reg accesses
and handling reads/writes in VC exception, directly read/write APIC
registers from/to the guest APIC backing page of the vCPU in read()
and write() callbacks of the Secure AVIC APIC driver.
Co-developed-by: Kishon Vijay Abraham I <kvijayab@amd.com>
Signed-off-by: Kishon Vijay Abraham I <kvijayab@amd.com>
Reviewed-by: Tianyu Lan <tiala@microsoft.com>
Signed-off-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
Changes since v8:
- Added Tianyu's Reviewed-by.
- Code cleanup to use struct secure_avic_page pointer.
arch/x86/include/asm/apicdef.h | 2 +
arch/x86/kernel/apic/x2apic_savic.c | 113 +++++++++++++++++++++++++++-
2 files changed, 113 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 094106b6a538..be39a543fbe5 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -135,6 +135,8 @@
#define APIC_TDR_DIV_128 0xA
#define APIC_EFEAT 0x400
#define APIC_ECTRL 0x410
+#define APIC_SEOI 0x420
+#define APIC_IER 0x480
#define APIC_EILVTn(n) (0x500 + 0x10 * n)
#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */
#define APIC_EILVT_NR_AMD_10H 4
diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c
index 1c70b7c111f0..86a522685230 100644
--- a/arch/x86/kernel/apic/x2apic_savic.c
+++ b/arch/x86/kernel/apic/x2apic_savic.c
@@ -9,6 +9,7 @@
#include <linux/cc_platform.h>
#include <linux/percpu-defs.h>
+#include <linux/align.h>
#include <asm/apic.h>
#include <asm/sev.h>
@@ -26,6 +27,114 @@ static int savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC);
}
+#define SAVIC_ALLOWED_IRR 0x204
+
+static u32 savic_read(u32 reg)
+{
+ void *ap = this_cpu_ptr(secure_avic_page);
+
+ /*
+ * When Secure AVIC is enabled, rdmsr/wrmsr of APIC registers
+ * result in VC exception (for non-accelerated register accesses)
+ * with VMEXIT_AVIC_NOACCEL error code. The VC exception handler
+ * can read/write the x2APIC register in the guest APIC backing page.
+ * Since doing this would increase the latency of accessing x2APIC
+ * registers, instead of doing rdmsr/wrmsr based accesses and
+ * handling apic register reads/writes in VC exception, the read()
+ * and write() callbacks directly read/write APIC register from/to
+ * the vCPU APIC backing page.
+ */
+ switch (reg) {
+ case APIC_LVTT:
+ case APIC_TMICT:
+ case APIC_TMCCT:
+ case APIC_TDCR:
+ case APIC_ID:
+ case APIC_LVR:
+ case APIC_TASKPRI:
+ case APIC_ARBPRI:
+ case APIC_PROCPRI:
+ case APIC_LDR:
+ case APIC_SPIV:
+ case APIC_ESR:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTERR:
+ case APIC_EFEAT:
+ case APIC_ECTRL:
+ case APIC_SEOI:
+ case APIC_IER:
+ case APIC_EILVTn(0) ... APIC_EILVTn(3):
+ return apic_get_reg(ap, reg);
+ case APIC_ICR:
+ return (u32) apic_get_reg64(ap, reg);
+ case APIC_ISR ... APIC_ISR + 0x70:
+ case APIC_TMR ... APIC_TMR + 0x70:
+ if (WARN_ONCE(!IS_ALIGNED(reg, 16),
+ "APIC reg read offset 0x%x not aligned at 16 bytes", reg))
+ return 0;
+ return apic_get_reg(ap, reg);
+ /* IRR and ALLOWED_IRR offset range */
+ case APIC_IRR ... APIC_IRR + 0x74:
+ /*
+ * Either aligned at 16 bytes for valid IRR reg offset or a
+ * valid Secure AVIC ALLOWED_IRR offset.
+ */
+ if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
+ IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
+ "Misaligned IRR/ALLOWED_IRR APIC reg read offset 0x%x", reg))
+ return 0;
+ return apic_get_reg(ap, reg);
+ default:
+ pr_err("Permission denied: read of Secure AVIC reg offset 0x%x\n", reg);
+ return 0;
+ }
+}
+
+#define SAVIC_NMI_REQ 0x278
+
+static void savic_write(u32 reg, u32 data)
+{
+ void *ap = this_cpu_ptr(secure_avic_page);
+
+ switch (reg) {
+ case APIC_LVTT:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_TMICT:
+ case APIC_TDCR:
+ case APIC_SELF_IPI:
+ case APIC_TASKPRI:
+ case APIC_EOI:
+ case APIC_SPIV:
+ case SAVIC_NMI_REQ:
+ case APIC_ESR:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVTERR:
+ case APIC_ECTRL:
+ case APIC_SEOI:
+ case APIC_IER:
+ case APIC_EILVTn(0) ... APIC_EILVTn(3):
+ apic_set_reg(ap, reg, data);
+ break;
+ case APIC_ICR:
+ apic_set_reg64(ap, reg, (u64) data);
+ break;
+ /* ALLOWED_IRR offsets are writable */
+ case SAVIC_ALLOWED_IRR ... SAVIC_ALLOWED_IRR + 0x70:
+ if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)) {
+ apic_set_reg(ap, reg, data);
+ break;
+ }
+ fallthrough;
+ default:
+ pr_err("Permission denied: write to Secure AVIC reg offset 0x%x\n", reg);
+ }
+}
+
static void savic_setup(void)
{
void *ap = this_cpu_ptr(secure_avic_page);
@@ -88,8 +197,8 @@ static struct apic apic_x2apic_savic __ro_after_init = {
.nmi_to_offline_cpu = true,
- .read = native_apic_msr_read,
- .write = native_apic_msr_write,
+ .read = savic_read,
+ .write = savic_write,
.eoi = native_apic_msr_eoi,
.icr_read = native_x2apic_icr_read,
.icr_write = native_x2apic_icr_write,
--
2.34.1
On Mon, Aug 11, 2025 at 03:14:29PM +0530, Neeraj Upadhyay wrote:
> Add read() and write() APIC callback functions to read and write x2APIC
> registers directly from the guest APIC backing page of a vCPU.
>
> The x2APIC registers are mapped at an offset within the guest APIC
> backing page which is same as their x2APIC MMIO offset. Secure AVIC
> adds new registers such as ALLOWED_IRRs (which are at 4-byte offset
> within the IRR register offset range) and NMI_REQ to the APIC register
> space.
>
> When Secure AVIC is enabled, guest's rdmsr/wrmsr of APIC registers
> result in VC exception (for non-accelerated register accesses) with
s/VC/#VC/g since you're talking about an exception vector.
> error code VMEXIT_AVIC_NOACCEL. The VC exception handler can read/write
> the x2APIC register in the guest APIC backing page to complete the
> rdmsr/wrmsr.
All x86 insns in caps pls: RDMSR/WRMSR.
> +static u32 savic_read(u32 reg)
> +{
> + void *ap = this_cpu_ptr(secure_avic_page);
> +
> + /*
> + * When Secure AVIC is enabled, rdmsr/wrmsr of APIC registers
> + * result in VC exception (for non-accelerated register accesses)
> + * with VMEXIT_AVIC_NOACCEL error code. The VC exception handler
> + * can read/write the x2APIC register in the guest APIC backing page.
> + * Since doing this would increase the latency of accessing x2APIC
> + * registers, instead of doing rdmsr/wrmsr based accesses and
> + * handling apic register reads/writes in VC exception, the read()
s/apic/APIC/g
Please be consistent across the whole set. Acronyms are in all caps. Insn
names too.
> + * and write() callbacks directly read/write APIC register from/to
> + * the vCPU APIC backing page.
> + */
Move that comment above the function. And also split it in paragraphs: when it
becomes more than 4-5 lines, split the next one with a new line.
> + switch (reg) {
> + case APIC_LVTT:
> + case APIC_TMICT:
> + case APIC_TMCCT:
> + case APIC_TDCR:
> + case APIC_ID:
> + case APIC_LVR:
> + case APIC_TASKPRI:
> + case APIC_ARBPRI:
> + case APIC_PROCPRI:
> + case APIC_LDR:
> + case APIC_SPIV:
> + case APIC_ESR:
> + case APIC_LVTTHMR:
> + case APIC_LVTPC:
> + case APIC_LVT0:
> + case APIC_LVT1:
> + case APIC_LVTERR:
> + case APIC_EFEAT:
> + case APIC_ECTRL:
> + case APIC_SEOI:
> + case APIC_IER:
> + case APIC_EILVTn(0) ... APIC_EILVTn(3):
> + return apic_get_reg(ap, reg);
> + case APIC_ICR:
> + return (u32) apic_get_reg64(ap, reg);
^
no need for that space.
> + case APIC_ISR ... APIC_ISR + 0x70:
> + case APIC_TMR ... APIC_TMR + 0x70:
> + if (WARN_ONCE(!IS_ALIGNED(reg, 16),
> + "APIC reg read offset 0x%x not aligned at 16 bytes", reg))
> + return 0;
> + return apic_get_reg(ap, reg);
> + /* IRR and ALLOWED_IRR offset range */
> + case APIC_IRR ... APIC_IRR + 0x74:
> + /*
> + * Either aligned at 16 bytes for valid IRR reg offset or a
> + * valid Secure AVIC ALLOWED_IRR offset.
> + */
> + if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
> + IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
> + "Misaligned IRR/ALLOWED_IRR APIC reg read offset 0x%x", reg))
What is that second thing supposed to catch?
reg can be aligned to 16 but reg - SAVIC_ALLOWED_IRR cannot be?
I can't follow the comment... perhaps write it out and not try to be clever.
> + return 0;
> + return apic_get_reg(ap, reg);
> + default:
> + pr_err("Permission denied: read of Secure AVIC reg offset 0x%x\n", reg);
Permission denied?
"Error reading unknown Secure AVIC reg offset..."
I'd say.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 8/18/2025 4:56 PM, Borislav Petkov wrote:
> On Mon, Aug 11, 2025 at 03:14:29PM +0530, Neeraj Upadhyay wrote:
>> Add read() and write() APIC callback functions to read and write x2APIC
>> registers directly from the guest APIC backing page of a vCPU.
>>
>> The x2APIC registers are mapped at an offset within the guest APIC
>> backing page which is same as their x2APIC MMIO offset. Secure AVIC
>> adds new registers such as ALLOWED_IRRs (which are at 4-byte offset
>> within the IRR register offset range) and NMI_REQ to the APIC register
>> space.
>>
>> When Secure AVIC is enabled, guest's rdmsr/wrmsr of APIC registers
>> result in VC exception (for non-accelerated register accesses) with
>
> s/VC/#VC/g since you're talking about an exception vector.
>
Ok
>> error code VMEXIT_AVIC_NOACCEL. The VC exception handler can read/write
>> the x2APIC register in the guest APIC backing page to complete the
>> rdmsr/wrmsr.
>
> All x86 insns in caps pls: RDMSR/WRMSR.
>
Ok
>> +static u32 savic_read(u32 reg)
>> +{
>> + void *ap = this_cpu_ptr(secure_avic_page);
>> +
>> + /*
>> + * When Secure AVIC is enabled, rdmsr/wrmsr of APIC registers
>> + * result in VC exception (for non-accelerated register accesses)
>> + * with VMEXIT_AVIC_NOACCEL error code. The VC exception handler
>> + * can read/write the x2APIC register in the guest APIC backing page.
>> + * Since doing this would increase the latency of accessing x2APIC
>> + * registers, instead of doing rdmsr/wrmsr based accesses and
>> + * handling apic register reads/writes in VC exception, the read()
>
> s/apic/APIC/g
>
> Please be consistent across the whole set. Acronyms are in all caps. Insn
> names too.
>
Ok
>> + * and write() callbacks directly read/write APIC register from/to
>> + * the vCPU APIC backing page.
>> + */
>
> Move that comment above the function. And also split it in paragraphs: when it
> becomes more than 4-5 lines, split the next one with a new line.
>
Ok
>> + switch (reg) {
>> + case APIC_LVTT:
>> + case APIC_TMICT:
>> + case APIC_TMCCT:
>> + case APIC_TDCR:
>> + case APIC_ID:
>> + case APIC_LVR:
>> + case APIC_TASKPRI:
>> + case APIC_ARBPRI:
>> + case APIC_PROCPRI:
>> + case APIC_LDR:
>> + case APIC_SPIV:
>> + case APIC_ESR:
>> + case APIC_LVTTHMR:
>> + case APIC_LVTPC:
>> + case APIC_LVT0:
>> + case APIC_LVT1:
>> + case APIC_LVTERR:
>> + case APIC_EFEAT:
>> + case APIC_ECTRL:
>> + case APIC_SEOI:
>> + case APIC_IER:
>> + case APIC_EILVTn(0) ... APIC_EILVTn(3):
>> + return apic_get_reg(ap, reg);
>> + case APIC_ICR:
>> + return (u32) apic_get_reg64(ap, reg);
> ^
>
> no need for that space.
>
Ok, will remove.
>> + case APIC_ISR ... APIC_ISR + 0x70:
>> + case APIC_TMR ... APIC_TMR + 0x70:
>> + if (WARN_ONCE(!IS_ALIGNED(reg, 16),
>> + "APIC reg read offset 0x%x not aligned at 16 bytes", reg))
>> + return 0;
>> + return apic_get_reg(ap, reg);
>> + /* IRR and ALLOWED_IRR offset range */
>> + case APIC_IRR ... APIC_IRR + 0x74:
>> + /*
>> + * Either aligned at 16 bytes for valid IRR reg offset or a
>> + * valid Secure AVIC ALLOWED_IRR offset.
>> + */
>> + if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
>> + IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
>> + "Misaligned IRR/ALLOWED_IRR APIC reg read offset 0x%x", reg))
>
> What is that second thing supposed to catch?
>
> reg can be aligned to 16 but reg - SAVIC_ALLOWED_IRR cannot be?
>
APIC_IRR register offsets are:
#Offset #bits Description
0x200 31:0 vectors 0-31
0x210 31:0 vectors 32-63
...
0x270 31:0 vectors 224-255
ALLOWED_IRR register offsets are:
#Offset #bits Description
0x204 31:0 vectors 0-31
0x214 31:0 vectors 32-63
...
0x274 31:0 vectors 224-255
IS_ALIGNED(reg, 16) is when 'reg' is an APIC_IRR register, which are 16
byte aligned.
IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16) is for the case when 'reg' is
a SAVIC_ALLOWED_IRR register. which are at 16 byte strides from the
SAVIC_ALLOWED_IRR base offset. Expected values of (reg -
SAVIC_ALLOWED_IRR) are 0, 0x10, 0x20, ..., 0x70.
If both checks fail, that is a invalid offset ('!' is on the final ORed
value).
!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16))
> I can't follow the comment... perhaps write it out and not try to be clever.
>
Maybe change it to below?
/*
* Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides
* from their respective base offset.
*/
if (WARN_ONCE(!(IS_ALIGNED(reg - APIC_IRR, 16) ||
IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
"Misaligned APIC_IRR/ALLOWED_IRR APIC register read
offset 0x%x",
reg))
>> + return 0;
>> + return apic_get_reg(ap, reg);
>> + default:
>> + pr_err("Permission denied: read of Secure AVIC reg offset 0x%x\n", reg);
>
> Permission denied?
>
> "Error reading unknown Secure AVIC reg offset..."
>
> I'd say.
>
Ok, yes it is clearer.
- Neeraj
On Tue, Aug 19, 2025 at 09:45:02AM +0530, Upadhyay, Neeraj wrote:
> Maybe change it to below?
>
> /*
> * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides
> * from their respective base offset.
> */
>
> if (WARN_ONCE(!(IS_ALIGNED(reg - APIC_IRR, 16) ||
> IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
> "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset
> 0x%x",
> reg))
Let's beef that up some more with a crystal-clear explanation what is going on
here so that readers don't have to stop and stare for 5 mins before they grok
what this is doing:
/*
* Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from
* their respective base offset. APIC_IRRs are in the range
*
* (0x200, 0x210, ..., 0x270)
*
* while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the rangea
*
* (0x204, 0x214, ..., 0x274).
*
* Filter out everything else.
*/
if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
IS_ALIGNED(reg - 4, 16)),
"Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg));
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 8/19/2025 8:02 PM, Borislav Petkov wrote: > On Tue, Aug 19, 2025 at 09:45:02AM +0530, Upadhyay, Neeraj wrote: >> Maybe change it to below? >> >> /* >> * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides >> * from their respective base offset. >> */ >> >> if (WARN_ONCE(!(IS_ALIGNED(reg - APIC_IRR, 16) || >> IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)), >> "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset >> 0x%x", >> reg)) > > Let's beef that up some more with a crystal-clear explanation what is going on > here so that readers don't have to stop and stare for 5 mins before they grok > what this is doing: > > /* > * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from > * their respective base offset. APIC_IRRs are in the range > * > * (0x200, 0x210, ..., 0x270) > * > * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the rangea > * > * (0x204, 0x214, ..., 0x274). > * > * Filter out everything else. > */ > if (WARN_ONCE(!(IS_ALIGNED(reg, 16) || > IS_ALIGNED(reg - 4, 16)), > "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg)); > Ok, looks good. Thanks! - Neeraj
© 2016 - 2026 Red Hat, Inc.