[PATCH v9 03/18] x86/apic: Populate .read()/.write() callbacks of Secure AVIC driver

Neeraj Upadhyay posted 18 patches 1 month, 3 weeks ago
There is a newer version of this series
[PATCH v9 03/18] x86/apic: Populate .read()/.write() callbacks of Secure AVIC driver
Posted by Neeraj Upadhyay 1 month, 3 weeks ago
Add read() and write() APIC callback functions to read and write x2APIC
registers directly from the guest APIC backing page of a vCPU.

The x2APIC registers are mapped at an offset within the guest APIC
backing page which is same as their x2APIC MMIO offset. Secure AVIC
adds new registers such as ALLOWED_IRRs (which are at 4-byte offset
within the IRR register offset range) and NMI_REQ to the APIC register
space.

When Secure AVIC is enabled, guest's rdmsr/wrmsr of APIC registers
result in VC exception (for non-accelerated register accesses) with
error code VMEXIT_AVIC_NOACCEL. The VC exception handler can read/write
the x2APIC register in the guest APIC backing page to complete the
rdmsr/wrmsr. Since doing this would increase the latency of accessing
x2APIC registers, instead of doing rdmsr/wrmsr based reg accesses
and handling reads/writes in VC exception, directly read/write APIC
registers from/to the guest APIC backing page of the vCPU in read()
and write() callbacks of the Secure AVIC APIC driver.

Co-developed-by: Kishon Vijay Abraham I <kvijayab@amd.com>
Signed-off-by: Kishon Vijay Abraham I <kvijayab@amd.com>
Reviewed-by: Tianyu Lan <tiala@microsoft.com>
Signed-off-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
Changes since v8:
 - Added Tianyu's Reviewed-by.
 - Code cleanup to use struct secure_avic_page pointer.

 arch/x86/include/asm/apicdef.h      |   2 +
 arch/x86/kernel/apic/x2apic_savic.c | 113 +++++++++++++++++++++++++++-
 2 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 094106b6a538..be39a543fbe5 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -135,6 +135,8 @@
 #define		APIC_TDR_DIV_128	0xA
 #define	APIC_EFEAT	0x400
 #define	APIC_ECTRL	0x410
+#define APIC_SEOI	0x420
+#define APIC_IER	0x480
 #define APIC_EILVTn(n)	(0x500 + 0x10 * n)
 #define		APIC_EILVT_NR_AMD_K8	1	/* # of extended interrupts */
 #define		APIC_EILVT_NR_AMD_10H	4
diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c
index 1c70b7c111f0..86a522685230 100644
--- a/arch/x86/kernel/apic/x2apic_savic.c
+++ b/arch/x86/kernel/apic/x2apic_savic.c
@@ -9,6 +9,7 @@
 
 #include <linux/cc_platform.h>
 #include <linux/percpu-defs.h>
+#include <linux/align.h>
 
 #include <asm/apic.h>
 #include <asm/sev.h>
@@ -26,6 +27,114 @@ static int savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC);
 }
 
+#define SAVIC_ALLOWED_IRR	0x204
+
+static u32 savic_read(u32 reg)
+{
+	void *ap = this_cpu_ptr(secure_avic_page);
+
+	/*
+	 * When Secure AVIC is enabled, rdmsr/wrmsr of APIC registers
+	 * result in VC exception (for non-accelerated register accesses)
+	 * with VMEXIT_AVIC_NOACCEL error code. The VC exception handler
+	 * can read/write the x2APIC register in the guest APIC backing page.
+	 * Since doing this would increase the latency of accessing x2APIC
+	 * registers, instead of doing rdmsr/wrmsr based accesses and
+	 * handling apic register reads/writes in VC exception, the read()
+	 * and write() callbacks directly read/write APIC register from/to
+	 * the vCPU APIC backing page.
+	 */
+	switch (reg) {
+	case APIC_LVTT:
+	case APIC_TMICT:
+	case APIC_TMCCT:
+	case APIC_TDCR:
+	case APIC_ID:
+	case APIC_LVR:
+	case APIC_TASKPRI:
+	case APIC_ARBPRI:
+	case APIC_PROCPRI:
+	case APIC_LDR:
+	case APIC_SPIV:
+	case APIC_ESR:
+	case APIC_LVTTHMR:
+	case APIC_LVTPC:
+	case APIC_LVT0:
+	case APIC_LVT1:
+	case APIC_LVTERR:
+	case APIC_EFEAT:
+	case APIC_ECTRL:
+	case APIC_SEOI:
+	case APIC_IER:
+	case APIC_EILVTn(0) ... APIC_EILVTn(3):
+		return apic_get_reg(ap, reg);
+	case APIC_ICR:
+		return (u32) apic_get_reg64(ap, reg);
+	case APIC_ISR ... APIC_ISR + 0x70:
+	case APIC_TMR ... APIC_TMR + 0x70:
+		if (WARN_ONCE(!IS_ALIGNED(reg, 16),
+			      "APIC reg read offset 0x%x not aligned at 16 bytes", reg))
+			return 0;
+		return apic_get_reg(ap, reg);
+	/* IRR and ALLOWED_IRR offset range */
+	case APIC_IRR ... APIC_IRR + 0x74:
+		/*
+		 * Either aligned at 16 bytes for valid IRR reg offset or a
+		 * valid Secure AVIC ALLOWED_IRR offset.
+		 */
+		if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
+				IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
+			      "Misaligned IRR/ALLOWED_IRR APIC reg read offset 0x%x", reg))
+			return 0;
+		return apic_get_reg(ap, reg);
+	default:
+		pr_err("Permission denied: read of Secure AVIC reg offset 0x%x\n", reg);
+		return 0;
+	}
+}
+
+#define SAVIC_NMI_REQ		0x278
+
+static void savic_write(u32 reg, u32 data)
+{
+	void *ap = this_cpu_ptr(secure_avic_page);
+
+	switch (reg) {
+	case APIC_LVTT:
+	case APIC_LVT0:
+	case APIC_LVT1:
+	case APIC_TMICT:
+	case APIC_TDCR:
+	case APIC_SELF_IPI:
+	case APIC_TASKPRI:
+	case APIC_EOI:
+	case APIC_SPIV:
+	case SAVIC_NMI_REQ:
+	case APIC_ESR:
+	case APIC_LVTTHMR:
+	case APIC_LVTPC:
+	case APIC_LVTERR:
+	case APIC_ECTRL:
+	case APIC_SEOI:
+	case APIC_IER:
+	case APIC_EILVTn(0) ... APIC_EILVTn(3):
+		apic_set_reg(ap, reg, data);
+		break;
+	case APIC_ICR:
+		apic_set_reg64(ap, reg, (u64) data);
+		break;
+	/* ALLOWED_IRR offsets are writable */
+	case SAVIC_ALLOWED_IRR ... SAVIC_ALLOWED_IRR + 0x70:
+		if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)) {
+			apic_set_reg(ap, reg, data);
+			break;
+		}
+		fallthrough;
+	default:
+		pr_err("Permission denied: write to Secure AVIC reg offset 0x%x\n", reg);
+	}
+}
+
 static void savic_setup(void)
 {
 	void *ap = this_cpu_ptr(secure_avic_page);
@@ -88,8 +197,8 @@ static struct apic apic_x2apic_savic __ro_after_init = {
 
 	.nmi_to_offline_cpu		= true,
 
-	.read				= native_apic_msr_read,
-	.write				= native_apic_msr_write,
+	.read				= savic_read,
+	.write				= savic_write,
 	.eoi				= native_apic_msr_eoi,
 	.icr_read			= native_x2apic_icr_read,
 	.icr_write			= native_x2apic_icr_write,
-- 
2.34.1
Re: [PATCH v9 03/18] x86/apic: Populate .read()/.write() callbacks of Secure AVIC driver
Posted by Borislav Petkov 1 month, 2 weeks ago
On Mon, Aug 11, 2025 at 03:14:29PM +0530, Neeraj Upadhyay wrote:
> Add read() and write() APIC callback functions to read and write x2APIC
> registers directly from the guest APIC backing page of a vCPU.
> 
> The x2APIC registers are mapped at an offset within the guest APIC
> backing page which is same as their x2APIC MMIO offset. Secure AVIC
> adds new registers such as ALLOWED_IRRs (which are at 4-byte offset
> within the IRR register offset range) and NMI_REQ to the APIC register
> space.
> 
> When Secure AVIC is enabled, guest's rdmsr/wrmsr of APIC registers
> result in VC exception (for non-accelerated register accesses) with

s/VC/#VC/g since you're talking about an exception vector.

> error code VMEXIT_AVIC_NOACCEL. The VC exception handler can read/write
> the x2APIC register in the guest APIC backing page to complete the
> rdmsr/wrmsr.

All x86 insns in caps pls: RDMSR/WRMSR.

> +static u32 savic_read(u32 reg)
> +{
> +	void *ap = this_cpu_ptr(secure_avic_page);
> +
> +	/*
> +	 * When Secure AVIC is enabled, rdmsr/wrmsr of APIC registers
> +	 * result in VC exception (for non-accelerated register accesses)
> +	 * with VMEXIT_AVIC_NOACCEL error code. The VC exception handler
> +	 * can read/write the x2APIC register in the guest APIC backing page.
> +	 * Since doing this would increase the latency of accessing x2APIC
> +	 * registers, instead of doing rdmsr/wrmsr based accesses and
> +	 * handling apic register reads/writes in VC exception, the read()

s/apic/APIC/g

Please be consistent across the whole set. Acronyms are in all caps. Insn
names too.

> +	 * and write() callbacks directly read/write APIC register from/to
> +	 * the vCPU APIC backing page.
> +	 */

Move that comment above the function. And also split it in paragraphs: when it
becomes more than 4-5 lines, split the next one with a new line.

> +	switch (reg) {
> +	case APIC_LVTT:
> +	case APIC_TMICT:
> +	case APIC_TMCCT:
> +	case APIC_TDCR:
> +	case APIC_ID:
> +	case APIC_LVR:
> +	case APIC_TASKPRI:
> +	case APIC_ARBPRI:
> +	case APIC_PROCPRI:
> +	case APIC_LDR:
> +	case APIC_SPIV:
> +	case APIC_ESR:
> +	case APIC_LVTTHMR:
> +	case APIC_LVTPC:
> +	case APIC_LVT0:
> +	case APIC_LVT1:
> +	case APIC_LVTERR:
> +	case APIC_EFEAT:
> +	case APIC_ECTRL:
> +	case APIC_SEOI:
> +	case APIC_IER:
> +	case APIC_EILVTn(0) ... APIC_EILVTn(3):
> +		return apic_get_reg(ap, reg);
> +	case APIC_ICR:
> +		return (u32) apic_get_reg64(ap, reg);
			    ^

no need for that space.

> +	case APIC_ISR ... APIC_ISR + 0x70:
> +	case APIC_TMR ... APIC_TMR + 0x70:
> +		if (WARN_ONCE(!IS_ALIGNED(reg, 16),
> +			      "APIC reg read offset 0x%x not aligned at 16 bytes", reg))
> +			return 0;
> +		return apic_get_reg(ap, reg);
> +	/* IRR and ALLOWED_IRR offset range */
> +	case APIC_IRR ... APIC_IRR + 0x74:
> +		/*
> +		 * Either aligned at 16 bytes for valid IRR reg offset or a
> +		 * valid Secure AVIC ALLOWED_IRR offset.
> +		 */
> +		if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
> +				IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
> +			      "Misaligned IRR/ALLOWED_IRR APIC reg read offset 0x%x", reg))

What is that second thing supposed to catch?

reg can be aligned to 16 but reg - SAVIC_ALLOWED_IRR cannot be?

I can't follow the comment... perhaps write it out and not try to be clever.

> +			return 0;
> +		return apic_get_reg(ap, reg);
> +	default:
> +		pr_err("Permission denied: read of Secure AVIC reg offset 0x%x\n", reg);

Permission denied?

"Error reading unknown Secure AVIC reg offset..."

I'd say.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v9 03/18] x86/apic: Populate .read()/.write() callbacks of Secure AVIC driver
Posted by Upadhyay, Neeraj 1 month, 2 weeks ago

On 8/18/2025 4:56 PM, Borislav Petkov wrote:
> On Mon, Aug 11, 2025 at 03:14:29PM +0530, Neeraj Upadhyay wrote:
>> Add read() and write() APIC callback functions to read and write x2APIC
>> registers directly from the guest APIC backing page of a vCPU.
>>
>> The x2APIC registers are mapped at an offset within the guest APIC
>> backing page which is same as their x2APIC MMIO offset. Secure AVIC
>> adds new registers such as ALLOWED_IRRs (which are at 4-byte offset
>> within the IRR register offset range) and NMI_REQ to the APIC register
>> space.
>>
>> When Secure AVIC is enabled, guest's rdmsr/wrmsr of APIC registers
>> result in VC exception (for non-accelerated register accesses) with
> 
> s/VC/#VC/g since you're talking about an exception vector.
> 

Ok

>> error code VMEXIT_AVIC_NOACCEL. The VC exception handler can read/write
>> the x2APIC register in the guest APIC backing page to complete the
>> rdmsr/wrmsr.
> 
> All x86 insns in caps pls: RDMSR/WRMSR.
> 

Ok

>> +static u32 savic_read(u32 reg)
>> +{
>> +	void *ap = this_cpu_ptr(secure_avic_page);
>> +
>> +	/*
>> +	 * When Secure AVIC is enabled, rdmsr/wrmsr of APIC registers
>> +	 * result in VC exception (for non-accelerated register accesses)
>> +	 * with VMEXIT_AVIC_NOACCEL error code. The VC exception handler
>> +	 * can read/write the x2APIC register in the guest APIC backing page.
>> +	 * Since doing this would increase the latency of accessing x2APIC
>> +	 * registers, instead of doing rdmsr/wrmsr based accesses and
>> +	 * handling apic register reads/writes in VC exception, the read()
> 
> s/apic/APIC/g
> 
> Please be consistent across the whole set. Acronyms are in all caps. Insn
> names too.
> 

Ok

>> +	 * and write() callbacks directly read/write APIC register from/to
>> +	 * the vCPU APIC backing page.
>> +	 */
> 
> Move that comment above the function. And also split it in paragraphs: when it
> becomes more than 4-5 lines, split the next one with a new line.
> 

Ok

>> +	switch (reg) {
>> +	case APIC_LVTT:
>> +	case APIC_TMICT:
>> +	case APIC_TMCCT:
>> +	case APIC_TDCR:
>> +	case APIC_ID:
>> +	case APIC_LVR:
>> +	case APIC_TASKPRI:
>> +	case APIC_ARBPRI:
>> +	case APIC_PROCPRI:
>> +	case APIC_LDR:
>> +	case APIC_SPIV:
>> +	case APIC_ESR:
>> +	case APIC_LVTTHMR:
>> +	case APIC_LVTPC:
>> +	case APIC_LVT0:
>> +	case APIC_LVT1:
>> +	case APIC_LVTERR:
>> +	case APIC_EFEAT:
>> +	case APIC_ECTRL:
>> +	case APIC_SEOI:
>> +	case APIC_IER:
>> +	case APIC_EILVTn(0) ... APIC_EILVTn(3):
>> +		return apic_get_reg(ap, reg);
>> +	case APIC_ICR:
>> +		return (u32) apic_get_reg64(ap, reg);
> 			    ^
> 
> no need for that space.
> 

Ok, will remove.

>> +	case APIC_ISR ... APIC_ISR + 0x70:
>> +	case APIC_TMR ... APIC_TMR + 0x70:
>> +		if (WARN_ONCE(!IS_ALIGNED(reg, 16),
>> +			      "APIC reg read offset 0x%x not aligned at 16 bytes", reg))
>> +			return 0;
>> +		return apic_get_reg(ap, reg);
>> +	/* IRR and ALLOWED_IRR offset range */
>> +	case APIC_IRR ... APIC_IRR + 0x74:
>> +		/*
>> +		 * Either aligned at 16 bytes for valid IRR reg offset or a
>> +		 * valid Secure AVIC ALLOWED_IRR offset.
>> +		 */
>> +		if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
>> +				IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
>> +			      "Misaligned IRR/ALLOWED_IRR APIC reg read offset 0x%x", reg))
> 
> What is that second thing supposed to catch?
> 
> reg can be aligned to 16 but reg - SAVIC_ALLOWED_IRR cannot be?
> 

APIC_IRR register offsets are:

#Offset    #bits         Description

0x200      31:0         vectors 0-31
0x210      31:0         vectors 32-63
...
0x270      31:0         vectors 224-255

ALLOWED_IRR register offsets are:

#Offset    #bits         Description

0x204      31:0         vectors 0-31
0x214      31:0         vectors 32-63
...
0x274      31:0         vectors 224-255

IS_ALIGNED(reg, 16) is when 'reg' is an APIC_IRR register, which are 16 
byte aligned.

IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16) is for the case when 'reg' is
a SAVIC_ALLOWED_IRR register. which are at 16 byte strides from the 
SAVIC_ALLOWED_IRR base offset. Expected values of (reg - 
SAVIC_ALLOWED_IRR) are 0, 0x10, 0x20, ..., 0x70.

If both checks fail, that is a invalid offset ('!' is on the final ORed
value).

!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16))




> I can't follow the comment... perhaps write it out and not try to be clever.
> 

Maybe change it to below?

/*
  * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides
  * from their respective base offset.
  */

if (WARN_ONCE(!(IS_ALIGNED(reg - APIC_IRR, 16) ||
                 IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
               "Misaligned APIC_IRR/ALLOWED_IRR APIC register read 
offset 0x%x",
               reg))

>> +			return 0;
>> +		return apic_get_reg(ap, reg);
>> +	default:
>> +		pr_err("Permission denied: read of Secure AVIC reg offset 0x%x\n", reg);
> 
> Permission denied?
> 
> "Error reading unknown Secure AVIC reg offset..."
> 
> I'd say.
> 

Ok, yes it is clearer.

- Neeraj
Re: [PATCH v9 03/18] x86/apic: Populate .read()/.write() callbacks of Secure AVIC driver
Posted by Borislav Petkov 1 month, 2 weeks ago
On Tue, Aug 19, 2025 at 09:45:02AM +0530, Upadhyay, Neeraj wrote:
> Maybe change it to below?
> 
> /*
>  * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides
>  * from their respective base offset.
>  */
> 
> if (WARN_ONCE(!(IS_ALIGNED(reg - APIC_IRR, 16) ||
>                 IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
>               "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset
> 0x%x",
>               reg))

Let's beef that up some more with a crystal-clear explanation what is going on
here so that readers don't have to stop and stare for 5 mins before they grok
what this is doing:

	/*
	 * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from
	 * their respective base offset. APIC_IRRs are in the range
	 *
	 * (0x200, 0x210,  ..., 0x270)
	 *
	 * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the rangea
	 * 
	 * (0x204, 0x214, ..., 0x274).
	 *
	 * Filter out everything else.
	 */
	 if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
		 	 IS_ALIGNED(reg - 4, 16)),
		      "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg));

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v9 03/18] x86/apic: Populate .read()/.write() callbacks of Secure AVIC driver
Posted by Upadhyay, Neeraj 1 month, 2 weeks ago

On 8/19/2025 8:02 PM, Borislav Petkov wrote:
> On Tue, Aug 19, 2025 at 09:45:02AM +0530, Upadhyay, Neeraj wrote:
>> Maybe change it to below?
>>
>> /*
>>   * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides
>>   * from their respective base offset.
>>   */
>>
>> if (WARN_ONCE(!(IS_ALIGNED(reg - APIC_IRR, 16) ||
>>                  IS_ALIGNED(reg - SAVIC_ALLOWED_IRR, 16)),
>>                "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset
>> 0x%x",
>>                reg))
> 
> Let's beef that up some more with a crystal-clear explanation what is going on
> here so that readers don't have to stop and stare for 5 mins before they grok
> what this is doing:
> 
> 	/*
> 	 * Valid APIC_IRR/SAVIC_ALLOWED_IRR registers are at 16 bytes strides from
> 	 * their respective base offset. APIC_IRRs are in the range
> 	 *
> 	 * (0x200, 0x210,  ..., 0x270)
> 	 *
> 	 * while the SAVIC_ALLOWED_IRR range starts 4 bytes later, in the rangea
> 	 *
> 	 * (0x204, 0x214, ..., 0x274).
> 	 *
> 	 * Filter out everything else.
> 	 */
> 	 if (WARN_ONCE(!(IS_ALIGNED(reg, 16) ||
> 		 	 IS_ALIGNED(reg - 4, 16)),
> 		      "Misaligned APIC_IRR/ALLOWED_IRR APIC register read offset 0x%x", reg));
> 

Ok, looks good. Thanks!


- Neeraj