With Secure AVIC only Self-IPI is accelerated. To handle all the
other IPIs, add new callbacks for sending IPI. These callbacks write
to the IRR of the target guest vCPU's APIC backing page and issue
GHCB protocol MSR write event for the hypervisor to notify the
target vCPU about the new interrupt request.
For Secure AVIC GHCB APIC MSR writes, reuse GHCB msr handling code in
vc_handle_msr() by exposing a sev-internal sev_es_ghcb_handle_msr().
Co-developed-by: Kishon Vijay Abraham I <kvijayab@amd.com>
Signed-off-by: Kishon Vijay Abraham I <kvijayab@amd.com>
Reviewed-by: Tianyu Lan <tiala@microsoft.com>
Signed-off-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
Changes since v8:
- Added Tianyu's Reviewed-by.
- Use struct secure_avic_page.
arch/x86/coco/sev/core.c | 28 ++++++
arch/x86/coco/sev/vc-handle.c | 11 ++-
arch/x86/include/asm/sev-internal.h | 2 +
arch/x86/include/asm/sev.h | 2 +
arch/x86/kernel/apic/x2apic_savic.c | 138 +++++++++++++++++++++++++++-
5 files changed, 173 insertions(+), 8 deletions(-)
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 0c59ea82fa99..221a0fc0c387 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -1085,6 +1085,34 @@ int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
return 0;
}
+void savic_ghcb_msr_write(u32 reg, u64 value)
+{
+ u64 msr = APIC_BASE_MSR + (reg >> 4);
+ struct pt_regs regs = {
+ .cx = msr,
+ .ax = lower_32_bits(value),
+ .dx = upper_32_bits(value)
+ };
+ struct es_em_ctxt ctxt = { .regs = ®s };
+ struct ghcb_state state;
+ enum es_result res;
+ struct ghcb *ghcb;
+
+ guard(irqsave)();
+
+ ghcb = __sev_get_ghcb(&state);
+ vc_ghcb_invalidate(ghcb);
+
+ res = sev_es_ghcb_handle_msr(ghcb, &ctxt, true);
+ if (res != ES_OK) {
+ pr_err("Secure AVIC msr (0x%llx) write returned error (%d)\n", msr, res);
+ /* MSR writes should never fail. Any failure is fatal error for SNP guest */
+ snp_abort();
+ }
+
+ __sev_put_ghcb(&state);
+}
+
enum es_result savic_register_gpa(u64 gpa)
{
struct ghcb_state state;
diff --git a/arch/x86/coco/sev/vc-handle.c b/arch/x86/coco/sev/vc-handle.c
index faf1fce89ed4..fc770cc9117d 100644
--- a/arch/x86/coco/sev/vc-handle.c
+++ b/arch/x86/coco/sev/vc-handle.c
@@ -401,14 +401,10 @@ static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool wri
return ES_OK;
}
-static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write)
{
struct pt_regs *regs = ctxt->regs;
enum es_result ret;
- bool write;
-
- /* Is it a WRMSR? */
- write = ctxt->insn.opcode.bytes[1] == 0x30;
switch (regs->cx) {
case MSR_SVSM_CAA:
@@ -438,6 +434,11 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ret;
}
+static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ return sev_es_ghcb_handle_msr(ghcb, ctxt, ctxt->insn.opcode.bytes[1] == 0x30);
+}
+
static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
int trapnr = ctxt->fi.vector;
diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h
index 3dfd306d1c9e..6876655183a6 100644
--- a/arch/x86/include/asm/sev-internal.h
+++ b/arch/x86/include/asm/sev-internal.h
@@ -97,6 +97,8 @@ static __always_inline void sev_es_wr_ghcb_msr(u64 val)
native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}
+enum es_result sev_es_ghcb_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write);
+
void snp_register_ghcb_early(unsigned long paddr);
bool sev_es_negotiate_protocol(void);
bool sev_es_check_cpu_features(void);
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 8e5083b46607..e849e616dd24 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -534,6 +534,7 @@ int snp_svsm_vtpm_send_command(u8 *buffer);
void __init snp_secure_tsc_prepare(void);
void __init snp_secure_tsc_init(void);
enum es_result savic_register_gpa(u64 gpa);
+void savic_ghcb_msr_write(u32 reg, u64 value);
static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
@@ -607,6 +608,7 @@ static inline int snp_svsm_vtpm_send_command(u8 *buffer) { return -ENODEV; }
static inline void __init snp_secure_tsc_prepare(void) { }
static inline void __init snp_secure_tsc_init(void) { }
static inline enum es_result savic_register_gpa(u64 gpa) { return ES_UNSUPPORTED; }
+static inline void savic_ghcb_msr_write(u32 reg, u64 value) { }
#endif /* CONFIG_AMD_MEM_ENCRYPT */
diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c
index cfe72473f843..dbd488191a16 100644
--- a/arch/x86/kernel/apic/x2apic_savic.c
+++ b/arch/x86/kernel/apic/x2apic_savic.c
@@ -8,6 +8,7 @@
*/
#include <linux/cc_platform.h>
+#include <linux/cpumask.h>
#include <linux/percpu-defs.h>
#include <linux/align.h>
@@ -111,6 +112,73 @@ static u32 savic_read(u32 reg)
#define SAVIC_NMI_REQ 0x278
+static inline void self_ipi_reg_write(unsigned int vector)
+{
+ /*
+ * Secure AVIC hardware accelerates guest's MSR write to SELF_IPI
+ * register. It updates the IRR in the APIC backing page, evaluates
+ * the new IRR for interrupt injection and continues with guest
+ * code execution.
+ */
+ native_apic_msr_write(APIC_SELF_IPI, vector);
+}
+
+static void send_ipi_dest(unsigned int cpu, unsigned int vector)
+{
+ update_vector(cpu, APIC_IRR, vector, true);
+}
+
+static void send_ipi_allbut(unsigned int vector)
+{
+ unsigned int cpu, src_cpu;
+
+ guard(irqsave)();
+
+ src_cpu = raw_smp_processor_id();
+
+ for_each_cpu(cpu, cpu_online_mask) {
+ if (cpu == src_cpu)
+ continue;
+ send_ipi_dest(cpu, vector);
+ }
+}
+
+static inline void self_ipi(unsigned int vector)
+{
+ u32 icr_low = APIC_SELF_IPI | vector;
+
+ native_x2apic_icr_write(icr_low, 0);
+}
+
+static void savic_icr_write(u32 icr_low, u32 icr_high)
+{
+ unsigned int dsh, vector;
+ u64 icr_data;
+
+ dsh = icr_low & APIC_DEST_ALLBUT;
+ vector = icr_low & APIC_VECTOR_MASK;
+
+ switch (dsh) {
+ case APIC_DEST_SELF:
+ self_ipi(vector);
+ break;
+ case APIC_DEST_ALLINC:
+ self_ipi(vector);
+ fallthrough;
+ case APIC_DEST_ALLBUT:
+ send_ipi_allbut(vector);
+ break;
+ default:
+ send_ipi_dest(icr_high, vector);
+ break;
+ }
+
+ icr_data = ((u64)icr_high) << 32 | icr_low;
+ if (dsh != APIC_DEST_SELF)
+ savic_ghcb_msr_write(APIC_ICR, icr_data);
+ apic_set_reg64(this_cpu_ptr(secure_avic_page), APIC_ICR, icr_data);
+}
+
static void savic_write(u32 reg, u32 data)
{
void *ap = this_cpu_ptr(secure_avic_page);
@@ -121,7 +189,6 @@ static void savic_write(u32 reg, u32 data)
case APIC_LVT1:
case APIC_TMICT:
case APIC_TDCR:
- case APIC_SELF_IPI:
case APIC_TASKPRI:
case APIC_EOI:
case APIC_SPIV:
@@ -137,7 +204,10 @@ static void savic_write(u32 reg, u32 data)
apic_set_reg(ap, reg, data);
break;
case APIC_ICR:
- apic_set_reg64(ap, reg, (u64) data);
+ savic_icr_write(data, 0);
+ break;
+ case APIC_SELF_IPI:
+ self_ipi_reg_write(data);
break;
/* ALLOWED_IRR offsets are writable */
case SAVIC_ALLOWED_IRR ... SAVIC_ALLOWED_IRR + 0x70:
@@ -151,6 +221,61 @@ static void savic_write(u32 reg, u32 data)
}
}
+static void send_ipi(u32 dest, unsigned int vector, unsigned int dsh)
+{
+ unsigned int icr_low;
+
+ icr_low = __prepare_ICR(dsh, vector, APIC_DEST_PHYSICAL);
+ savic_icr_write(icr_low, dest);
+}
+
+static void savic_send_ipi(int cpu, int vector)
+{
+ u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
+
+ send_ipi(dest, vector, 0);
+}
+
+static void send_ipi_mask(const struct cpumask *mask, unsigned int vector, bool excl_self)
+{
+ unsigned int cpu, this_cpu;
+
+ guard(irqsave)();
+
+ this_cpu = raw_smp_processor_id();
+
+ for_each_cpu(cpu, mask) {
+ if (excl_self && cpu == this_cpu)
+ continue;
+ send_ipi(per_cpu(x86_cpu_to_apicid, cpu), vector, 0);
+ }
+}
+
+static void savic_send_ipi_mask(const struct cpumask *mask, int vector)
+{
+ send_ipi_mask(mask, vector, false);
+}
+
+static void savic_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ send_ipi_mask(mask, vector, true);
+}
+
+static void savic_send_ipi_allbutself(int vector)
+{
+ send_ipi(0, vector, APIC_DEST_ALLBUT);
+}
+
+static void savic_send_ipi_all(int vector)
+{
+ send_ipi(0, vector, APIC_DEST_ALLINC);
+}
+
+static void savic_send_ipi_self(int vector)
+{
+ self_ipi_reg_write(vector);
+}
+
static void savic_update_vector(unsigned int cpu, unsigned int vector, bool set)
{
update_vector(cpu, SAVIC_ALLOWED_IRR, vector, set);
@@ -222,13 +347,20 @@ static struct apic apic_x2apic_savic __ro_after_init = {
.calc_dest_apicid = apic_default_calc_apicid,
+ .send_IPI = savic_send_ipi,
+ .send_IPI_mask = savic_send_ipi_mask,
+ .send_IPI_mask_allbutself = savic_send_ipi_mask_allbutself,
+ .send_IPI_allbutself = savic_send_ipi_allbutself,
+ .send_IPI_all = savic_send_ipi_all,
+ .send_IPI_self = savic_send_ipi_self,
+
.nmi_to_offline_cpu = true,
.read = savic_read,
.write = savic_write,
.eoi = native_apic_msr_eoi,
.icr_read = native_x2apic_icr_read,
- .icr_write = native_x2apic_icr_write,
+ .icr_write = savic_icr_write,
.update_vector = savic_update_vector,
};
--
2.34.1
On Mon, Aug 11, 2025 at 03:14:33PM +0530, Neeraj Upadhyay wrote: > With Secure AVIC only Self-IPI is accelerated. To handle all the > other IPIs, add new callbacks for sending IPI. These callbacks write > to the IRR of the target guest vCPU's APIC backing page and issue > GHCB protocol MSR write event for the hypervisor to notify the > target vCPU about the new interrupt request. > > For Secure AVIC GHCB APIC MSR writes, reuse GHCB msr handling code in ^^^^^^^^^^^^^^^^^^ say what now?! > +void savic_ghcb_msr_write(u32 reg, u64 value) I guess this belongs into x2apic_savic.c. > +{ > + u64 msr = APIC_BASE_MSR + (reg >> 4); > + struct pt_regs regs = { > + .cx = msr, > + .ax = lower_32_bits(value), > + .dx = upper_32_bits(value) > + }; > + struct es_em_ctxt ctxt = { .regs = ®s }; > + struct ghcb_state state; > + enum es_result res; > + struct ghcb *ghcb; > + > + guard(irqsave)(); > + > + ghcb = __sev_get_ghcb(&state); > + vc_ghcb_invalidate(ghcb); > + > + res = sev_es_ghcb_handle_msr(ghcb, &ctxt, true); > + if (res != ES_OK) { > + pr_err("Secure AVIC msr (0x%llx) write returned error (%d)\n", msr, res); > + /* MSR writes should never fail. Any failure is fatal error for SNP guest */ > + snp_abort(); > + } > + > + __sev_put_ghcb(&state); > +} ... > +static inline void self_ipi_reg_write(unsigned int vector) > +{ > + /* > + * Secure AVIC hardware accelerates guest's MSR write to SELF_IPI > + * register. It updates the IRR in the APIC backing page, evaluates > + * the new IRR for interrupt injection and continues with guest > + * code execution. > + */ Why is that comment here? It is above a WRMSR write. What acceleration is it talking about? > + native_apic_msr_write(APIC_SELF_IPI, vector); > +} -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 8/20/2025 9:16 PM, Borislav Petkov wrote: > On Mon, Aug 11, 2025 at 03:14:33PM +0530, Neeraj Upadhyay wrote: >> With Secure AVIC only Self-IPI is accelerated. To handle all the >> other IPIs, add new callbacks for sending IPI. These callbacks write >> to the IRR of the target guest vCPU's APIC backing page and issue >> GHCB protocol MSR write event for the hypervisor to notify the >> target vCPU about the new interrupt request. >> >> For Secure AVIC GHCB APIC MSR writes, reuse GHCB msr handling code in > ^^^^^^^^^^^^^^^^^^ > > say what now?! > Is below better? x86/apic: Add support to send IPI for Secure AVIC Secure AVIC hardware only accelerates Self-IPI, i.e. on WRMSR to APIC_SELF_IPI and APIC_ICR (with destination shorthand equal to Self) registers, hardware takes care of updating the APIC_IRR in the APIC backing page of the vCPU. For other IPI types (cross-vCPU, broadcast IPIs), software needs to take care of updating the APIC_IRR state of the target CPUs and to ensure that the target vCPUs notice the new pending interrupt. Add new callbacks in the Secure AVIC driver for sending IPI requests. These callbacks update the IRR in the target guest vCPU's APIC backing page. To ensure that the remote vCPU notices the new pending interrupt, reuse the GHCB MSR handling code in vc_handle_msr() to issue APIC_ICR MSR-write GHCB protocol event to the hypervisor. For Secure AVIC guests, on APIC_ICR write MSR exits, the hypervisor notifies the target vCPU by either sending an AVIC doorbell (if target vCPU is running) or by waking up the non-running target vCPU. >> +void savic_ghcb_msr_write(u32 reg, u64 value) > > I guess this belongs into x2apic_savic.c. > Ok moving it to x2apic_savic.c requires below 4 sev-internal declarations to be moved to arch/x86/include/asm/sev.h struct ghcb_state; struct ghcb *__sev_get_ghcb(struct ghcb_state *state); void __sev_put_ghcb(struct ghcb_state *state); enum es_result sev_es_ghcb_handle_msr(...); >> +{ >> + u64 msr = APIC_BASE_MSR + (reg >> 4); >> + struct pt_regs regs = { >> + .cx = msr, >> + .ax = lower_32_bits(value), >> + .dx = upper_32_bits(value) >> + }; >> + struct es_em_ctxt ctxt = { .regs = ®s }; >> + struct ghcb_state state; >> + enum es_result res; >> + struct ghcb *ghcb; >> + >> + guard(irqsave)(); >> + >> + ghcb = __sev_get_ghcb(&state); >> + vc_ghcb_invalidate(ghcb); >> + >> + res = sev_es_ghcb_handle_msr(ghcb, &ctxt, true); >> + if (res != ES_OK) { >> + pr_err("Secure AVIC msr (0x%llx) write returned error (%d)\n", msr, res); >> + /* MSR writes should never fail. Any failure is fatal error for SNP guest */ >> + snp_abort(); >> + } >> + >> + __sev_put_ghcb(&state); >> +} > > ... > >> +static inline void self_ipi_reg_write(unsigned int vector) >> +{ >> + /* >> + * Secure AVIC hardware accelerates guest's MSR write to SELF_IPI >> + * register. It updates the IRR in the APIC backing page, evaluates >> + * the new IRR for interrupt injection and continues with guest >> + * code execution. >> + */ > > Why is that comment here? It is above a WRMSR write. What acceleration is it > talking about? > This comment explains why WRMSR is sufficient for sending SELF_IPI. On WRMSR by vCPU, Secure AVIC hardware takes care of updating APIC_IRR in backing page. Hardware also ensures that new APIC_IRR state is evaluated for new pending interrupts. So, WRMSR is hardware-accelerated. For non-self-IPI case, software need to do APIC_IRR update and sending of wakeup-request/doorbell to the target vCPU. - Neeraj >> + native_apic_msr_write(APIC_SELF_IPI, vector); >> +} >
On Thu, Aug 21, 2025 at 10:57:24AM +0530, Upadhyay, Neeraj wrote: > Is below better? I was only reacting to that head-spinning, conglomerate of abbreviations "AVIC GHCB APIC MSR". > x86/apic: Add support to send IPI for Secure AVIC > > Secure AVIC hardware only accelerates Self-IPI, i.e. on WRMSR to > APIC_SELF_IPI and APIC_ICR (with destination shorthand equal to Self) > registers, hardware takes care of updating the APIC_IRR in the APIC > backing page of the vCPU. For other IPI types (cross-vCPU, broadcast IPIs), > software needs to take care of updating the APIC_IRR state of the target > CPUs and to ensure that the target vCPUs notice the new pending interrupt. > > Add new callbacks in the Secure AVIC driver for sending IPI requests. These > callbacks update the IRR in the target guest vCPU's APIC backing page. To > ensure that the remote vCPU notices the new pending interrupt, reuse the > GHCB MSR handling code in vc_handle_msr() to issue APIC_ICR MSR-write GHCB > protocol event to the hypervisor. For Secure AVIC guests, on APIC_ICR write > MSR exits, the hypervisor notifies the target vCPU by either sending an AVIC > doorbell (if target vCPU is running) or by waking up the non-running target > vCPU. But I'll take a definitely better commit message too! :-) > Ok moving it to x2apic_savic.c requires below 4 sev-internal declarations to > be moved to arch/x86/include/asm/sev.h > > struct ghcb_state; > struct ghcb *__sev_get_ghcb(struct ghcb_state *state); > void __sev_put_ghcb(struct ghcb_state *state); > enum es_result sev_es_ghcb_handle_msr(...); Well, do you anticipate needing any more sev* facilities for SAVIC? If so, you probably should carve them out into arch/x86/coco/sev/savic.c If only 4 functions, I guess they're probably still ok in .../sev/core.c > This comment explains why WRMSR is sufficient for sending SELF_IPI. On > WRMSR by vCPU, Secure AVIC hardware takes care of updating APIC_IRR in > backing page. Hardware also ensures that new APIC_IRR state is evaluated > for new pending interrupts. So, WRMSR is hardware-accelerated. > > For non-self-IPI case, software need to do APIC_IRR update and sending of > wakeup-request/doorbell to the target vCPU. Yeah, you need to rewrite it like the commit message above - it needs to say that upon the MSR write, hw does this and that and therefore accelerates this type of IPI. Then it is clear what you mean by "acceleration." Thx. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 8/22/2025 10:44 PM, Borislav Petkov wrote: > On Thu, Aug 21, 2025 at 10:57:24AM +0530, Upadhyay, Neeraj wrote: >> Is below better? > > I was only reacting to that head-spinning, conglomerate of abbreviations "AVIC > GHCB APIC MSR". > Ah ok. I thought you were not happy with the commit message wording/structure. >> x86/apic: Add support to send IPI for Secure AVIC >> >> Secure AVIC hardware only accelerates Self-IPI, i.e. on WRMSR to >> APIC_SELF_IPI and APIC_ICR (with destination shorthand equal to Self) >> registers, hardware takes care of updating the APIC_IRR in the APIC >> backing page of the vCPU. For other IPI types (cross-vCPU, broadcast IPIs), >> software needs to take care of updating the APIC_IRR state of the target >> CPUs and to ensure that the target vCPUs notice the new pending interrupt. >> >> Add new callbacks in the Secure AVIC driver for sending IPI requests. These >> callbacks update the IRR in the target guest vCPU's APIC backing page. To >> ensure that the remote vCPU notices the new pending interrupt, reuse the >> GHCB MSR handling code in vc_handle_msr() to issue APIC_ICR MSR-write GHCB >> protocol event to the hypervisor. For Secure AVIC guests, on APIC_ICR write >> MSR exits, the hypervisor notifies the target vCPU by either sending an AVIC >> doorbell (if target vCPU is running) or by waking up the non-running target >> vCPU. > > But I'll take a definitely better commit message too! :-) > Cool! >> Ok moving it to x2apic_savic.c requires below 4 sev-internal declarations to >> be moved to arch/x86/include/asm/sev.h >> >> struct ghcb_state; >> struct ghcb *__sev_get_ghcb(struct ghcb_state *state); >> void __sev_put_ghcb(struct ghcb_state *state); >> enum es_result sev_es_ghcb_handle_msr(...); > > Well, do you anticipate needing any more sev* facilities for SAVIC? > At this point I do not anticipate adding new functions for new SAVIC features. > If so, you probably should carve them out into arch/x86/coco/sev/savic.c > > If only 4 functions, I guess they're probably still ok in .../sev/core.c > Ok. I will keep them in sev/core.c for now and move to sev/savic.c if anything new comes up in future. >> This comment explains why WRMSR is sufficient for sending SELF_IPI. On >> WRMSR by vCPU, Secure AVIC hardware takes care of updating APIC_IRR in >> backing page. Hardware also ensures that new APIC_IRR state is evaluated >> for new pending interrupts. So, WRMSR is hardware-accelerated. >> >> For non-self-IPI case, software need to do APIC_IRR update and sending of >> wakeup-request/doorbell to the target vCPU. > > Yeah, you need to rewrite it like the commit message above - it needs to say > that upon the MSR write, hw does this and that and therefore accelerates this > type of IPI. > > Then it is clear what you mean by "acceleration." > Got it. Will update. Thanks! - Neeraj > Thx. >
© 2016 - 2025 Red Hat, Inc.