From: Kishon Vijay Abraham I <kvijayab@amd.com>
Secure AVIC lets guest manage the APIC backing page (unlike emulated
x2APIC or x2AVIC where the hypervisor manages the APIC backing page).
However the introduced Secure AVIC Linux design still maintains the
APIC backing page in the hypervisor to shadow the APIC backing page
maintained by guest (It should be noted only subset of the registers
are shadowed for specific usecases and registers like APIC_IRR,
APIC_ISR are not shadowed).
Add sev_ghcb_msr_read() to invoke "SVM_EXIT_MSR" VMGEXIT to read
MSRs from hypervisor. Initialize the Secure AVIC's APIC backing
page by copying the initial state of shadow APIC backing page in
the hypervisor to the guest APIC backing page. Specifically copy
APIC_LVR, APIC_LDR, and APIC_LVT MSRs from the shadow APIC backing
page.
Signed-off-by: Kishon Vijay Abraham I <kvijayab@amd.com>
Co-developed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
Signed-off-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
arch/x86/coco/sev/core.c | 41 ++++++++++++++++-----
arch/x86/include/asm/sev.h | 2 ++
arch/x86/kernel/apic/x2apic_savic.c | 55 +++++++++++++++++++++++++++++
3 files changed, 90 insertions(+), 8 deletions(-)
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 93470538af5e..0e140f92cfef 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -1331,18 +1331,15 @@ int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
return 0;
}
-static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+static enum es_result __vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write)
{
struct pt_regs *regs = ctxt->regs;
+ u64 exit_info_1 = write ? 1 : 0;
enum es_result ret;
- u64 exit_info_1;
-
- /* Is it a WRMSR? */
- exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;
if (regs->cx == MSR_SVSM_CAA) {
/* Writes to the SVSM CAA msr are ignored */
- if (exit_info_1)
+ if (write)
return ES_OK;
regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
@@ -1352,14 +1349,14 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
}
ghcb_set_rcx(ghcb, regs->cx);
- if (exit_info_1) {
+ if (write) {
ghcb_set_rax(ghcb, regs->ax);
ghcb_set_rdx(ghcb, regs->dx);
}
ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);
- if ((ret == ES_OK) && (!exit_info_1)) {
+ if (ret == ES_OK && !write) {
regs->ax = ghcb->save.rax;
regs->dx = ghcb->save.rdx;
}
@@ -1367,6 +1364,34 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ret;
}
+static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+ return __vc_handle_msr(ghcb, ctxt, ctxt->insn.opcode.bytes[1] == 0x30);
+}
+
+enum es_result sev_ghcb_msr_read(u64 msr, u64 *value)
+{
+ struct pt_regs regs = { .cx = msr };
+ struct es_em_ctxt ctxt = { .regs = ®s };
+ struct ghcb_state state;
+ unsigned long flags;
+ enum es_result ret;
+ struct ghcb *ghcb;
+
+ local_irq_save(flags);
+ ghcb = __sev_get_ghcb(&state);
+ vc_ghcb_invalidate(ghcb);
+
+ ret = __vc_handle_msr(ghcb, &ctxt, false);
+ if (ret == ES_OK)
+ *value = regs.ax | regs.dx << 32;
+
+ __sev_put_ghcb(&state);
+ local_irq_restore(flags);
+
+ return ret;
+}
+
enum es_result sev_notify_savic_gpa(u64 gpa)
{
struct ghcb_state state;
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index e84fc7fcc32a..5e6385bfb85a 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -400,6 +400,7 @@ u64 sev_get_status(void);
void sev_show_status(void);
void snp_update_svsm_ca(void);
enum es_result sev_notify_savic_gpa(u64 gpa);
+enum es_result sev_ghcb_msr_read(u64 msr, u64 *value);
#else /* !CONFIG_AMD_MEM_ENCRYPT */
@@ -437,6 +438,7 @@ static inline u64 sev_get_status(void) { return 0; }
static inline void sev_show_status(void) { }
static inline void snp_update_svsm_ca(void) { }
static inline enum es_result sev_notify_savic_gpa(u64 gpa) { return ES_UNSUPPORTED; }
+static inline enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) { return ES_UNSUPPORTED; }
#endif /* CONFIG_AMD_MEM_ENCRYPT */
diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c
index 6a471bbc3dba..99151be4e173 100644
--- a/arch/x86/kernel/apic/x2apic_savic.c
+++ b/arch/x86/kernel/apic/x2apic_savic.c
@@ -11,6 +11,7 @@
#include <linux/cc_platform.h>
#include <linux/percpu-defs.h>
#include <linux/align.h>
+#include <linux/sizes.h>
#include <asm/apic.h>
#include <asm/sev.h>
@@ -20,6 +21,19 @@
static DEFINE_PER_CPU(void *, apic_backing_page);
static DEFINE_PER_CPU(bool, savic_setup_done);
+enum lapic_lvt_entry {
+ LVT_TIMER,
+ LVT_THERMAL_MONITOR,
+ LVT_PERFORMANCE_COUNTER,
+ LVT_LINT0,
+ LVT_LINT1,
+ LVT_ERROR,
+
+ APIC_MAX_NR_LVT_ENTRIES,
+};
+
+#define APIC_LVTx(x) (APIC_LVTT + 0x10 * (x))
+
static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC);
@@ -35,6 +49,22 @@ static inline void set_reg(char *page, int reg_off, u32 val)
WRITE_ONCE(*((u32 *)(page + reg_off)), val);
}
+static u32 read_msr_from_hv(u32 reg)
+{
+ u64 data, msr;
+ int ret;
+
+ msr = APIC_BASE_MSR + (reg >> 4);
+ ret = sev_ghcb_msr_read(msr, &data);
+ if (ret != ES_OK) {
+ pr_err("Secure AVIC msr (%#llx) read returned error (%d)\n", msr, ret);
+ /* MSR read failures are treated as fatal errors */
+ snp_abort();
+ }
+
+ return lower_32_bits(data);
+}
+
#define SAVIC_ALLOWED_IRR_OFFSET 0x204
static u32 x2apic_savic_read(u32 reg)
@@ -168,6 +198,30 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in
__send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
}
+static void init_backing_page(void *backing_page)
+{
+ u32 val;
+ int i;
+
+ val = read_msr_from_hv(APIC_LVR);
+ set_reg(backing_page, APIC_LVR, val);
+
+ /*
+ * Hypervisor is used for all timer related functions,
+ * so don't copy those values.
+ */
+ for (i = LVT_THERMAL_MONITOR; i < APIC_MAX_NR_LVT_ENTRIES; i++) {
+ val = read_msr_from_hv(APIC_LVTx(i));
+ set_reg(backing_page, APIC_LVTx(i), val);
+ }
+
+ val = read_msr_from_hv(APIC_LVT0);
+ set_reg(backing_page, APIC_LVT0, val);
+
+ val = read_msr_from_hv(APIC_LDR);
+ set_reg(backing_page, APIC_LDR, val);
+}
+
static void x2apic_savic_setup(void)
{
void *backing_page;
@@ -178,6 +232,7 @@ static void x2apic_savic_setup(void)
return;
backing_page = this_cpu_read(apic_backing_page);
+ init_backing_page(backing_page);
gpa = __pa(backing_page);
ret = sev_notify_savic_gpa(gpa);
if (ret != ES_OK)
--
2.34.1
Hi Neeraj, On 9/13/2024 4:36 AM, Neeraj Upadhyay wrote: > +static void init_backing_page(void *backing_page) > +{ > + u32 val; > + int i; > + > + val = read_msr_from_hv(APIC_LVR); > + set_reg(backing_page, APIC_LVR, val); > + When you read the register from hypervisor, there is certain value defined in APM Table 16-2. APIC Registers, says APIC_LVR has value 80??0010h out of reset. More specifically, Bit 31 is set which means the presence of extended APIC registers, and Bit 4 is set which is part of version number: "The local APIC implementation is identified with a value=1Xh (20h-FFh are reserved)". I think you should verify those values instead of just reading from the hypervisor. Also, I think you probably should verify all of registers you read from the hypervisor before you use them in the guest. In other words, sanitize the inputs from the hypervisor. Thanks, Melody
On 11/12/2024 4:13 AM, Melody (Huibo) Wang wrote: > Hi Neeraj, > > On 9/13/2024 4:36 AM, Neeraj Upadhyay wrote: > >> +static void init_backing_page(void *backing_page) >> +{ >> + u32 val; >> + int i; >> + >> + val = read_msr_from_hv(APIC_LVR); >> + set_reg(backing_page, APIC_LVR, val); >> + > > When you read the register from hypervisor, there is certain value defined in APM Table 16-2. APIC Registers, says APIC_LVR has value 80??0010h out of reset. > > More specifically, Bit 31 is set which means the presence of extended APIC registers, and Bit 4 is set which is part of version number: "The local APIC implementation is identified with a value=1Xh (20h-FFh are > reserved)". > > I think you should verify those values instead of just reading from the hypervisor. Also, I think you probably should verify all of registers you read from the hypervisor before you use them in the guest. In other words, sanitize the inputs from the hypervisor. > Ok, I will add this verification of hv read data (wherever applicable) as incremental patches. - Neeraj > Thanks, > Melody
On Fri, Sep 13, 2024 at 05:06:55PM +0530, Neeraj Upadhyay wrote: > From: Kishon Vijay Abraham I <kvijayab@amd.com> > > Secure AVIC lets guest manage the APIC backing page (unlike emulated > x2APIC or x2AVIC where the hypervisor manages the APIC backing page). > > However the introduced Secure AVIC Linux design still maintains the > APIC backing page in the hypervisor to shadow the APIC backing page > maintained by guest (It should be noted only subset of the registers > are shadowed for specific usecases and registers like APIC_IRR, > APIC_ISR are not shadowed). > > Add sev_ghcb_msr_read() to invoke "SVM_EXIT_MSR" VMGEXIT to read > MSRs from hypervisor. Initialize the Secure AVIC's APIC backing > page by copying the initial state of shadow APIC backing page in > the hypervisor to the guest APIC backing page. Specifically copy > APIC_LVR, APIC_LDR, and APIC_LVT MSRs from the shadow APIC backing > page. You don't have to explain what the patch does - rather, why the patch exists in the first place and perhaps mention some non-obvious stuff why the code does what it does. Check your whole set pls. > Signed-off-by: Kishon Vijay Abraham I <kvijayab@amd.com> > Co-developed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com> > Signed-off-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com> > --- > arch/x86/coco/sev/core.c | 41 ++++++++++++++++----- > arch/x86/include/asm/sev.h | 2 ++ > arch/x86/kernel/apic/x2apic_savic.c | 55 +++++++++++++++++++++++++++++ > 3 files changed, 90 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c > index 93470538af5e..0e140f92cfef 100644 > --- a/arch/x86/coco/sev/core.c > +++ b/arch/x86/coco/sev/core.c > @@ -1331,18 +1331,15 @@ int __init sev_es_efi_map_ghcbs(pgd_t *pgd) > return 0; > } > > -static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) > +static enum es_result __vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write) Yeah, this one was bugging me already during Nikunj's set so I cleaned it up a bit differently: https://git.kernel.org/tip/8bca85cc1eb72e21a3544ab32e546a819d8674ca > +enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) Why is this a separate function if it is called only once from x2avic_savic.c? I think you should merge it with read_msr_from_hv(), rename latter to x2avic_read_msr_from_hv() and leave it here in sev/core.c. > +{ > + struct pt_regs regs = { .cx = msr }; > + struct es_em_ctxt ctxt = { .regs = ®s }; > + struct ghcb_state state; > + unsigned long flags; > + enum es_result ret; > + struct ghcb *ghcb; > + > + local_irq_save(flags); > + ghcb = __sev_get_ghcb(&state); > + vc_ghcb_invalidate(ghcb); > + > + ret = __vc_handle_msr(ghcb, &ctxt, false); > + if (ret == ES_OK) > + *value = regs.ax | regs.dx << 32; > + > + __sev_put_ghcb(&state); > + local_irq_restore(flags); > + > + return ret; > +} ... > diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c > index 6a471bbc3dba..99151be4e173 100644 > --- a/arch/x86/kernel/apic/x2apic_savic.c > +++ b/arch/x86/kernel/apic/x2apic_savic.c > @@ -11,6 +11,7 @@ > #include <linux/cc_platform.h> > #include <linux/percpu-defs.h> > #include <linux/align.h> > +#include <linux/sizes.h> > > #include <asm/apic.h> > #include <asm/sev.h> > @@ -20,6 +21,19 @@ > static DEFINE_PER_CPU(void *, apic_backing_page); > static DEFINE_PER_CPU(bool, savic_setup_done); > > +enum lapic_lvt_entry { What's that enum for? Oh, you want to use it below but you don't. Why? > + LVT_TIMER, > + LVT_THERMAL_MONITOR, > + LVT_PERFORMANCE_COUNTER, > + LVT_LINT0, > + LVT_LINT1, > + LVT_ERROR, > + > + APIC_MAX_NR_LVT_ENTRIES, > +}; > + > +#define APIC_LVTx(x) (APIC_LVTT + 0x10 * (x)) > + > static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) > { > return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); > @@ -35,6 +49,22 @@ static inline void set_reg(char *page, int reg_off, u32 val) > WRITE_ONCE(*((u32 *)(page + reg_off)), val); > } > > +static u32 read_msr_from_hv(u32 reg) A MSR's contents is u64. Make this function generic enough and have the callsite select only the lower dword. > +{ > + u64 data, msr; > + int ret; > + > + msr = APIC_BASE_MSR + (reg >> 4); > + ret = sev_ghcb_msr_read(msr, &data); > + if (ret != ES_OK) { > + pr_err("Secure AVIC msr (%#llx) read returned error (%d)\n", msr, ret); Prepend "0x" to the format specifier. > + /* MSR read failures are treated as fatal errors */ > + snp_abort(); > + } > + > + return lower_32_bits(data); > +} -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 11/7/2024 8:58 PM, Borislav Petkov wrote: > On Fri, Sep 13, 2024 at 05:06:55PM +0530, Neeraj Upadhyay wrote: >> From: Kishon Vijay Abraham I <kvijayab@amd.com> >> >> Secure AVIC lets guest manage the APIC backing page (unlike emulated >> x2APIC or x2AVIC where the hypervisor manages the APIC backing page). >> >> However the introduced Secure AVIC Linux design still maintains the >> APIC backing page in the hypervisor to shadow the APIC backing page >> maintained by guest (It should be noted only subset of the registers >> are shadowed for specific usecases and registers like APIC_IRR, >> APIC_ISR are not shadowed). >> >> Add sev_ghcb_msr_read() to invoke "SVM_EXIT_MSR" VMGEXIT to read >> MSRs from hypervisor. Initialize the Secure AVIC's APIC backing >> page by copying the initial state of shadow APIC backing page in >> the hypervisor to the guest APIC backing page. Specifically copy >> APIC_LVR, APIC_LDR, and APIC_LVT MSRs from the shadow APIC backing >> page. > > You don't have to explain what the patch does - rather, why the patch exists > in the first place and perhaps mention some non-obvious stuff why the code > does what it does. > > Check your whole set pls. I will improve on this in the next version. >> -static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) >> +static enum es_result __vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt, bool write) > > Yeah, this one was bugging me already during Nikunj's set so I cleaned it up > a bit differently: > > https://git.kernel.org/tip/8bca85cc1eb72e21a3544ab32e546a819d8674ca > Ok nice! I will rebase. >> +enum es_result sev_ghcb_msr_read(u64 msr, u64 *value) > > Why is this a separate function if it is called only once from x2avic_savic.c? > As sev_ghcb_msr_read() work with any msr and is not limited to reading x2apic msrs, I created a global sev function for it. > I think you should merge it with read_msr_from_hv(), rename latter to > > x2avic_read_msr_from_hv() > > and leave it here in sev/core.c. > Ok sure, I will leave generalizing this to future use cases (if/when they come up) and provide a secure avic specific function here (will do the same for sev_ghcb_msr_write(), which comes later in this series). "x2avic" terminology is not used in guest code. As this function only has secure avic user, does secure_avic_ghcb_msr_read() work? >> +enum lapic_lvt_entry { > > What's that enum for? It's used in init_backing_page() for (i = LVT_THERMAL_MONITOR; i < APIC_MAX_NR_LVT_ENTRIES; i++) { val = read_msr_from_hv(APIC_LVTx(i)); set_reg(backing_page, APIC_LVTx(i), val); } > > Oh, you want to use it below but you don't. Why? > As LVT_TIMER is unused, I will remove it: enum lapic_lvt_entry { LVT_THERMAL_MONITOR = 1, LVT_PERFORMANCE_COUNTER, LVT_LINT0, LVT_LINT1, LVT_ERROR, APIC_MAX_NR_LVT_ENTRIES, }; >> + LVT_TIMER, >> + LVT_THERMAL_MONITOR, >> + LVT_PERFORMANCE_COUNTER, >> + LVT_LINT0, >> + LVT_LINT1, >> + LVT_ERROR, >> + >> + APIC_MAX_NR_LVT_ENTRIES, >> +}; >> + >> +#define APIC_LVTx(x) (APIC_LVTT + 0x10 * (x)) >> + >> static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) >> { >> return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); >> @@ -35,6 +49,22 @@ static inline void set_reg(char *page, int reg_off, u32 val) >> WRITE_ONCE(*((u32 *)(page + reg_off)), val); >> } >> >> +static u32 read_msr_from_hv(u32 reg) > > A MSR's contents is u64. Make this function generic enough and have the > callsite select only the lower dword. > Ok sure, will update. >> +{ >> + u64 data, msr; >> + int ret; >> + >> + msr = APIC_BASE_MSR + (reg >> 4); >> + ret = sev_ghcb_msr_read(msr, &data); >> + if (ret != ES_OK) { >> + pr_err("Secure AVIC msr (%#llx) read returned error (%d)\n", msr, ret); > > Prepend "0x" to the format specifier. > Using '#' prepends "0x". Am I missing something here? - Neeraj
On Fri, Nov 08, 2024 at 11:38:15PM +0530, Neeraj Upadhyay wrote: > As sev_ghcb_msr_read() work with any msr and is not limited to reading > x2apic msrs, I created a global sev function for it. That can happen when the functionality is needed somewhere else too. > Ok sure, I will leave generalizing this to future use cases (if/when they come up) Yap. > and provide a secure avic specific function here (will do the same for > sev_ghcb_msr_write(), which comes later in this series). > > "x2avic" terminology is not used in guest code. As this function only has secure > avic user, does secure_avic_ghcb_msr_read() work? That or "savic_..." > >> +enum lapic_lvt_entry { > > > > What's that enum for? > > It's used in init_backing_page() Then use it properly: diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index 99151be4e173..1753c4a71b50 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -200,8 +200,8 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in static void init_backing_page(void *backing_page) { + enum lapic_lvt_entry i; u32 val; - int i; val = read_msr_from_hv(APIC_LVR); set_reg(backing_page, APIC_LVR, val); > >> + pr_err("Secure AVIC msr (%#llx) read returned error (%d)\n", msr, ret); > > > > Prepend "0x" to the format specifier. > > > > Using '#' prepends "0x". Am I missing something here? Let's use the common thing pls even if the alternate form works too: $ git grep "\"%#" | wc -l 411 $ git grep "\"0x" | wc -l 112823 -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
>> and provide a secure avic specific function here (will do the same for >> sev_ghcb_msr_write(), which comes later in this series). >> >> "x2avic" terminology is not used in guest code. As this function only has secure >> avic user, does secure_avic_ghcb_msr_read() work? > > That or "savic_..." > Ok sure. >>>> +enum lapic_lvt_entry { >>> >>> What's that enum for? >> >> It's used in init_backing_page() > > Then use it properly: > I will update it. > diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c > index 99151be4e173..1753c4a71b50 100644 > --- a/arch/x86/kernel/apic/x2apic_savic.c > +++ b/arch/x86/kernel/apic/x2apic_savic.c > @@ -200,8 +200,8 @@ static void x2apic_savic_send_IPI_mask_allbutself(const struct cpumask *mask, in > > static void init_backing_page(void *backing_page) > { > + enum lapic_lvt_entry i; > u32 val; > - int i; > > val = read_msr_from_hv(APIC_LVR); > set_reg(backing_page, APIC_LVR, val); > >>>> + pr_err("Secure AVIC msr (%#llx) read returned error (%d)\n", msr, ret); >>> >>> Prepend "0x" to the format specifier. >>> >> >> Using '#' prepends "0x". Am I missing something here? > > Let's use the common thing pls even if the alternate form works too: > Ok sure, will change to "0x". - Neeraj > $ git grep "\"%#" | wc -l > 411 > $ git grep "\"0x" | wc -l > 112823 >
© 2016 - 2024 Red Hat, Inc.