arch/x86/include/asm/svm.h | 1 + arch/x86/kvm/svm/svm.h | 42 ------------- arch/x86/kvm/svm/sev.c | 24 ++++++++ arch/x86/kvm/svm/svm.c | 65 +++++++++++++++++++- 4 files changed, 87 insertions(+), 45 deletions(-)
Prior to SEV-ES, KVM stored/loaded host debug registers upon switching
to/from a VM. Changing those registers inside a running SEV VM
triggered #VC exit to KVM.
SEV-ES added the encrypted state (ES) which uses an encrypted guest page
for the VM state (VMSA). The hardware saves/restores certain registers on
VMRUN/VMEXIT according to a swap type (A, B, C), see
"Table B-3. Swap Types" in the AMD Architecture Programmer’s Manual
volume 2.
AMD Milan (Fam 19h) introduces support for the debug registers swapping.
DR6 and DR7 are always swapped. DR[0-3] and DR[0-3]_ADDR_MASK are swapped
a type B when SEV_FEATURES[5] ("DebugSwap") is set.
Enable DebugSwap in VMSA. But only do so if CPUID Fn80000021_EAX[0]
("NoNestedDataBp", "Processor ignores nested data breakpoints") is
supported by the SOC as otherwise a malicious SEV-ES guest can set up
data breakpoints on the #VC IDT entry/stack and cause an infinite loop.
Eliminate DR7 and #DB intercepts as:
- they are not needed when DebugSwap is supported;
- #VC for these intercepts is most likely not supported anyway and
kills the VM.
Keep DR7 intercepted unless DebugSwap enabled to prevent the infinite #DB
loop DoS.
While at this, move set_/clr_dr_intercepts to .c and move #DB intercept
next to DR7 intercept.
Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
---
Changes:
v4:
* removed sev_es_is_debug_swap_enabled() helper
* made sev_es_debug_swap_enabled (module param) static
* set sev_feature early in sev_es_init_vmcb() and made intercepts
dependend on it vs. module param
* move set_/clr_dr_intercepts to .c
v3:
* rewrote the commit log again
* rebased on tip/master to use recently defined X86_FEATURE_NO_NESTED_DATA_BP
* s/boot_cpu_has/cpu_feature_enabled/
v2:
* debug_swap moved from vcpu to module_param
* rewrote commit log
---
Tested with:
===
int x;
int main(int argc, char *argv[])
{
x = 1;
return 0;
}
===
gcc -g a.c
rsync a.out ruby-954vm:~/
ssh -t ruby-954vm 'gdb -ex "file a.out" -ex "watch x" -ex r'
where ruby-954vm is a VM.
With "/sys/module/kvm_amd/parameters/debug_swap = 0", gdb does not stop
on the watchpoint, with "= 1" - gdb does.
---
arch/x86/include/asm/svm.h | 1 +
arch/x86/kvm/svm/svm.h | 42 -------------
arch/x86/kvm/svm/sev.c | 24 ++++++++
arch/x86/kvm/svm/svm.c | 65 +++++++++++++++++++-
4 files changed, 87 insertions(+), 45 deletions(-)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index cb1ee53ad3b1..665515c7edae 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -278,6 +278,7 @@ enum avic_ipi_failure_cause {
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
struct vmcb_seg {
u16 selector;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 4826e6cc611b..653fd09929df 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -389,48 +389,6 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3
return test_bit(bit, (unsigned long *)&control->intercepts);
}
-static inline void set_dr_intercepts(struct vcpu_svm *svm)
-{
- struct vmcb *vmcb = svm->vmcb01.ptr;
-
- if (!sev_es_guest(svm->vcpu.kvm)) {
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
- }
-
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
-
- recalc_intercepts(svm);
-}
-
-static inline void clr_dr_intercepts(struct vcpu_svm *svm)
-{
- struct vmcb *vmcb = svm->vmcb01.ptr;
-
- vmcb->control.intercepts[INTERCEPT_DR] = 0;
-
- /* DR7 access must remain intercepted for an SEV-ES guest */
- if (sev_es_guest(svm->vcpu.kvm)) {
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
- }
-
- recalc_intercepts(svm);
-}
-
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 86d6897f4806..af775410c5eb 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -21,6 +21,7 @@
#include <asm/pkru.h>
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>
+#include <asm/debugreg.h>
#include "mmu.h"
#include "x86.h"
@@ -52,9 +53,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
/* enable/disable SEV-ES support */
static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);
+
+/* enable/disable SEV-ES DebugSwap support */
+static bool sev_es_debug_swap_enabled = true;
+module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0644);
#else
#define sev_enabled false
#define sev_es_enabled false
+#define sev_es_debug_swap false
#endif /* CONFIG_KVM_AMD_SEV */
static u8 sev_enc_bit;
@@ -2249,6 +2255,8 @@ void __init sev_hardware_setup(void)
out:
sev_enabled = sev_supported;
sev_es_enabled = sev_es_supported;
+ if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
+ sev_es_debug_swap_enabled = false;
#endif
}
@@ -2940,6 +2948,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
static void sev_es_init_vmcb(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct sev_es_save_area *save = svm->sev_es.vmsa;
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
@@ -2988,6 +2997,9 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
svm_clr_intercept(svm, INTERCEPT_RDTSCP);
}
+
+ if (sev_es_debug_swap_enabled)
+ save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
}
void sev_init_vmcb(struct vcpu_svm *svm)
@@ -3027,6 +3039,18 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
/* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */
hostsa->xss = host_xss;
+
+ /* The DebugSwap SEV feature does Type B swaps of DR[0-3] */
+ if (sev_es_debug_swap_enabled) {
+ hostsa->dr0 = native_get_debugreg(0);
+ hostsa->dr1 = native_get_debugreg(1);
+ hostsa->dr2 = native_get_debugreg(2);
+ hostsa->dr3 = native_get_debugreg(3);
+ hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
+ hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
+ hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
+ hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
+ }
}
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 60c7c880266b..f8e222bee22a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -671,6 +671,65 @@ static int svm_cpu_init(int cpu)
}
+static void set_dr_intercepts(struct vcpu_svm *svm)
+{
+ struct vmcb *vmcb = svm->vmcb01.ptr;
+ bool intercept;
+
+ if (!sev_es_guest(svm->vcpu.kvm)) {
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+ }
+
+ if (sev_es_guest(svm->vcpu.kvm)) {
+ struct sev_es_save_area *save = svm->sev_es.vmsa;
+
+ intercept = !(save->sev_features & SVM_SEV_FEAT_DEBUG_SWAP);
+ } else {
+ intercept = true;
+ }
+
+ if (intercept) {
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+ set_exception_intercept(svm, DB_VECTOR);
+ }
+
+ recalc_intercepts(svm);
+}
+
+static void clr_dr_intercepts(struct vcpu_svm *svm)
+{
+ struct vmcb *vmcb = svm->vmcb01.ptr;
+ struct sev_es_save_area *save = svm->sev_es.vmsa;
+
+ vmcb->control.intercepts[INTERCEPT_DR] = 0;
+
+ /*
+ * DR7 access must remain intercepted for an SEV-ES guest unless DebugSwap
+ * (depends on NO_NESTED_DATA_BP) is enabled as otherwise a VM writing to DR7
+ * from the #DB handler may trigger infinite loop of #DB's.
+ */
+ if (sev_es_guest(svm->vcpu.kvm) && (save->sev_features & SVM_SEV_FEAT_DEBUG_SWAP)) {
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+ }
+
+ recalc_intercepts(svm);
+}
+
static int direct_access_msr_slot(u32 msr)
{
u32 i;
@@ -1184,13 +1243,11 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
if (!kvm_vcpu_apicv_active(vcpu))
svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
- set_dr_intercepts(svm);
-
set_exception_intercept(svm, PF_VECTOR);
set_exception_intercept(svm, UD_VECTOR);
set_exception_intercept(svm, MC_VECTOR);
set_exception_intercept(svm, AC_VECTOR);
- set_exception_intercept(svm, DB_VECTOR);
+
/*
* Guest access to VMware backdoor ports could legitimately
* trigger #GP because of TSS I/O permission bitmap.
@@ -1308,6 +1365,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
if (sev_guest(vcpu->kvm))
sev_init_vmcb(svm);
+ set_dr_intercepts(svm);
+
svm_hv_init_vmcb(vmcb);
init_vmcb_after_set_cpuid(vcpu);
--
2.39.1
On Fri, Feb 03, 2023, Alexey Kardashevskiy wrote:
> While at this, move set_/clr_dr_intercepts to .c and move #DB intercept
> next to DR7 intercept.
Please do non-trivial code movement in separate patches unless the functional change
is trivial. Moving and changing at the same time makes the patch difficult to review.
> @@ -52,9 +53,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
> /* enable/disable SEV-ES support */
> static bool sev_es_enabled = true;
> module_param_named(sev_es, sev_es_enabled, bool, 0444);
> +
> +/* enable/disable SEV-ES DebugSwap support */
> +static bool sev_es_debug_swap_enabled = true;
> +module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0644);
Needs to be 0444, otherwise userspace can turn on the knob after KVM is loaded,
which would allow enabling the feature on unsupported platforms, amongst many
other problems.
> void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 60c7c880266b..f8e222bee22a 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -671,6 +671,65 @@ static int svm_cpu_init(int cpu)
>
> }
>
> +static void set_dr_intercepts(struct vcpu_svm *svm)
> +{
> + struct vmcb *vmcb = svm->vmcb01.ptr;
> + bool intercept;
> +
> + if (!sev_es_guest(svm->vcpu.kvm)) {
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
> + }
> +
> + if (sev_es_guest(svm->vcpu.kvm)) {
> + struct sev_es_save_area *save = svm->sev_es.vmsa;
> +
> + intercept = !(save->sev_features & SVM_SEV_FEAT_DEBUG_SWAP);
Blech, the VMCB vs. SEV and SEV-ES code is a mess. E.g. init_vmcb() does
/*
* Guest access to VMware backdoor ports could legitimately
* trigger #GP because of TSS I/O permission bitmap.
* We intercept those #GP and allow access to them anyway
* as VMware does. Don't intercept #GP for SEV guests as KVM can't
* decrypt guest memory to decode the faulting instruction.
*/
if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
set_exception_intercept(svm, GP_VECTOR);
but then sev_es_init_vmcb() also does:
/* No support for enable_vmware_backdoor */
clr_exception_intercept(svm, GP_VECTOR);
DR interception is a similar trainwreck. svm_sync_dirty_debug_regs() bails if
guest_state_protected is true, i.e. is a nop for SEV-ES guests, but only after
the vCPU has done LAUNCH_UPDATE_VMSA. IIUC, that's nonsensical because even before
guest state is encrypted, #DB will be reflected as #VC into the guest. And, again
IIUC, except for DR7, DRs are never intercepted for SEV-ES guests and so trying
to debug from the host is futile as the guest can clobber DRs at any time.
Similarly, flowing into dr_interception() on an SEV-ES VMGEXITis just dumb. KVM
_knows_ it can't give the guest control of DR7, but it mucks with the intercepts
anyways. That the GHCB spec even allows SVM_EXIT_{READ,WRITE}_DR7 is just asinine,
but that's a moot point. Anyways, the GHCB spec's "suggestion" effectively says
KVM's responsibility is purely to make a read of DR7 return the last written value.
And of course KVM's disaster of a flow doesn't even do that unless the host is
debugging the guest.
Currently, hardware debug traps aren’t supported for an SEV-ES guest. The hypervisor
must set the intercept for both read and write of the debug control register (DR7).
With the intercepts in place, the #VC handler will be invoked when the guest accesses
DR7. For a write to DR7, the #VC handler should perform Standard VMGExit processing.
The #VC handler must not update the actual DR7 register, but rather it should cache
the DR7 value being written.
I bring this up because of the subtle dependency that checking SVM_SEV_FEAT_DEBUG_SWAP
creates: set_dr_intercepts() needs to be called after sev_init_vmcb(). I believe
this approach also fails to handle intrahost migration; at the very least, what
exactly will happen when sev_migrate_from() invokes sev_init_vmcb() is unclear.
And I really don't want to pile even more gunk on top of the existing mess.
So, can you (and by "you" I really mean "the folks at AMD working on SEV stuff")
start with the below diff (not intended to be a single patch), disallow
kvm_arch_vcpu_ioctl_set_guest_debug() entirely for SEV-ES guests (will likely
take some back and forth to figure out how we want to do this), and then fill
in the blanks? I.e. get KVM to a state where all the intercept shenanigans for
SEV and SEV-ES are reasonably contained in sev.c, and then enable the debug_swap
stuff on top?
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index c25aeb550cd9..ff7a4d68731c 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2968,8 +2968,7 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
svm_set_intercept(svm, TRAP_CR4_WRITE);
svm_set_intercept(svm, TRAP_CR8_WRITE);
- /* No support for enable_vmware_backdoor */
- clr_exception_intercept(svm, GP_VECTOR);
+ <debug register stuff goes here>
/* Can't intercept XSETBV, HV can't modify XCR0 directly */
svm_clr_intercept(svm, INTERCEPT_XSETBV);
@@ -2996,6 +2995,12 @@ void sev_init_vmcb(struct vcpu_svm *svm)
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
clr_exception_intercept(svm, UD_VECTOR);
+ /*
+ * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as
+ * KVM can't decrypt guest memory to decode the faulting instruction.
+ */
+ clr_exception_intercept(svm, GP_VECTOR);
+
if (sev_es_guest(svm->vcpu.kvm))
sev_es_init_vmcb(svm);
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e0ec95f1f068..89753d7fd821 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1209,10 +1209,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
* Guest access to VMware backdoor ports could legitimately
* trigger #GP because of TSS I/O permission bitmap.
* We intercept those #GP and allow access to them anyway
- * as VMware does. Don't intercept #GP for SEV guests as KVM can't
- * decrypt guest memory to decode the faulting instruction.
+ * as VMware does.
*/
- if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
+ if (enable_vmware_backdoor)
set_exception_intercept(svm, GP_VECTOR);
svm_set_intercept(svm, INTERCEPT_INTR);
@@ -1950,7 +1949,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (vcpu->arch.guest_state_protected)
+ if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
return;
get_debugreg(vcpu->arch.db[0], 0);
@@ -2681,7 +2680,7 @@ static int dr_interception(struct kvm_vcpu *vcpu)
unsigned long val;
int err = 0;
- if (vcpu->guest_debug == 0) {
+ if (vcpu->guest_debug == 0 && !sev_es_guest(vcpu->kvm)) {
/*
* No more DR vmexits; force a reload of the debug registers
* and reenter on this instruction. The next vmexit will
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f44751dd8d5d..7c99a7d55476 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -409,23 +409,25 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
- if (!sev_es_guest(svm->vcpu.kvm)) {
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+ if (sev_es_guest(svm->vcpu.kvm)) {
+ WARN_ON_ONCE(svm->vcpu.arch.last_vmentry_cpu != -1);
+ return;
}
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
@@ -436,13 +438,13 @@ static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
+ if (WARN_ON_ONCE(sev_es_guest(svm->vcpu.kvm)))
+ return;
+
vmcb->control.intercepts[INTERCEPT_DR] = 0;
- /* DR7 access must remain intercepted for an SEV-ES guest */
- if (sev_es_guest(svm->vcpu.kvm)) {
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
- }
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
recalc_intercepts(svm);
}
On 3/23/23 12:40, Sean Christopherson wrote:
> On Fri, Feb 03, 2023, Alexey Kardashevskiy wrote:
>> While at this, move set_/clr_dr_intercepts to .c and move #DB intercept
>> next to DR7 intercept.
>
> Please do non-trivial code movement in separate patches unless the functional change
> is trivial. Moving and changing at the same time makes the patch difficult to review.
>
>> @@ -52,9 +53,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
>> /* enable/disable SEV-ES support */
>> static bool sev_es_enabled = true;
>> module_param_named(sev_es, sev_es_enabled, bool, 0444);
>> +
>> +/* enable/disable SEV-ES DebugSwap support */
>> +static bool sev_es_debug_swap_enabled = true;
>> +module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0644);
>
> Needs to be 0444, otherwise userspace can turn on the knob after KVM is loaded,
> which would allow enabling the feature on unsupported platforms, amongst many
> other problems.
>
>> void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index 60c7c880266b..f8e222bee22a 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -671,6 +671,65 @@ static int svm_cpu_init(int cpu)
>>
>> }
>>
>> +static void set_dr_intercepts(struct vcpu_svm *svm)
>> +{
>> + struct vmcb *vmcb = svm->vmcb01.ptr;
>> + bool intercept;
>> +
>> + if (!sev_es_guest(svm->vcpu.kvm)) {
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
>> + }
>> +
>> + if (sev_es_guest(svm->vcpu.kvm)) {
>> + struct sev_es_save_area *save = svm->sev_es.vmsa;
>> +
>> + intercept = !(save->sev_features & SVM_SEV_FEAT_DEBUG_SWAP);
>
> Blech, the VMCB vs. SEV and SEV-ES code is a mess. E.g. init_vmcb() does
>
> /*
> * Guest access to VMware backdoor ports could legitimately
> * trigger #GP because of TSS I/O permission bitmap.
> * We intercept those #GP and allow access to them anyway
> * as VMware does. Don't intercept #GP for SEV guests as KVM can't
> * decrypt guest memory to decode the faulting instruction.
> */
> if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
> set_exception_intercept(svm, GP_VECTOR);
>
> but then sev_es_init_vmcb() also does:
>
> /* No support for enable_vmware_backdoor */
> clr_exception_intercept(svm, GP_VECTOR);
>
> DR interception is a similar trainwreck. svm_sync_dirty_debug_regs() bails if
> guest_state_protected is true, i.e. is a nop for SEV-ES guests, but only after
> the vCPU has done LAUNCH_UPDATE_VMSA. IIUC, that's nonsensical because even before
> guest state is encrypted, #DB will be reflected as #VC into the guest. And, again
A guest can't run before the LAUNCH_UPDATE process is complete, so there
can't be a #VC before guest_state_proteced is true.
> IIUC, except for DR7, DRs are never intercepted for SEV-ES guests and so trying
> to debug from the host is futile as the guest can clobber DRs at any time.
>
> Similarly, flowing into dr_interception() on an SEV-ES VMGEXITis just dumb. KVM
> _knows_ it can't give the guest control of DR7, but it mucks with the intercepts
> anyways. That the GHCB spec even allows SVM_EXIT_{READ,WRITE}_DR7 is just asinine,
> but that's a moot point. Anyways, the GHCB spec's "suggestion" effectively says
> KVM's responsibility is purely to make a read of DR7 return the last written value.
That's not KVM's responsibility, that is the responsibility of the guest
#VC handler. So a DR7 read, while intercepted, should never get to KVM.
> And of course KVM's disaster of a flow doesn't even do that unless the host is
> debugging the guest.
>
> Currently, hardware debug traps aren’t supported for an SEV-ES guest. The hypervisor
> must set the intercept for both read and write of the debug control register (DR7).
> With the intercepts in place, the #VC handler will be invoked when the guest accesses
> DR7. For a write to DR7, the #VC handler should perform Standard VMGExit processing.
> The #VC handler must not update the actual DR7 register, but rather it should cache
> the DR7 value being written.
>
> I bring this up because of the subtle dependency that checking SVM_SEV_FEAT_DEBUG_SWAP
> creates: set_dr_intercepts() needs to be called after sev_init_vmcb(). I believe
> this approach also fails to handle intrahost migration; at the very least, what
> exactly will happen when sev_migrate_from() invokes sev_init_vmcb() is unclear.
> And I really don't want to pile even more gunk on top of the existing mess.
>
> So, can you (and by "you" I really mean "the folks at AMD working on SEV stuff")
> start with the below diff (not intended to be a single patch), disallow
> kvm_arch_vcpu_ioctl_set_guest_debug() entirely for SEV-ES guests (will likely
> take some back and forth to figure out how we want to do this), and then fill
> in the blanks? I.e. get KVM to a state where all the intercept shenanigans for
> SEV and SEV-ES are reasonably contained in sev.c, and then enable the debug_swap
> stuff on top?
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index c25aeb550cd9..ff7a4d68731c 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -2968,8 +2968,7 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
> svm_set_intercept(svm, TRAP_CR4_WRITE);
> svm_set_intercept(svm, TRAP_CR8_WRITE);
>
> - /* No support for enable_vmware_backdoor */
> - clr_exception_intercept(svm, GP_VECTOR);
> + <debug register stuff goes here>
>
> /* Can't intercept XSETBV, HV can't modify XCR0 directly */
> svm_clr_intercept(svm, INTERCEPT_XSETBV);
> @@ -2996,6 +2995,12 @@ void sev_init_vmcb(struct vcpu_svm *svm)
> svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
> clr_exception_intercept(svm, UD_VECTOR);
>
> + /*
> + * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as
> + * KVM can't decrypt guest memory to decode the faulting instruction.
> + */
> + clr_exception_intercept(svm, GP_VECTOR);
> +
> if (sev_es_guest(svm->vcpu.kvm))
> sev_es_init_vmcb(svm);
> }
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index e0ec95f1f068..89753d7fd821 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1209,10 +1209,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
> * Guest access to VMware backdoor ports could legitimately
> * trigger #GP because of TSS I/O permission bitmap.
> * We intercept those #GP and allow access to them anyway
> - * as VMware does. Don't intercept #GP for SEV guests as KVM can't
> - * decrypt guest memory to decode the faulting instruction.
> + * as VMware does.
> */
> - if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
> + if (enable_vmware_backdoor)
> set_exception_intercept(svm, GP_VECTOR);
>
> svm_set_intercept(svm, INTERCEPT_INTR);
> @@ -1950,7 +1949,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> - if (vcpu->arch.guest_state_protected)
> + if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
> return;
>
> get_debugreg(vcpu->arch.db[0], 0);
> @@ -2681,7 +2680,7 @@ static int dr_interception(struct kvm_vcpu *vcpu)
> unsigned long val;
> int err = 0;
>
> - if (vcpu->guest_debug == 0) {
> + if (vcpu->guest_debug == 0 && !sev_es_guest(vcpu->kvm)) {
This will change the current flow of an SEV-ES guest. With SEV-ES,
vcpu->guest_debug can never be anything other than 0 and currently always
takes this path.
So what is really needed is:
if (vcpu->guest_debug == 0) {
if (!sev_es_guest(vcpu->kvm)) {
...
}
return 1;
}
> /*
> * No more DR vmexits; force a reload of the debug registers
> * and reenter on this instruction. The next vmexit will
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index f44751dd8d5d..7c99a7d55476 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -409,23 +409,25 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm)
> {
> struct vmcb *vmcb = svm->vmcb01.ptr;
>
> - if (!sev_es_guest(svm->vcpu.kvm)) {
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
> + if (sev_es_guest(svm->vcpu.kvm)) {
> + WARN_ON_ONCE(svm->vcpu.arch.last_vmentry_cpu != -1);
> + return;
> }
>
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
> vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
> vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
>
> @@ -436,13 +438,13 @@ static inline void clr_dr_intercepts(struct vcpu_svm *svm)
> {
> struct vmcb *vmcb = svm->vmcb01.ptr;
>
> + if (WARN_ON_ONCE(sev_es_guest(svm->vcpu.kvm)))
> + return;
> +
> vmcb->control.intercepts[INTERCEPT_DR] = 0;
>
> - /* DR7 access must remain intercepted for an SEV-ES guest */
> - if (sev_es_guest(svm->vcpu.kvm)) {
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
> - }
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
If we never call clr_dr_intercepts() anymore for an SEV-ES guest, then the
above two lines should be removed. They only were executed for an SEV-ES
guest and now they would be executed for any guest.
Thanks,
Tom
>
> recalc_intercepts(svm);
> }
>
Ping? Thanks,
On 3/2/23 16:14, Alexey Kardashevskiy wrote:
> Prior to SEV-ES, KVM stored/loaded host debug registers upon switching
> to/from a VM. Changing those registers inside a running SEV VM
> triggered #VC exit to KVM.
>
> SEV-ES added the encrypted state (ES) which uses an encrypted guest page
> for the VM state (VMSA). The hardware saves/restores certain registers on
> VMRUN/VMEXIT according to a swap type (A, B, C), see
> "Table B-3. Swap Types" in the AMD Architecture Programmer’s Manual
> volume 2.
>
> AMD Milan (Fam 19h) introduces support for the debug registers swapping.
> DR6 and DR7 are always swapped. DR[0-3] and DR[0-3]_ADDR_MASK are swapped
> a type B when SEV_FEATURES[5] ("DebugSwap") is set.
>
> Enable DebugSwap in VMSA. But only do so if CPUID Fn80000021_EAX[0]
> ("NoNestedDataBp", "Processor ignores nested data breakpoints") is
> supported by the SOC as otherwise a malicious SEV-ES guest can set up
> data breakpoints on the #VC IDT entry/stack and cause an infinite loop.
>
> Eliminate DR7 and #DB intercepts as:
> - they are not needed when DebugSwap is supported;
> - #VC for these intercepts is most likely not supported anyway and
> kills the VM.
> Keep DR7 intercepted unless DebugSwap enabled to prevent the infinite #DB
> loop DoS.
>
> While at this, move set_/clr_dr_intercepts to .c and move #DB intercept
> next to DR7 intercept.
>
> Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
> ---
> Changes:
> v4:
> * removed sev_es_is_debug_swap_enabled() helper
> * made sev_es_debug_swap_enabled (module param) static
> * set sev_feature early in sev_es_init_vmcb() and made intercepts
> dependend on it vs. module param
> * move set_/clr_dr_intercepts to .c
>
> v3:
> * rewrote the commit log again
> * rebased on tip/master to use recently defined X86_FEATURE_NO_NESTED_DATA_BP
> * s/boot_cpu_has/cpu_feature_enabled/
>
> v2:
> * debug_swap moved from vcpu to module_param
> * rewrote commit log
>
> ---
> Tested with:
> ===
> int x;
> int main(int argc, char *argv[])
> {
> x = 1;
> return 0;
> }
> ===
> gcc -g a.c
> rsync a.out ruby-954vm:~/
> ssh -t ruby-954vm 'gdb -ex "file a.out" -ex "watch x" -ex r'
>
> where ruby-954vm is a VM.
>
> With "/sys/module/kvm_amd/parameters/debug_swap = 0", gdb does not stop
> on the watchpoint, with "= 1" - gdb does.
> ---
> arch/x86/include/asm/svm.h | 1 +
> arch/x86/kvm/svm/svm.h | 42 -------------
> arch/x86/kvm/svm/sev.c | 24 ++++++++
> arch/x86/kvm/svm/svm.c | 65 +++++++++++++++++++-
> 4 files changed, 87 insertions(+), 45 deletions(-)
>
> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
> index cb1ee53ad3b1..665515c7edae 100644
> --- a/arch/x86/include/asm/svm.h
> +++ b/arch/x86/include/asm/svm.h
> @@ -278,6 +278,7 @@ enum avic_ipi_failure_cause {
> #define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
> #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
>
> +#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
>
> struct vmcb_seg {
> u16 selector;
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 4826e6cc611b..653fd09929df 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -389,48 +389,6 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3
> return test_bit(bit, (unsigned long *)&control->intercepts);
> }
>
> -static inline void set_dr_intercepts(struct vcpu_svm *svm)
> -{
> - struct vmcb *vmcb = svm->vmcb01.ptr;
> -
> - if (!sev_es_guest(svm->vcpu.kvm)) {
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
> - }
> -
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
> -
> - recalc_intercepts(svm);
> -}
> -
> -static inline void clr_dr_intercepts(struct vcpu_svm *svm)
> -{
> - struct vmcb *vmcb = svm->vmcb01.ptr;
> -
> - vmcb->control.intercepts[INTERCEPT_DR] = 0;
> -
> - /* DR7 access must remain intercepted for an SEV-ES guest */
> - if (sev_es_guest(svm->vcpu.kvm)) {
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
> - }
> -
> - recalc_intercepts(svm);
> -}
> -
> static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
> {
> struct vmcb *vmcb = svm->vmcb01.ptr;
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 86d6897f4806..af775410c5eb 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -21,6 +21,7 @@
> #include <asm/pkru.h>
> #include <asm/trapnr.h>
> #include <asm/fpu/xcr.h>
> +#include <asm/debugreg.h>
>
> #include "mmu.h"
> #include "x86.h"
> @@ -52,9 +53,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
> /* enable/disable SEV-ES support */
> static bool sev_es_enabled = true;
> module_param_named(sev_es, sev_es_enabled, bool, 0444);
> +
> +/* enable/disable SEV-ES DebugSwap support */
> +static bool sev_es_debug_swap_enabled = true;
> +module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0644);
> #else
> #define sev_enabled false
> #define sev_es_enabled false
> +#define sev_es_debug_swap false
> #endif /* CONFIG_KVM_AMD_SEV */
>
> static u8 sev_enc_bit;
> @@ -2249,6 +2255,8 @@ void __init sev_hardware_setup(void)
> out:
> sev_enabled = sev_supported;
> sev_es_enabled = sev_es_supported;
> + if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
> + sev_es_debug_swap_enabled = false;
> #endif
> }
>
> @@ -2940,6 +2948,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
> static void sev_es_init_vmcb(struct vcpu_svm *svm)
> {
> struct kvm_vcpu *vcpu = &svm->vcpu;
> + struct sev_es_save_area *save = svm->sev_es.vmsa;
>
> svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
> svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
> @@ -2988,6 +2997,9 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
> if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
> svm_clr_intercept(svm, INTERCEPT_RDTSCP);
> }
> +
> + if (sev_es_debug_swap_enabled)
> + save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
> }
>
> void sev_init_vmcb(struct vcpu_svm *svm)
> @@ -3027,6 +3039,18 @@ void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
>
> /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */
> hostsa->xss = host_xss;
> +
> + /* The DebugSwap SEV feature does Type B swaps of DR[0-3] */
> + if (sev_es_debug_swap_enabled) {
> + hostsa->dr0 = native_get_debugreg(0);
> + hostsa->dr1 = native_get_debugreg(1);
> + hostsa->dr2 = native_get_debugreg(2);
> + hostsa->dr3 = native_get_debugreg(3);
> + hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
> + hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
> + hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
> + hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
> + }
> }
>
> void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 60c7c880266b..f8e222bee22a 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -671,6 +671,65 @@ static int svm_cpu_init(int cpu)
>
> }
>
> +static void set_dr_intercepts(struct vcpu_svm *svm)
> +{
> + struct vmcb *vmcb = svm->vmcb01.ptr;
> + bool intercept;
> +
> + if (!sev_es_guest(svm->vcpu.kvm)) {
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
> + }
> +
> + if (sev_es_guest(svm->vcpu.kvm)) {
> + struct sev_es_save_area *save = svm->sev_es.vmsa;
> +
> + intercept = !(save->sev_features & SVM_SEV_FEAT_DEBUG_SWAP);
> + } else {
> + intercept = true;
> + }
> +
> + if (intercept) {
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
> + set_exception_intercept(svm, DB_VECTOR);
> + }
> +
> + recalc_intercepts(svm);
> +}
> +
> +static void clr_dr_intercepts(struct vcpu_svm *svm)
> +{
> + struct vmcb *vmcb = svm->vmcb01.ptr;
> + struct sev_es_save_area *save = svm->sev_es.vmsa;
> +
> + vmcb->control.intercepts[INTERCEPT_DR] = 0;
> +
> + /*
> + * DR7 access must remain intercepted for an SEV-ES guest unless DebugSwap
> + * (depends on NO_NESTED_DATA_BP) is enabled as otherwise a VM writing to DR7
> + * from the #DB handler may trigger infinite loop of #DB's.
> + */
> + if (sev_es_guest(svm->vcpu.kvm) && (save->sev_features & SVM_SEV_FEAT_DEBUG_SWAP)) {
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
> + }
> +
> + recalc_intercepts(svm);
> +}
> +
> static int direct_access_msr_slot(u32 msr)
> {
> u32 i;
> @@ -1184,13 +1243,11 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
> if (!kvm_vcpu_apicv_active(vcpu))
> svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
>
> - set_dr_intercepts(svm);
> -
> set_exception_intercept(svm, PF_VECTOR);
> set_exception_intercept(svm, UD_VECTOR);
> set_exception_intercept(svm, MC_VECTOR);
> set_exception_intercept(svm, AC_VECTOR);
> - set_exception_intercept(svm, DB_VECTOR);
> +
> /*
> * Guest access to VMware backdoor ports could legitimately
> * trigger #GP because of TSS I/O permission bitmap.
> @@ -1308,6 +1365,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
> if (sev_guest(vcpu->kvm))
> sev_init_vmcb(svm);
>
> + set_dr_intercepts(svm);
> +
> svm_hv_init_vmcb(vmcb);
> init_vmcb_after_set_cpuid(vcpu);
>
--
Alexey
Ping? Thanks,
On 21/2/23 16:19, Alexey Kardashevskiy wrote:
> Ping? Thanks,
>
> On 3/2/23 16:14, Alexey Kardashevskiy wrote:
>> Prior to SEV-ES, KVM stored/loaded host debug registers upon switching
>> to/from a VM. Changing those registers inside a running SEV VM
>> triggered #VC exit to KVM.
>>
>> SEV-ES added the encrypted state (ES) which uses an encrypted guest page
>> for the VM state (VMSA). The hardware saves/restores certain registers on
>> VMRUN/VMEXIT according to a swap type (A, B, C), see
>> "Table B-3. Swap Types" in the AMD Architecture Programmer’s Manual
>> volume 2.
>>
>> AMD Milan (Fam 19h) introduces support for the debug registers swapping.
>> DR6 and DR7 are always swapped. DR[0-3] and DR[0-3]_ADDR_MASK are swapped
>> a type B when SEV_FEATURES[5] ("DebugSwap") is set.
>>
>> Enable DebugSwap in VMSA. But only do so if CPUID Fn80000021_EAX[0]
>> ("NoNestedDataBp", "Processor ignores nested data breakpoints") is
>> supported by the SOC as otherwise a malicious SEV-ES guest can set up
>> data breakpoints on the #VC IDT entry/stack and cause an infinite loop.
>>
>> Eliminate DR7 and #DB intercepts as:
>> - they are not needed when DebugSwap is supported;
>> - #VC for these intercepts is most likely not supported anyway and
>> kills the VM.
>> Keep DR7 intercepted unless DebugSwap enabled to prevent the infinite #DB
>> loop DoS.
>>
>> While at this, move set_/clr_dr_intercepts to .c and move #DB intercept
>> next to DR7 intercept.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
>> ---
>> Changes:
>> v4:
>> * removed sev_es_is_debug_swap_enabled() helper
>> * made sev_es_debug_swap_enabled (module param) static
>> * set sev_feature early in sev_es_init_vmcb() and made intercepts
>> dependend on it vs. module param
>> * move set_/clr_dr_intercepts to .c
>>
>> v3:
>> * rewrote the commit log again
>> * rebased on tip/master to use recently defined
>> X86_FEATURE_NO_NESTED_DATA_BP
>> * s/boot_cpu_has/cpu_feature_enabled/
>>
>> v2:
>> * debug_swap moved from vcpu to module_param
>> * rewrote commit log
>>
>> ---
>> Tested with:
>> ===
>> int x;
>> int main(int argc, char *argv[])
>> {
>> x = 1;
>> return 0;
>> }
>> ===
>> gcc -g a.c
>> rsync a.out ruby-954vm:~/
>> ssh -t ruby-954vm 'gdb -ex "file a.out" -ex "watch x" -ex r'
>>
>> where ruby-954vm is a VM.
>>
>> With "/sys/module/kvm_amd/parameters/debug_swap = 0", gdb does not stop
>> on the watchpoint, with "= 1" - gdb does.
>> ---
>> arch/x86/include/asm/svm.h | 1 +
>> arch/x86/kvm/svm/svm.h | 42 -------------
>> arch/x86/kvm/svm/sev.c | 24 ++++++++
>> arch/x86/kvm/svm/svm.c | 65 +++++++++++++++++++-
>> 4 files changed, 87 insertions(+), 45 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
>> index cb1ee53ad3b1..665515c7edae 100644
>> --- a/arch/x86/include/asm/svm.h
>> +++ b/arch/x86/include/asm/svm.h
>> @@ -278,6 +278,7 @@ enum avic_ipi_failure_cause {
>> #define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
>> #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
>> +#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
>> struct vmcb_seg {
>> u16 selector;
>> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
>> index 4826e6cc611b..653fd09929df 100644
>> --- a/arch/x86/kvm/svm/svm.h
>> +++ b/arch/x86/kvm/svm/svm.h
>> @@ -389,48 +389,6 @@ static inline bool vmcb12_is_intercept(struct
>> vmcb_ctrl_area_cached *control, u3
>> return test_bit(bit, (unsigned long *)&control->intercepts);
>> }
>> -static inline void set_dr_intercepts(struct vcpu_svm *svm)
>> -{
>> - struct vmcb *vmcb = svm->vmcb01.ptr;
>> -
>> - if (!sev_es_guest(svm->vcpu.kvm)) {
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
>> - }
>> -
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
>> -
>> - recalc_intercepts(svm);
>> -}
>> -
>> -static inline void clr_dr_intercepts(struct vcpu_svm *svm)
>> -{
>> - struct vmcb *vmcb = svm->vmcb01.ptr;
>> -
>> - vmcb->control.intercepts[INTERCEPT_DR] = 0;
>> -
>> - /* DR7 access must remain intercepted for an SEV-ES guest */
>> - if (sev_es_guest(svm->vcpu.kvm)) {
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
>> - }
>> -
>> - recalc_intercepts(svm);
>> -}
>> -
>> static inline void set_exception_intercept(struct vcpu_svm *svm, u32
>> bit)
>> {
>> struct vmcb *vmcb = svm->vmcb01.ptr;
>> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
>> index 86d6897f4806..af775410c5eb 100644
>> --- a/arch/x86/kvm/svm/sev.c
>> +++ b/arch/x86/kvm/svm/sev.c
>> @@ -21,6 +21,7 @@
>> #include <asm/pkru.h>
>> #include <asm/trapnr.h>
>> #include <asm/fpu/xcr.h>
>> +#include <asm/debugreg.h>
>> #include "mmu.h"
>> #include "x86.h"
>> @@ -52,9 +53,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
>> /* enable/disable SEV-ES support */
>> static bool sev_es_enabled = true;
>> module_param_named(sev_es, sev_es_enabled, bool, 0444);
>> +
>> +/* enable/disable SEV-ES DebugSwap support */
>> +static bool sev_es_debug_swap_enabled = true;
>> +module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0644);
>> #else
>> #define sev_enabled false
>> #define sev_es_enabled false
>> +#define sev_es_debug_swap false
>> #endif /* CONFIG_KVM_AMD_SEV */
>> static u8 sev_enc_bit;
>> @@ -2249,6 +2255,8 @@ void __init sev_hardware_setup(void)
>> out:
>> sev_enabled = sev_supported;
>> sev_es_enabled = sev_es_supported;
>> + if (!sev_es_enabled ||
>> !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
>> + sev_es_debug_swap_enabled = false;
>> #endif
>> }
>> @@ -2940,6 +2948,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int
>> size, unsigned int port, int in)
>> static void sev_es_init_vmcb(struct vcpu_svm *svm)
>> {
>> struct kvm_vcpu *vcpu = &svm->vcpu;
>> + struct sev_es_save_area *save = svm->sev_es.vmsa;
>> svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
>> svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
>> @@ -2988,6 +2997,9 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
>> if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
>> svm_clr_intercept(svm, INTERCEPT_RDTSCP);
>> }
>> +
>> + if (sev_es_debug_swap_enabled)
>> + save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
>> }
>> void sev_init_vmcb(struct vcpu_svm *svm)
>> @@ -3027,6 +3039,18 @@ void sev_es_prepare_switch_to_guest(struct
>> sev_es_save_area *hostsa)
>> /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host
>> value */
>> hostsa->xss = host_xss;
>> +
>> + /* The DebugSwap SEV feature does Type B swaps of DR[0-3] */
>> + if (sev_es_debug_swap_enabled) {
>> + hostsa->dr0 = native_get_debugreg(0);
>> + hostsa->dr1 = native_get_debugreg(1);
>> + hostsa->dr2 = native_get_debugreg(2);
>> + hostsa->dr3 = native_get_debugreg(3);
>> + hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
>> + hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
>> + hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
>> + hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
>> + }
>> }
>> void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index 60c7c880266b..f8e222bee22a 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -671,6 +671,65 @@ static int svm_cpu_init(int cpu)
>> }
>> +static void set_dr_intercepts(struct vcpu_svm *svm)
>> +{
>> + struct vmcb *vmcb = svm->vmcb01.ptr;
>> + bool intercept;
>> +
>> + if (!sev_es_guest(svm->vcpu.kvm)) {
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
>> + }
>> +
>> + if (sev_es_guest(svm->vcpu.kvm)) {
>> + struct sev_es_save_area *save = svm->sev_es.vmsa;
>> +
>> + intercept = !(save->sev_features & SVM_SEV_FEAT_DEBUG_SWAP);
>> + } else {
>> + intercept = true;
>> + }
>> +
>> + if (intercept) {
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
>> + set_exception_intercept(svm, DB_VECTOR);
>> + }
>> +
>> + recalc_intercepts(svm);
>> +}
>> +
>> +static void clr_dr_intercepts(struct vcpu_svm *svm)
>> +{
>> + struct vmcb *vmcb = svm->vmcb01.ptr;
>> + struct sev_es_save_area *save = svm->sev_es.vmsa;
>> +
>> + vmcb->control.intercepts[INTERCEPT_DR] = 0;
>> +
>> + /*
>> + * DR7 access must remain intercepted for an SEV-ES guest unless
>> DebugSwap
>> + * (depends on NO_NESTED_DATA_BP) is enabled as otherwise a VM
>> writing to DR7
>> + * from the #DB handler may trigger infinite loop of #DB's.
>> + */
>> + if (sev_es_guest(svm->vcpu.kvm) && (save->sev_features &
>> SVM_SEV_FEAT_DEBUG_SWAP)) {
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
>> + }
>> +
>> + recalc_intercepts(svm);
>> +}
>> +
>> static int direct_access_msr_slot(u32 msr)
>> {
>> u32 i;
>> @@ -1184,13 +1243,11 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
>> if (!kvm_vcpu_apicv_active(vcpu))
>> svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
>> - set_dr_intercepts(svm);
>> -
>> set_exception_intercept(svm, PF_VECTOR);
>> set_exception_intercept(svm, UD_VECTOR);
>> set_exception_intercept(svm, MC_VECTOR);
>> set_exception_intercept(svm, AC_VECTOR);
>> - set_exception_intercept(svm, DB_VECTOR);
>> +
>> /*
>> * Guest access to VMware backdoor ports could legitimately
>> * trigger #GP because of TSS I/O permission bitmap.
>> @@ -1308,6 +1365,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
>> if (sev_guest(vcpu->kvm))
>> sev_init_vmcb(svm);
>> + set_dr_intercepts(svm);
>> +
>> svm_hv_init_vmcb(vmcb);
>> init_vmcb_after_set_cpuid(vcpu);
>
--
Alexey
Ping? (I am told that pinging once a week is ok) Thanks,
On 14/3/23 20:43, Alexey Kardashevskiy wrote:
> Ping? Thanks,
>
>
> On 21/2/23 16:19, Alexey Kardashevskiy wrote:
>> Ping? Thanks,
>>
>> On 3/2/23 16:14, Alexey Kardashevskiy wrote:
>>> Prior to SEV-ES, KVM stored/loaded host debug registers upon switching
>>> to/from a VM. Changing those registers inside a running SEV VM
>>> triggered #VC exit to KVM.
>>>
>>> SEV-ES added the encrypted state (ES) which uses an encrypted guest page
>>> for the VM state (VMSA). The hardware saves/restores certain
>>> registers on
>>> VMRUN/VMEXIT according to a swap type (A, B, C), see
>>> "Table B-3. Swap Types" in the AMD Architecture Programmer’s Manual
>>> volume 2.
>>>
>>> AMD Milan (Fam 19h) introduces support for the debug registers swapping.
>>> DR6 and DR7 are always swapped. DR[0-3] and DR[0-3]_ADDR_MASK are
>>> swapped
>>> a type B when SEV_FEATURES[5] ("DebugSwap") is set.
>>>
>>> Enable DebugSwap in VMSA. But only do so if CPUID Fn80000021_EAX[0]
>>> ("NoNestedDataBp", "Processor ignores nested data breakpoints") is
>>> supported by the SOC as otherwise a malicious SEV-ES guest can set up
>>> data breakpoints on the #VC IDT entry/stack and cause an infinite loop.
>>>
>>> Eliminate DR7 and #DB intercepts as:
>>> - they are not needed when DebugSwap is supported;
>>> - #VC for these intercepts is most likely not supported anyway and
>>> kills the VM.
>>> Keep DR7 intercepted unless DebugSwap enabled to prevent the infinite
>>> #DB
>>> loop DoS.
>>>
>>> While at this, move set_/clr_dr_intercepts to .c and move #DB intercept
>>> next to DR7 intercept.
>>>
>>> Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
>>> ---
>>> Changes:
>>> v4:
>>> * removed sev_es_is_debug_swap_enabled() helper
>>> * made sev_es_debug_swap_enabled (module param) static
>>> * set sev_feature early in sev_es_init_vmcb() and made intercepts
>>> dependend on it vs. module param
>>> * move set_/clr_dr_intercepts to .c
>>>
>>> v3:
>>> * rewrote the commit log again
>>> * rebased on tip/master to use recently defined
>>> X86_FEATURE_NO_NESTED_DATA_BP
>>> * s/boot_cpu_has/cpu_feature_enabled/
>>>
>>> v2:
>>> * debug_swap moved from vcpu to module_param
>>> * rewrote commit log
>>>
>>> ---
>>> Tested with:
>>> ===
>>> int x;
>>> int main(int argc, char *argv[])
>>> {
>>> x = 1;
>>> return 0;
>>> }
>>> ===
>>> gcc -g a.c
>>> rsync a.out ruby-954vm:~/
>>> ssh -t ruby-954vm 'gdb -ex "file a.out" -ex "watch x" -ex r'
>>>
>>> where ruby-954vm is a VM.
>>>
>>> With "/sys/module/kvm_amd/parameters/debug_swap = 0", gdb does not stop
>>> on the watchpoint, with "= 1" - gdb does.
>>> ---
>>> arch/x86/include/asm/svm.h | 1 +
>>> arch/x86/kvm/svm/svm.h | 42 -------------
>>> arch/x86/kvm/svm/sev.c | 24 ++++++++
>>> arch/x86/kvm/svm/svm.c | 65 +++++++++++++++++++-
>>> 4 files changed, 87 insertions(+), 45 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
>>> index cb1ee53ad3b1..665515c7edae 100644
>>> --- a/arch/x86/include/asm/svm.h
>>> +++ b/arch/x86/include/asm/svm.h
>>> @@ -278,6 +278,7 @@ enum avic_ipi_failure_cause {
>>> #define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
>>> #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
>>> +#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
>>> struct vmcb_seg {
>>> u16 selector;
>>> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
>>> index 4826e6cc611b..653fd09929df 100644
>>> --- a/arch/x86/kvm/svm/svm.h
>>> +++ b/arch/x86/kvm/svm/svm.h
>>> @@ -389,48 +389,6 @@ static inline bool vmcb12_is_intercept(struct
>>> vmcb_ctrl_area_cached *control, u3
>>> return test_bit(bit, (unsigned long *)&control->intercepts);
>>> }
>>> -static inline void set_dr_intercepts(struct vcpu_svm *svm)
>>> -{
>>> - struct vmcb *vmcb = svm->vmcb01.ptr;
>>> -
>>> - if (!sev_es_guest(svm->vcpu.kvm)) {
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
>>> - }
>>> -
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
>>> -
>>> - recalc_intercepts(svm);
>>> -}
>>> -
>>> -static inline void clr_dr_intercepts(struct vcpu_svm *svm)
>>> -{
>>> - struct vmcb *vmcb = svm->vmcb01.ptr;
>>> -
>>> - vmcb->control.intercepts[INTERCEPT_DR] = 0;
>>> -
>>> - /* DR7 access must remain intercepted for an SEV-ES guest */
>>> - if (sev_es_guest(svm->vcpu.kvm)) {
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
>>> - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
>>> - }
>>> -
>>> - recalc_intercepts(svm);
>>> -}
>>> -
>>> static inline void set_exception_intercept(struct vcpu_svm *svm,
>>> u32 bit)
>>> {
>>> struct vmcb *vmcb = svm->vmcb01.ptr;
>>> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
>>> index 86d6897f4806..af775410c5eb 100644
>>> --- a/arch/x86/kvm/svm/sev.c
>>> +++ b/arch/x86/kvm/svm/sev.c
>>> @@ -21,6 +21,7 @@
>>> #include <asm/pkru.h>
>>> #include <asm/trapnr.h>
>>> #include <asm/fpu/xcr.h>
>>> +#include <asm/debugreg.h>
>>> #include "mmu.h"
>>> #include "x86.h"
>>> @@ -52,9 +53,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
>>> /* enable/disable SEV-ES support */
>>> static bool sev_es_enabled = true;
>>> module_param_named(sev_es, sev_es_enabled, bool, 0444);
>>> +
>>> +/* enable/disable SEV-ES DebugSwap support */
>>> +static bool sev_es_debug_swap_enabled = true;
>>> +module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0644);
>>> #else
>>> #define sev_enabled false
>>> #define sev_es_enabled false
>>> +#define sev_es_debug_swap false
>>> #endif /* CONFIG_KVM_AMD_SEV */
>>> static u8 sev_enc_bit;
>>> @@ -2249,6 +2255,8 @@ void __init sev_hardware_setup(void)
>>> out:
>>> sev_enabled = sev_supported;
>>> sev_es_enabled = sev_es_supported;
>>> + if (!sev_es_enabled ||
>>> !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
>>> + sev_es_debug_swap_enabled = false;
>>> #endif
>>> }
>>> @@ -2940,6 +2948,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int
>>> size, unsigned int port, int in)
>>> static void sev_es_init_vmcb(struct vcpu_svm *svm)
>>> {
>>> struct kvm_vcpu *vcpu = &svm->vcpu;
>>> + struct sev_es_save_area *save = svm->sev_es.vmsa;
>>> svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
>>> svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
>>> @@ -2988,6 +2997,9 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
>>> if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
>>> svm_clr_intercept(svm, INTERCEPT_RDTSCP);
>>> }
>>> +
>>> + if (sev_es_debug_swap_enabled)
>>> + save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
>>> }
>>> void sev_init_vmcb(struct vcpu_svm *svm)
>>> @@ -3027,6 +3039,18 @@ void sev_es_prepare_switch_to_guest(struct
>>> sev_es_save_area *hostsa)
>>> /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host
>>> value */
>>> hostsa->xss = host_xss;
>>> +
>>> + /* The DebugSwap SEV feature does Type B swaps of DR[0-3] */
>>> + if (sev_es_debug_swap_enabled) {
>>> + hostsa->dr0 = native_get_debugreg(0);
>>> + hostsa->dr1 = native_get_debugreg(1);
>>> + hostsa->dr2 = native_get_debugreg(2);
>>> + hostsa->dr3 = native_get_debugreg(3);
>>> + hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
>>> + hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
>>> + hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
>>> + hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
>>> + }
>>> }
>>> void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
>>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>>> index 60c7c880266b..f8e222bee22a 100644
>>> --- a/arch/x86/kvm/svm/svm.c
>>> +++ b/arch/x86/kvm/svm/svm.c
>>> @@ -671,6 +671,65 @@ static int svm_cpu_init(int cpu)
>>> }
>>> +static void set_dr_intercepts(struct vcpu_svm *svm)
>>> +{
>>> + struct vmcb *vmcb = svm->vmcb01.ptr;
>>> + bool intercept;
>>> +
>>> + if (!sev_es_guest(svm->vcpu.kvm)) {
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
>>> + }
>>> +
>>> + if (sev_es_guest(svm->vcpu.kvm)) {
>>> + struct sev_es_save_area *save = svm->sev_es.vmsa;
>>> +
>>> + intercept = !(save->sev_features & SVM_SEV_FEAT_DEBUG_SWAP);
>>> + } else {
>>> + intercept = true;
>>> + }
>>> +
>>> + if (intercept) {
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
>>> + set_exception_intercept(svm, DB_VECTOR);
>>> + }
>>> +
>>> + recalc_intercepts(svm);
>>> +}
>>> +
>>> +static void clr_dr_intercepts(struct vcpu_svm *svm)
>>> +{
>>> + struct vmcb *vmcb = svm->vmcb01.ptr;
>>> + struct sev_es_save_area *save = svm->sev_es.vmsa;
>>> +
>>> + vmcb->control.intercepts[INTERCEPT_DR] = 0;
>>> +
>>> + /*
>>> + * DR7 access must remain intercepted for an SEV-ES guest unless
>>> DebugSwap
>>> + * (depends on NO_NESTED_DATA_BP) is enabled as otherwise a VM
>>> writing to DR7
>>> + * from the #DB handler may trigger infinite loop of #DB's.
>>> + */
>>> + if (sev_es_guest(svm->vcpu.kvm) && (save->sev_features &
>>> SVM_SEV_FEAT_DEBUG_SWAP)) {
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
>>> + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
>>> + }
>>> +
>>> + recalc_intercepts(svm);
>>> +}
>>> +
>>> static int direct_access_msr_slot(u32 msr)
>>> {
>>> u32 i;
>>> @@ -1184,13 +1243,11 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
>>> if (!kvm_vcpu_apicv_active(vcpu))
>>> svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
>>> - set_dr_intercepts(svm);
>>> -
>>> set_exception_intercept(svm, PF_VECTOR);
>>> set_exception_intercept(svm, UD_VECTOR);
>>> set_exception_intercept(svm, MC_VECTOR);
>>> set_exception_intercept(svm, AC_VECTOR);
>>> - set_exception_intercept(svm, DB_VECTOR);
>>> +
>>> /*
>>> * Guest access to VMware backdoor ports could legitimately
>>> * trigger #GP because of TSS I/O permission bitmap.
>>> @@ -1308,6 +1365,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
>>> if (sev_guest(vcpu->kvm))
>>> sev_init_vmcb(svm);
>>> + set_dr_intercepts(svm);
>>> +
>>> svm_hv_init_vmcb(vmcb);
>>> init_vmcb_after_set_cpuid(vcpu);
>>
>
--
Alexey
© 2016 - 2026 Red Hat, Inc.