 xen/arch/x86/hvm/Kconfig                 |  7 +++
 xen/arch/x86/hvm/Makefile                |  2 +-
 xen/arch/x86/hvm/svm/Makefile            |  2 +-
 xen/arch/x86/hvm/svm/entry.S             |  4 ++
 xen/arch/x86/hvm/svm/nestedhvm.h         |  2 +-
 xen/arch/x86/hvm/svm/svm.c               | 18 ++++--
 xen/arch/x86/hvm/vmx/Makefile            |  2 +-
 xen/arch/x86/hvm/vmx/entry.S             |  2 +
 xen/arch/x86/hvm/vmx/vmx.c               | 31 +---------
 xen/arch/x86/hvm/vmx/vvmx.c              | 26 +++++++++
 xen/arch/x86/include/asm/hvm/hvm.h       |  2 +-
 xen/arch/x86/include/asm/hvm/nestedhvm.h | 64 +++++++++++++++++---
 xen/arch/x86/include/asm/hvm/vmx/vvmx.h  | 74 ++++++++++++++++++++++++
 xen/arch/x86/mm/Makefile                 |  2 +-
 xen/arch/x86/mm/hap/Makefile             |  5 +-
 xen/arch/x86/mm/p2m.h                    |  6 ++
 xen/arch/x86/sysctl.c                    |  2 +
 xen/include/public/sysctl.h              |  4 +-
 18 files changed, 204 insertions(+), 51 deletions(-)
Introduce CONFIG_NESTED_VIRT (default n) to allow nested virtualization
support to be disabled at build time. This is useful for embedded or
safety-focused deployments where nested virtualization is not needed,
reducing code size and attack surface.
When CONFIG_NESTED_VIRT=n, the following source files are excluded:
- arch/x86/hvm/nestedhvm.c
- arch/x86/hvm/svm/nestedsvm.c
- arch/x86/hvm/vmx/vvmx.c
- arch/x86/mm/nested.c
- arch/x86/mm/hap/nested_hap.c
- arch/x86/mm/hap/nested_ept.c
Add inline stubs where needed in headers. Guard assembly code paths
for nested virt with #ifdef CONFIG_NESTED_VIRT. Move exception
injection for VMX/SVM instructions to the callers in vmx.c/svm.c to
avoid header dependency issues in the stubs.
No functional change when CONFIG_NESTED_VIRT=y.
Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
---
Changes in v3:
- Kconfig: Change "depends on AMD_SVM || INTEL_VMX" to "depends on HVM"
- Kconfig: Remove redundant "default n" line
- Kconfig: Remove "If unsure, say N." from help text
- mm/hap/Makefile: Simplify using intermediate nested-y variable:
nested-y := nested_hap.o
nested-$(CONFIG_INTEL_VMX) += nested_ept.o
obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
- svm/nestedhvm.h: Remove #ifdef CONFIG_NESTED_VIRT stubs, keep only
function declarations (the functions are only called from code that
is already compiled out when nested virt is disabled)
- svm/nestedhvm.h: Add CONFIG_NESTED_VIRT guard to nsvm_efer_svm_enabled
macro to return false when nested virt is disabled
- svm/svm.c: Move #UD injection for STGI/CLGI to the caller instead of
stub functions, checking nestedhvm_enabled()/nsvm_efer_svm_enabled()
- svm/svm.c: Mark svm_vmexit_do_vmrun/vmload/vmsave as __maybe_unused
- svm/svm.c: Remove empty nsvm_vcpu_switch stub (now guarded in asm)
- svm/entry.S: Add #ifdef CONFIG_NESTED_VIRT guards around nested virt
specific code paths
- vmx/vmx.c: Remove empty nvmx_switch_guest stub (now guarded in asm)
- vmx/vmx.c: Move nvmx_enqueue_n2_exceptions and nvmx_vmexit_event to
vvmx.c where they belong
- vmx/vvmx.h: Add declarations for nvmx_vmexit_event and
nvmx_enqueue_n2_exceptions
- vmx/vvmx.h: Fix nvmx_msr_read_intercept stub comment
- vmx/vvmx.h: nvmx_handle_vmx_insn stub returns X86EMUL_EXCEPTION with
ASSERT_UNREACHABLE (caller handles injection)
- vmx/vvmx.h: Convert get_vvmcs macro to inline function in stubs
- vmx/entry.S: Add #ifdef CONFIG_NESTED_VIRT guard around nvmx_switch_guest
- nestedhvm.h: Convert macro stubs to proper inline functions
---
xen/arch/x86/hvm/Kconfig | 7 +++
xen/arch/x86/hvm/Makefile | 2 +-
xen/arch/x86/hvm/svm/Makefile | 2 +-
xen/arch/x86/hvm/svm/entry.S | 4 ++
xen/arch/x86/hvm/svm/nestedhvm.h | 2 +-
xen/arch/x86/hvm/svm/svm.c | 18 ++++--
xen/arch/x86/hvm/vmx/Makefile | 2 +-
xen/arch/x86/hvm/vmx/entry.S | 2 +
xen/arch/x86/hvm/vmx/vmx.c | 31 +---------
xen/arch/x86/hvm/vmx/vvmx.c | 26 +++++++++
xen/arch/x86/include/asm/hvm/hvm.h | 2 +-
xen/arch/x86/include/asm/hvm/nestedhvm.h | 64 +++++++++++++++++---
xen/arch/x86/include/asm/hvm/vmx/vvmx.h | 74 ++++++++++++++++++++++++
xen/arch/x86/mm/Makefile | 2 +-
xen/arch/x86/mm/hap/Makefile | 5 +-
xen/arch/x86/mm/p2m.h | 6 ++
xen/arch/x86/sysctl.c | 2 +
xen/include/public/sysctl.h | 4 +-
18 files changed, 204 insertions(+), 51 deletions(-)
diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
index f32bf5cbb7..af661385b5 100644
--- a/xen/arch/x86/hvm/Kconfig
+++ b/xen/arch/x86/hvm/Kconfig
@@ -92,4 +92,11 @@ config MEM_SHARING
bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
depends on INTEL_VMX
+config NESTED_VIRT
+ bool "Nested virtualization support"
+ depends on HVM
+ help
+ Enable nested virtualization, allowing guests to run their own
+ hypervisors. This requires hardware support.
+
endif
diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index f34fb03934..b8a0a68624 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -18,7 +18,7 @@ obj-y += irq.o
obj-y += mmio.o
obj-$(CONFIG_VM_EVENT) += monitor.o
obj-y += mtrr.o
-obj-y += nestedhvm.o
+obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
obj-y += pmtimer.o
obj-y += quirks.o
obj-y += rtc.o
diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
index 8a072cafd5..92418e3444 100644
--- a/xen/arch/x86/hvm/svm/Makefile
+++ b/xen/arch/x86/hvm/svm/Makefile
@@ -2,6 +2,6 @@ obj-y += asid.o
obj-y += emulate.o
obj-bin-y += entry.o
obj-y += intr.o
-obj-y += nestedsvm.o
+obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
obj-y += svm.o
obj-y += vmcb.o
diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
index af8db23b03..7964c80750 100644
--- a/xen/arch/x86/hvm/svm/entry.S
+++ b/xen/arch/x86/hvm/svm/entry.S
@@ -28,7 +28,9 @@ FUNC(svm_asm_do_resume)
GET_CURRENT(bx)
.Lsvm_do_resume:
call svm_intr_assist
+#ifdef CONFIG_NESTED_VIRT
call nsvm_vcpu_switch
+#endif
ASSERT_NOT_IN_ATOMIC
mov VCPU_processor(%rbx),%eax
@@ -39,6 +41,7 @@ FUNC(svm_asm_do_resume)
cmp %ecx,(%rdx,%rax,1)
jne .Lsvm_process_softirqs
+#ifdef CONFIG_NESTED_VIRT
cmp %cl,VCPU_nsvm_hap_enabled(%rbx)
UNLIKELY_START(ne, nsvm_hap)
cmp %rcx,VCPU_nhvm_p2m(%rbx)
@@ -52,6 +55,7 @@ UNLIKELY_START(ne, nsvm_hap)
sti
jmp .Lsvm_do_resume
__UNLIKELY_END(nsvm_hap)
+#endif
call svm_vmenter_helper
diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h b/xen/arch/x86/hvm/svm/nestedhvm.h
index 9bfed5ffd7..5cb85410f8 100644
--- a/xen/arch/x86/hvm/svm/nestedhvm.h
+++ b/xen/arch/x86/hvm/svm/nestedhvm.h
@@ -24,7 +24,7 @@
/* True when l1 guest enabled SVM in EFER */
#define nsvm_efer_svm_enabled(v) \
- (!!((v)->arch.hvm.guest_efer & EFER_SVME))
+ (IS_ENABLED(CONFIG_NESTED_VIRT) && ((v)->arch.hvm.guest_efer & EFER_SVME))
int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
void nestedsvm_vmexit_defer(struct vcpu *v,
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 18ba837738..2cabc89fb5 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2165,7 +2165,7 @@ static void svm_vmexit_do_pause(struct cpu_user_regs *regs)
vcpu_yield();
}
-static void
+static void __maybe_unused
svm_vmexit_do_vmrun(struct cpu_user_regs *regs,
struct vcpu *v, uint64_t vmcbaddr)
{
@@ -2211,7 +2211,7 @@ nsvm_get_nvmcb_page(struct vcpu *v, uint64_t vmcbaddr)
return page;
}
-static void
+static void __maybe_unused
svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
struct cpu_user_regs *regs,
struct vcpu *v, uint64_t vmcbaddr)
@@ -2246,7 +2246,7 @@ svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
__update_guest_eip(regs, inst_len);
}
-static void
+static void __maybe_unused
svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
struct cpu_user_regs *regs,
struct vcpu *v, uint64_t vmcbaddr)
@@ -2465,6 +2465,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
.set_rdtsc_exiting = svm_set_rdtsc_exiting,
.get_insn_bytes = svm_get_insn_bytes,
+#ifdef CONFIG_NESTED_VIRT
.nhvm_vcpu_initialise = nsvm_vcpu_initialise,
.nhvm_vcpu_destroy = nsvm_vcpu_destroy,
.nhvm_vcpu_reset = nsvm_vcpu_reset,
@@ -2474,6 +2475,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
.nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
.nhvm_intr_blocked = nsvm_intr_blocked,
.nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
+#endif
.get_reg = svm_get_reg,
.set_reg = svm_set_reg,
@@ -3011,10 +3013,16 @@ void asmlinkage svm_vmexit_handler(void)
svm_vmexit_do_vmsave(vmcb, regs, v, regs->rax);
break;
case VMEXIT_STGI:
- svm_vmexit_do_stgi(regs, v);
+ if ( !nestedhvm_enabled(v->domain) )
+ hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
+ else
+ svm_vmexit_do_stgi(regs, v);
break;
case VMEXIT_CLGI:
- svm_vmexit_do_clgi(regs, v);
+ if ( !nsvm_efer_svm_enabled(v) )
+ hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
+ else
+ svm_vmexit_do_clgi(regs, v);
break;
case VMEXIT_XSETBV:
diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
index 04a29ce59d..902564b3e2 100644
--- a/xen/arch/x86/hvm/vmx/Makefile
+++ b/xen/arch/x86/hvm/vmx/Makefile
@@ -3,4 +3,4 @@ obj-y += intr.o
obj-y += realmode.o
obj-y += vmcs.o
obj-y += vmx.o
-obj-y += vvmx.o
+obj-$(CONFIG_NESTED_VIRT) += vvmx.o
diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
index 2bfee715b3..4d62efddf4 100644
--- a/xen/arch/x86/hvm/vmx/entry.S
+++ b/xen/arch/x86/hvm/vmx/entry.S
@@ -83,7 +83,9 @@ FUNC(vmx_asm_vmexit_handler)
.Lvmx_do_vmentry:
call vmx_intr_assist
+#ifdef CONFIG_NESTED_VIRT
call nvmx_switch_guest
+#endif
ASSERT_NOT_IN_ATOMIC
mov VCPU_processor(%rbx),%eax
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 82c55f49ae..4e3c8018d2 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2014,33 +2014,6 @@ static void cf_check vmx_update_guest_efer(struct vcpu *v)
vmx_set_msr_intercept(v, MSR_EFER, VMX_MSR_R);
}
-static void nvmx_enqueue_n2_exceptions(struct vcpu *v,
- unsigned long intr_fields, int error_code, uint8_t source)
-{
- struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
-
- if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
- /* enqueue the exception till the VMCS switch back to L1 */
- nvmx->intr.intr_info = intr_fields;
- nvmx->intr.error_code = error_code;
- nvmx->intr.source = source;
- vcpu_nestedhvm(v).nv_vmexit_pending = 1;
- return;
- }
- else
- gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
- "on %lx %x\n", intr_fields, error_code,
- nvmx->intr.intr_info, nvmx->intr.error_code);
-}
-
-static int cf_check nvmx_vmexit_event(
- struct vcpu *v, const struct x86_event *event)
-{
- nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
- hvm_intsrc_none);
- return NESTEDHVM_VMEXIT_DONE;
-}
-
static void __vmx_inject_exception(int trap, int type, int error_code)
{
unsigned long intr_fields;
@@ -2933,6 +2906,7 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
.handle_cd = vmx_handle_cd,
.set_info_guest = vmx_set_info_guest,
.set_rdtsc_exiting = vmx_set_rdtsc_exiting,
+#ifdef CONFIG_NESTED_VIRT
.nhvm_vcpu_initialise = nvmx_vcpu_initialise,
.nhvm_vcpu_destroy = nvmx_vcpu_destroy,
.nhvm_vcpu_reset = nvmx_vcpu_reset,
@@ -2942,8 +2916,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
.nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
.nhvm_intr_blocked = nvmx_intr_blocked,
.nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
- .update_vlapic_mode = vmx_vlapic_msr_changed,
.nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
+#endif
+ .update_vlapic_mode = vmx_vlapic_msr_changed,
#ifdef CONFIG_VM_EVENT
.enable_msr_interception = vmx_enable_msr_interception,
#endif
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 38952f0696..2bb42678c5 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -2821,6 +2821,32 @@ void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
__vmwrite(read_shadow_field, v->arch.hvm.nvcpu.guest_cr[cr]);
}
+void nvmx_enqueue_n2_exceptions(struct vcpu *v,
+ unsigned long intr_fields, int error_code, uint8_t source)
+{
+ struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+
+ if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
+ /* enqueue the exception till the VMCS switch back to L1 */
+ nvmx->intr.intr_info = intr_fields;
+ nvmx->intr.error_code = error_code;
+ nvmx->intr.source = source;
+ vcpu_nestedhvm(v).nv_vmexit_pending = 1;
+ return;
+ }
+ else
+ gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
+ "on %lx %x\n", intr_fields, error_code,
+ nvmx->intr.intr_info, nvmx->intr.error_code);
+}
+
+int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event *event)
+{
+ nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
+ hvm_intsrc_none);
+ return NESTEDHVM_VMEXIT_DONE;
+}
+
void __init start_nested_vmx(struct hvm_function_table *hvm_function_table)
{
/* TODO: Require hardware support before enabling nested virt */
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index 7d9774df59..536a38b450 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -711,7 +711,7 @@ static inline bool hvm_altp2m_supported(void)
/* Returns true if we have the minimum hardware requirements for nested virt */
static inline bool hvm_nested_virt_supported(void)
{
- return hvm_funcs.caps.nested_virt;
+ return IS_ENABLED(CONFIG_NESTED_VIRT) && hvm_funcs.caps.nested_virt;
}
#ifdef CONFIG_ALTP2M
diff --git a/xen/arch/x86/include/asm/hvm/nestedhvm.h b/xen/arch/x86/include/asm/hvm/nestedhvm.h
index ea2c1bc328..2f8209271a 100644
--- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
+++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
@@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
/* Nested HVM on/off per domain */
static inline bool nestedhvm_enabled(const struct domain *d)
{
- return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
+ return IS_ENABLED(CONFIG_NESTED_VIRT) &&
+ (d->options & XEN_DOMCTL_CDF_nested_virt);
}
+/* Nested paging */
+#define NESTEDHVM_PAGEFAULT_DONE 0
+#define NESTEDHVM_PAGEFAULT_INJECT 1
+#define NESTEDHVM_PAGEFAULT_L1_ERROR 2
+#define NESTEDHVM_PAGEFAULT_L0_ERROR 3
+#define NESTEDHVM_PAGEFAULT_MMIO 4
+#define NESTEDHVM_PAGEFAULT_RETRY 5
+#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
+
+#ifdef CONFIG_NESTED_VIRT
+
/* Nested VCPU */
int nestedhvm_vcpu_initialise(struct vcpu *v);
void nestedhvm_vcpu_destroy(struct vcpu *v);
@@ -38,14 +50,6 @@ bool nestedhvm_vcpu_in_guestmode(struct vcpu *v);
#define nestedhvm_vcpu_exit_guestmode(v) \
vcpu_nestedhvm(v).nv_guestmode = 0
-/* Nested paging */
-#define NESTEDHVM_PAGEFAULT_DONE 0
-#define NESTEDHVM_PAGEFAULT_INJECT 1
-#define NESTEDHVM_PAGEFAULT_L1_ERROR 2
-#define NESTEDHVM_PAGEFAULT_L0_ERROR 3
-#define NESTEDHVM_PAGEFAULT_MMIO 4
-#define NESTEDHVM_PAGEFAULT_RETRY 5
-#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
struct npfec npfec);
@@ -59,6 +63,48 @@ unsigned long *nestedhvm_vcpu_iomap_get(bool ioport_80, bool ioport_ed);
void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
+#else /* !CONFIG_NESTED_VIRT */
+
+static inline int nestedhvm_vcpu_initialise(struct vcpu *v)
+{
+ ASSERT_UNREACHABLE();
+ return -EOPNOTSUPP;
+}
+static inline void nestedhvm_vcpu_destroy(struct vcpu *v) { }
+static inline void nestedhvm_vcpu_reset(struct vcpu *v)
+{
+ ASSERT_UNREACHABLE();
+}
+static inline bool nestedhvm_vcpu_in_guestmode(struct vcpu *v) { return false; }
+static inline int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
+ struct npfec npfec)
+{
+ ASSERT_UNREACHABLE();
+ return NESTEDHVM_PAGEFAULT_L0_ERROR;
+}
+static inline void nestedhvm_vcpu_enter_guestmode(struct vcpu *v)
+{
+ ASSERT_UNREACHABLE();
+}
+static inline void nestedhvm_vcpu_exit_guestmode(struct vcpu *v)
+{
+ ASSERT_UNREACHABLE();
+}
+static inline bool nestedhvm_paging_mode_hap(struct vcpu *v)
+{
+ return false;
+}
+static inline bool nestedhvm_vmswitch_in_progress(struct vcpu *v)
+{
+ return false;
+}
+static inline void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m)
+{
+ ASSERT_UNREACHABLE();
+}
+
+#endif /* CONFIG_NESTED_VIRT */
+
static inline bool nestedhvm_is_n2(struct vcpu *v)
{
if ( !nestedhvm_enabled(v->domain) ||
diff --git a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
index da10d3fa96..d0c1ae29f6 100644
--- a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
+++ b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
@@ -73,6 +73,8 @@ union vmx_inst_info {
u32 word;
};
+#ifdef CONFIG_NESTED_VIRT
+
int cf_check nvmx_vcpu_initialise(struct vcpu *v);
void cf_check nvmx_vcpu_destroy(struct vcpu *v);
int cf_check nvmx_vcpu_reset(struct vcpu *v);
@@ -199,5 +201,77 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
uint64_t *exit_qual, uint32_t *exit_reason);
int nvmx_cpu_up_prepare(unsigned int cpu);
void nvmx_cpu_dead(unsigned int cpu);
+int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event *event);
+void nvmx_enqueue_n2_exceptions(struct vcpu *v,
+ unsigned long intr_fields, int error_code, uint8_t source);
+
+#else /* !CONFIG_NESTED_VIRT */
+
+static inline void nvmx_update_exec_control(struct vcpu *v, u32 value)
+{
+ ASSERT_UNREACHABLE();
+}
+static inline void nvmx_update_secondary_exec_control(struct vcpu *v,
+ unsigned long value)
+{
+ ASSERT_UNREACHABLE();
+}
+static inline void nvmx_update_exception_bitmap(struct vcpu *v,
+ unsigned long value)
+{
+ ASSERT_UNREACHABLE();
+}
+static inline u64 nvmx_get_tsc_offset(struct vcpu *v)
+{
+ ASSERT_UNREACHABLE();
+ return 0;
+}
+static inline void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
+{
+ ASSERT_UNREACHABLE();
+}
+static inline bool nvmx_intercepts_exception(struct vcpu *v, unsigned int vector,
+ int error_code)
+{
+ ASSERT_UNREACHABLE();
+ return false;
+}
+static inline int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
+ unsigned int exit_reason)
+{
+ ASSERT_UNREACHABLE();
+ return 0;
+}
+static inline void nvmx_idtv_handling(void)
+{
+ ASSERT_UNREACHABLE();
+}
+static inline int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
+{
+ /* return 0 to trigger #GP */
+ return 0;
+}
+static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
+ unsigned int exit_reason)
+{
+ ASSERT_UNREACHABLE();
+ return X86EMUL_EXCEPTION;
+}
+static inline int nvmx_cpu_up_prepare(unsigned int cpu) { return 0; }
+static inline void nvmx_cpu_dead(unsigned int cpu) { }
+static inline void nvmx_enqueue_n2_exceptions(struct vcpu *v,
+ unsigned long intr_fields, int error_code, uint8_t source)
+{
+ ASSERT_UNREACHABLE();
+}
+
+static inline u64 get_vvmcs(const struct vcpu *vcpu, u32 encoding)
+{
+ ASSERT_UNREACHABLE();
+ return 0;
+}
+
+#endif /* CONFIG_NESTED_VIRT */
+
#endif /* __ASM_X86_HVM_VVMX_H__ */
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 960f6e8409..aa15811c2e 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_SHADOW_PAGING) += guest_walk_4.o
obj-$(CONFIG_VM_EVENT) += mem_access.o
obj-$(CONFIG_MEM_PAGING) += mem_paging.o
obj-$(CONFIG_MEM_SHARING) += mem_sharing.o
-obj-$(CONFIG_HVM) += nested.o
+obj-$(CONFIG_NESTED_VIRT) += nested.o
obj-$(CONFIG_HVM) += p2m.o
obj-y += p2m-basic.o
obj-$(CONFIG_INTEL_VMX) += p2m-ept.o
diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
index 67c29b2162..efdc91ea82 100644
--- a/xen/arch/x86/mm/hap/Makefile
+++ b/xen/arch/x86/mm/hap/Makefile
@@ -2,5 +2,6 @@ obj-y += hap.o
obj-y += guest_walk_2.o
obj-y += guest_walk_3.o
obj-y += guest_walk_4.o
-obj-y += nested_hap.o
-obj-$(CONFIG_INTEL_VMX) += nested_ept.o
+nested-y := nested_hap.o
+nested-$(CONFIG_INTEL_VMX) += nested_ept.o
+obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
diff --git a/xen/arch/x86/mm/p2m.h b/xen/arch/x86/mm/p2m.h
index 635f5a7f45..63808dddcc 100644
--- a/xen/arch/x86/mm/p2m.h
+++ b/xen/arch/x86/mm/p2m.h
@@ -25,9 +25,15 @@ void p2m_teardown_altp2m(struct domain *d);
void p2m_flush_table_locked(struct p2m_domain *p2m);
int __must_check p2m_remove_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
unsigned int page_order);
+#ifdef CONFIG_NESTED_VIRT
void p2m_nestedp2m_init(struct p2m_domain *p2m);
int p2m_init_nestedp2m(struct domain *d);
void p2m_teardown_nestedp2m(struct domain *d);
+#else
+static inline void p2m_nestedp2m_init(struct p2m_domain *p2m) { }
+static inline int p2m_init_nestedp2m(struct domain *d) { return 0; }
+static inline void p2m_teardown_nestedp2m(struct domain *d) { }
+#endif
int ept_p2m_init(struct p2m_domain *p2m);
void ept_p2m_uninit(struct p2m_domain *p2m);
diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
index 1b04947516..b1d865e1c8 100644
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -103,6 +103,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi)
pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap;
if ( IS_ENABLED(CONFIG_SHADOW_PAGING) )
pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow;
+ if ( hvm_nested_virt_supported() )
+ pi->capabilities |= XEN_SYSCTL_PHYSCAP_nestedhvm;
}
long arch_do_sysctl(
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 66c9b65465..b4bd1dd7b2 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -100,9 +100,11 @@ struct xen_sysctl_tbuf_op {
/* Xen supports the Grant v1 and/or v2 ABIs. */
#define XEN_SYSCTL_PHYSCAP_gnttab_v1 (1u << 8)
#define XEN_SYSCTL_PHYSCAP_gnttab_v2 (1u << 9)
+/* The platform supports nested HVM. */
+#define XEN_SYSCTL_PHYSCAP_nestedhvm (1u << 10)
/* Max XEN_SYSCTL_PHYSCAP_* constant. Used for ABI checking. */
-#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_gnttab_v2
+#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_nestedhvm
#if defined(__arm__) || defined(__aarch64__)
#define XEN_SYSCTL_PHYSCAP_ARM_SVE_MASK (0x1FU)
--
2.25.1
On Fri Feb 13, 2026 at 11:02 PM CET, Stefano Stabellini wrote:
> Introduce CONFIG_NESTED_VIRT (default n) to allow nested virtualization
> support to be disabled at build time. This is useful for embedded or
> safety-focused deployments where nested virtualization is not needed,
> reducing code size and attack surface.
>
> When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> - arch/x86/hvm/nestedhvm.c
> - arch/x86/hvm/svm/nestedsvm.c
> - arch/x86/hvm/vmx/vvmx.c
> - arch/x86/mm/nested.c
> - arch/x86/mm/hap/nested_hap.c
> - arch/x86/mm/hap/nested_ept.c
>
> Add inline stubs where needed in headers. Guard assembly code paths
> for nested virt with #ifdef CONFIG_NESTED_VIRT. Move exception
> injection for VMX/SVM instructions to the callers in vmx.c/svm.c to
> avoid header dependency issues in the stubs.
>
> No functional change when CONFIG_NESTED_VIRT=y.
>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
>
> ---
> Changes in v3:
> - Kconfig: Change "depends on AMD_SVM || INTEL_VMX" to "depends on HVM"
> - Kconfig: Remove redundant "default n" line
> - Kconfig: Remove "If unsure, say N." from help text
> - mm/hap/Makefile: Simplify using intermediate nested-y variable:
> nested-y := nested_hap.o
> nested-$(CONFIG_INTEL_VMX) += nested_ept.o
> obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
> - svm/nestedhvm.h: Remove #ifdef CONFIG_NESTED_VIRT stubs, keep only
> function declarations (the functions are only called from code that
> is already compiled out when nested virt is disabled)
> - svm/nestedhvm.h: Add CONFIG_NESTED_VIRT guard to nsvm_efer_svm_enabled
> macro to return false when nested virt is disabled
> - svm/svm.c: Move #UD injection for STGI/CLGI to the caller instead of
> stub functions, checking nestedhvm_enabled()/nsvm_efer_svm_enabled()
> - svm/svm.c: Mark svm_vmexit_do_vmrun/vmload/vmsave as __maybe_unused
> - svm/svm.c: Remove empty nsvm_vcpu_switch stub (now guarded in asm)
> - svm/entry.S: Add #ifdef CONFIG_NESTED_VIRT guards around nested virt
> specific code paths
> - vmx/vmx.c: Remove empty nvmx_switch_guest stub (now guarded in asm)
> - vmx/vmx.c: Move nvmx_enqueue_n2_exceptions and nvmx_vmexit_event to
> vvmx.c where they belong
> - vmx/vvmx.h: Add declarations for nvmx_vmexit_event and
> nvmx_enqueue_n2_exceptions
> - vmx/vvmx.h: Fix nvmx_msr_read_intercept stub comment
> - vmx/vvmx.h: nvmx_handle_vmx_insn stub returns X86EMUL_EXCEPTION with
> ASSERT_UNREACHABLE (caller handles injection)
> - vmx/vvmx.h: Convert get_vvmcs macro to inline function in stubs
> - vmx/entry.S: Add #ifdef CONFIG_NESTED_VIRT guard around nvmx_switch_guest
> - nestedhvm.h: Convert macro stubs to proper inline functions
> ---
> xen/arch/x86/hvm/Kconfig | 7 +++
> xen/arch/x86/hvm/Makefile | 2 +-
> xen/arch/x86/hvm/svm/Makefile | 2 +-
> xen/arch/x86/hvm/svm/entry.S | 4 ++
> xen/arch/x86/hvm/svm/nestedhvm.h | 2 +-
> xen/arch/x86/hvm/svm/svm.c | 18 ++++--
> xen/arch/x86/hvm/vmx/Makefile | 2 +-
> xen/arch/x86/hvm/vmx/entry.S | 2 +
> xen/arch/x86/hvm/vmx/vmx.c | 31 +---------
> xen/arch/x86/hvm/vmx/vvmx.c | 26 +++++++++
> xen/arch/x86/include/asm/hvm/hvm.h | 2 +-
> xen/arch/x86/include/asm/hvm/nestedhvm.h | 64 +++++++++++++++++---
> xen/arch/x86/include/asm/hvm/vmx/vvmx.h | 74 ++++++++++++++++++++++++
> xen/arch/x86/mm/Makefile | 2 +-
> xen/arch/x86/mm/hap/Makefile | 5 +-
> xen/arch/x86/mm/p2m.h | 6 ++
> xen/arch/x86/sysctl.c | 2 +
> xen/include/public/sysctl.h | 4 +-
> 18 files changed, 204 insertions(+), 51 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
> index f32bf5cbb7..af661385b5 100644
> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -92,4 +92,11 @@ config MEM_SHARING
> bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
> depends on INTEL_VMX
>
> +config NESTED_VIRT
> + bool "Nested virtualization support"
> + depends on HVM
> + help
> + Enable nested virtualization, allowing guests to run their own
> + hypervisors. This requires hardware support.
nit: If we state above "allowing HVM guests..." rather than plain "guests", we can
then get rid of the "This requires hardware support" line. What you probably
meant is that this is HVM-only and we don't allow PV nesting.
"This requires hardware support" makes me (the user) think my hardware needs
something special to support nesting, when in reality I just need HVM support.
> +
> endif
> diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
> index f34fb03934..b8a0a68624 100644
> --- a/xen/arch/x86/hvm/Makefile
> +++ b/xen/arch/x86/hvm/Makefile
> @@ -18,7 +18,7 @@ obj-y += irq.o
> obj-y += mmio.o
> obj-$(CONFIG_VM_EVENT) += monitor.o
> obj-y += mtrr.o
> -obj-y += nestedhvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
> obj-y += pmtimer.o
> obj-y += quirks.o
> obj-y += rtc.o
> diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
> index 8a072cafd5..92418e3444 100644
> --- a/xen/arch/x86/hvm/svm/Makefile
> +++ b/xen/arch/x86/hvm/svm/Makefile
> @@ -2,6 +2,6 @@ obj-y += asid.o
> obj-y += emulate.o
> obj-bin-y += entry.o
> obj-y += intr.o
> -obj-y += nestedsvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
> obj-y += svm.o
> obj-y += vmcb.o
> diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
> index af8db23b03..7964c80750 100644
> --- a/xen/arch/x86/hvm/svm/entry.S
> +++ b/xen/arch/x86/hvm/svm/entry.S
> @@ -28,7 +28,9 @@ FUNC(svm_asm_do_resume)
> GET_CURRENT(bx)
> .Lsvm_do_resume:
> call svm_intr_assist
> +#ifdef CONFIG_NESTED_VIRT
> call nsvm_vcpu_switch
> +#endif
> ASSERT_NOT_IN_ATOMIC
>
> mov VCPU_processor(%rbx),%eax
> @@ -39,6 +41,7 @@ FUNC(svm_asm_do_resume)
> cmp %ecx,(%rdx,%rax,1)
> jne .Lsvm_process_softirqs
>
> +#ifdef CONFIG_NESTED_VIRT
> cmp %cl,VCPU_nsvm_hap_enabled(%rbx)
> UNLIKELY_START(ne, nsvm_hap)
> cmp %rcx,VCPU_nhvm_p2m(%rbx)
> @@ -52,6 +55,7 @@ UNLIKELY_START(ne, nsvm_hap)
> sti
> jmp .Lsvm_do_resume
> __UNLIKELY_END(nsvm_hap)
> +#endif
>
> call svm_vmenter_helper
>
> diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h b/xen/arch/x86/hvm/svm/nestedhvm.h
> index 9bfed5ffd7..5cb85410f8 100644
> --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> @@ -24,7 +24,7 @@
>
> /* True when l1 guest enabled SVM in EFER */
> #define nsvm_efer_svm_enabled(v) \
> - (!!((v)->arch.hvm.guest_efer & EFER_SVME))
> + (IS_ENABLED(CONFIG_NESTED_VIRT) && ((v)->arch.hvm.guest_efer & EFER_SVME))
>
> int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
> void nestedsvm_vmexit_defer(struct vcpu *v,
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 18ba837738..2cabc89fb5 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -2165,7 +2165,7 @@ static void svm_vmexit_do_pause(struct cpu_user_regs *regs)
> vcpu_yield();
> }
>
> -static void
> +static void __maybe_unused
> svm_vmexit_do_vmrun(struct cpu_user_regs *regs,
> struct vcpu *v, uint64_t vmcbaddr)
> {
> @@ -2211,7 +2211,7 @@ nsvm_get_nvmcb_page(struct vcpu *v, uint64_t vmcbaddr)
> return page;
> }
>
> -static void
> +static void __maybe_unused
> svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
> struct cpu_user_regs *regs,
> struct vcpu *v, uint64_t vmcbaddr)
> @@ -2246,7 +2246,7 @@ svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
> __update_guest_eip(regs, inst_len);
> }
>
> -static void
> +static void __maybe_unused
> svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
> struct cpu_user_regs *regs,
> struct vcpu *v, uint64_t vmcbaddr)
> @@ -2465,6 +2465,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
> .set_rdtsc_exiting = svm_set_rdtsc_exiting,
> .get_insn_bytes = svm_get_insn_bytes,
>
> +#ifdef CONFIG_NESTED_VIRT
> .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
> .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
> .nhvm_vcpu_reset = nsvm_vcpu_reset,
> @@ -2474,6 +2475,7 @@ static struct hvm_function_table __initdata_cf_clobber svm_function_table = {
> .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
> .nhvm_intr_blocked = nsvm_intr_blocked,
> .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
> +#endif
>
> .get_reg = svm_get_reg,
> .set_reg = svm_set_reg,
> @@ -3011,10 +3013,16 @@ void asmlinkage svm_vmexit_handler(void)
> svm_vmexit_do_vmsave(vmcb, regs, v, regs->rax);
> break;
> case VMEXIT_STGI:
> - svm_vmexit_do_stgi(regs, v);
> + if ( !nestedhvm_enabled(v->domain) )
> + hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
> + else
> + svm_vmexit_do_stgi(regs, v);
> break;
> case VMEXIT_CLGI:
> - svm_vmexit_do_clgi(regs, v);
> + if ( !nsvm_efer_svm_enabled(v) )
> + hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
> + else
> + svm_vmexit_do_clgi(regs, v);
> break;
nit: For readability I'd consider reversing the polarity and putting the enabled
cases in the first branch.
>
> case VMEXIT_XSETBV:
> diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
> index 04a29ce59d..902564b3e2 100644
> --- a/xen/arch/x86/hvm/vmx/Makefile
> +++ b/xen/arch/x86/hvm/vmx/Makefile
> @@ -3,4 +3,4 @@ obj-y += intr.o
> obj-y += realmode.o
> obj-y += vmcs.o
> obj-y += vmx.o
> -obj-y += vvmx.o
> +obj-$(CONFIG_NESTED_VIRT) += vvmx.o
> diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
> index 2bfee715b3..4d62efddf4 100644
> --- a/xen/arch/x86/hvm/vmx/entry.S
> +++ b/xen/arch/x86/hvm/vmx/entry.S
> @@ -83,7 +83,9 @@ FUNC(vmx_asm_vmexit_handler)
>
> .Lvmx_do_vmentry:
> call vmx_intr_assist
> +#ifdef CONFIG_NESTED_VIRT
> call nvmx_switch_guest
> +#endif
> ASSERT_NOT_IN_ATOMIC
>
> mov VCPU_processor(%rbx),%eax
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 82c55f49ae..4e3c8018d2 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2014,33 +2014,6 @@ static void cf_check vmx_update_guest_efer(struct vcpu *v)
> vmx_set_msr_intercept(v, MSR_EFER, VMX_MSR_R);
> }
>
> -static void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> - unsigned long intr_fields, int error_code, uint8_t source)
> -{
> - struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
> -
> - if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
> - /* enqueue the exception till the VMCS switch back to L1 */
> - nvmx->intr.intr_info = intr_fields;
> - nvmx->intr.error_code = error_code;
> - nvmx->intr.source = source;
> - vcpu_nestedhvm(v).nv_vmexit_pending = 1;
> - return;
> - }
> - else
> - gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
> - "on %lx %x\n", intr_fields, error_code,
> - nvmx->intr.intr_info, nvmx->intr.error_code);
> -}
> -
> -static int cf_check nvmx_vmexit_event(
> - struct vcpu *v, const struct x86_event *event)
> -{
> - nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
> - hvm_intsrc_none);
> - return NESTEDHVM_VMEXIT_DONE;
> -}
> -
> static void __vmx_inject_exception(int trap, int type, int error_code)
> {
> unsigned long intr_fields;
> @@ -2933,6 +2906,7 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
> .handle_cd = vmx_handle_cd,
> .set_info_guest = vmx_set_info_guest,
> .set_rdtsc_exiting = vmx_set_rdtsc_exiting,
> +#ifdef CONFIG_NESTED_VIRT
> .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
> .nhvm_vcpu_destroy = nvmx_vcpu_destroy,
> .nhvm_vcpu_reset = nvmx_vcpu_reset,
> @@ -2942,8 +2916,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
> .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
> .nhvm_intr_blocked = nvmx_intr_blocked,
> .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
> - .update_vlapic_mode = vmx_vlapic_msr_changed,
> .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
> +#endif
> + .update_vlapic_mode = vmx_vlapic_msr_changed,
> #ifdef CONFIG_VM_EVENT
> .enable_msr_interception = vmx_enable_msr_interception,
> #endif
> diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
> index 38952f0696..2bb42678c5 100644
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -2821,6 +2821,32 @@ void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
> __vmwrite(read_shadow_field, v->arch.hvm.nvcpu.guest_cr[cr]);
> }
>
> +void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> + unsigned long intr_fields, int error_code, uint8_t source)
> +{
> + struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
> +
> + if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
> + /* enqueue the exception till the VMCS switch back to L1 */
> + nvmx->intr.intr_info = intr_fields;
> + nvmx->intr.error_code = error_code;
> + nvmx->intr.source = source;
> + vcpu_nestedhvm(v).nv_vmexit_pending = 1;
> + return;
> + }
> + else
> + gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
> + "on %lx %x\n", intr_fields, error_code,
> + nvmx->intr.intr_info, nvmx->intr.error_code);
> +}
> +
> +int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event *event)
> +{
> + nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
> + hvm_intsrc_none);
> + return NESTEDHVM_VMEXIT_DONE;
> +}
> +
> void __init start_nested_vmx(struct hvm_function_table *hvm_function_table)
> {
> /* TODO: Require hardware support before enabling nested virt */
> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
> index 7d9774df59..536a38b450 100644
> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -711,7 +711,7 @@ static inline bool hvm_altp2m_supported(void)
> /* Returns true if we have the minimum hardware requirements for nested virt */
> static inline bool hvm_nested_virt_supported(void)
> {
> - return hvm_funcs.caps.nested_virt;
> + return IS_ENABLED(CONFIG_NESTED_VIRT) && hvm_funcs.caps.nested_virt;
> }
>
> #ifdef CONFIG_ALTP2M
> diff --git a/xen/arch/x86/include/asm/hvm/nestedhvm.h b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> index ea2c1bc328..2f8209271a 100644
> --- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> +++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> @@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
> /* Nested HVM on/off per domain */
> static inline bool nestedhvm_enabled(const struct domain *d)
> {
> - return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
> + return IS_ENABLED(CONFIG_NESTED_VIRT) &&
> + (d->options & XEN_DOMCTL_CDF_nested_virt);
> }
>
> +/* Nested paging */
> +#define NESTEDHVM_PAGEFAULT_DONE 0
> +#define NESTEDHVM_PAGEFAULT_INJECT 1
> +#define NESTEDHVM_PAGEFAULT_L1_ERROR 2
> +#define NESTEDHVM_PAGEFAULT_L0_ERROR 3
> +#define NESTEDHVM_PAGEFAULT_MMIO 4
> +#define NESTEDHVM_PAGEFAULT_RETRY 5
> +#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> +
> +#ifdef CONFIG_NESTED_VIRT
> +
> /* Nested VCPU */
> int nestedhvm_vcpu_initialise(struct vcpu *v);
> void nestedhvm_vcpu_destroy(struct vcpu *v);
> @@ -38,14 +50,6 @@ bool nestedhvm_vcpu_in_guestmode(struct vcpu *v);
> #define nestedhvm_vcpu_exit_guestmode(v) \
> vcpu_nestedhvm(v).nv_guestmode = 0
>
> -/* Nested paging */
> -#define NESTEDHVM_PAGEFAULT_DONE 0
> -#define NESTEDHVM_PAGEFAULT_INJECT 1
> -#define NESTEDHVM_PAGEFAULT_L1_ERROR 2
> -#define NESTEDHVM_PAGEFAULT_L0_ERROR 3
> -#define NESTEDHVM_PAGEFAULT_MMIO 4
> -#define NESTEDHVM_PAGEFAULT_RETRY 5
> -#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
> struct npfec npfec);
>
> @@ -59,6 +63,48 @@ unsigned long *nestedhvm_vcpu_iomap_get(bool ioport_80, bool ioport_ed);
>
> void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
>
> +#else /* !CONFIG_NESTED_VIRT */
There are a lot more stubs here than needed.
> +
> +static inline int nestedhvm_vcpu_initialise(struct vcpu *v)
> +{
> + ASSERT_UNREACHABLE();
> + return -EOPNOTSUPP;
> +}
Can remove.
> +static inline void nestedhvm_vcpu_destroy(struct vcpu *v) { }
Must stay.
> +static inline void nestedhvm_vcpu_reset(struct vcpu *v)
> +{
> + ASSERT_UNREACHABLE();
> +}
Can remove.
> +static inline bool nestedhvm_vcpu_in_guestmode(struct vcpu *v) { return false; }
Must stay.
> +static inline int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
> + struct npfec npfec)
> +{
> + ASSERT_UNREACHABLE();
> + return NESTEDHVM_PAGEFAULT_L0_ERROR;
> +}
Can remove
> +static inline void nestedhvm_vcpu_enter_guestmode(struct vcpu *v)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline void nestedhvm_vcpu_exit_guestmode(struct vcpu *v)
> +{
> + ASSERT_UNREACHABLE();
> +}
These two can be removed. It might be good to keep the real macros hidden under
CONFIG_NESTED_VIRT though to ensure they can't be called.
> +static inline bool nestedhvm_paging_mode_hap(struct vcpu *v)
> +{
> + return false;
> +}
This can be removed with a cleaner IS_ENABLED() check in nhvm_vmcx_hap_enabled()
> +static inline bool nestedhvm_vmswitch_in_progress(struct vcpu *v)
> +{
> + return false;
> +}
Would be cleaner with an IS_ENABLED() check in the macro itself, IMO.
> +static inline void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m)
> +{
> + ASSERT_UNREACHABLE();
> +}
Can remove.
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
> static inline bool nestedhvm_is_n2(struct vcpu *v)
> {
> if ( !nestedhvm_enabled(v->domain) ||
> diff --git a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> index da10d3fa96..d0c1ae29f6 100644
> --- a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> +++ b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
Most stubs in this header aren't needed.
You only need nvmx_cpu_up(), nvmx_cpu_dead(), nvmx_msr_read_intercept() and
nvmx_handle_insn(). There's also a bug in that last one...
> @@ -73,6 +73,8 @@ union vmx_inst_info {
> u32 word;
> };
>
> +#ifdef CONFIG_NESTED_VIRT
> +
> int cf_check nvmx_vcpu_initialise(struct vcpu *v);
> void cf_check nvmx_vcpu_destroy(struct vcpu *v);
> int cf_check nvmx_vcpu_reset(struct vcpu *v);
> @@ -199,5 +201,77 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
> uint64_t *exit_qual, uint32_t *exit_reason);
> int nvmx_cpu_up_prepare(unsigned int cpu);
> void nvmx_cpu_dead(unsigned int cpu);
> +int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event *event);
> +void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> + unsigned long intr_fields, int error_code, uint8_t source);
> +
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline void nvmx_update_exec_control(struct vcpu *v, u32 value)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline void nvmx_update_secondary_exec_control(struct vcpu *v,
> + unsigned long value)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline void nvmx_update_exception_bitmap(struct vcpu *v,
> + unsigned long value)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline u64 nvmx_get_tsc_offset(struct vcpu *v)
> +{
> + ASSERT_UNREACHABLE();
> + return 0;
> +}
> +static inline void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline bool nvmx_intercepts_exception(struct vcpu *v, unsigned int vector,
> + int error_code)
> +{
> + ASSERT_UNREACHABLE();
> + return false;
> +}
> +static inline int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
> + unsigned int exit_reason)
> +{
> + ASSERT_UNREACHABLE();
> + return 0;
> +}
> +static inline void nvmx_idtv_handling(void)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
> +{
> + /* return 0 to trigger #GP */
> + return 0;
> +}
> +static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
> + unsigned int exit_reason)
> +{
> + ASSERT_UNREACHABLE();
> + return X86EMUL_EXCEPTION;
> +}
... here. This is perfectly reachable and will cause a hypervisor crash should
an L1 try to probe the VMX-family of instructions. Even on release this would
behave very oddly because you're missing injecting #UD. This stub should be:
hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
return X86EMUL_EXCEPTION;
> +static inline int nvmx_cpu_up_prepare(unsigned int cpu) { return 0; }
> +static inline void nvmx_cpu_dead(unsigned int cpu) { }
> +static inline void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> + unsigned long intr_fields, int error_code, uint8_t source)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +
> +static inline u64 get_vvmcs(const struct vcpu *vcpu, u32 encoding)
> +{
> + ASSERT_UNREACHABLE();
> + return 0;
> +}
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
> #endif /* __ASM_X86_HVM_VVMX_H__ */
>
> diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
> index 960f6e8409..aa15811c2e 100644
> --- a/xen/arch/x86/mm/Makefile
> +++ b/xen/arch/x86/mm/Makefile
> @@ -7,7 +7,7 @@ obj-$(CONFIG_SHADOW_PAGING) += guest_walk_4.o
> obj-$(CONFIG_VM_EVENT) += mem_access.o
> obj-$(CONFIG_MEM_PAGING) += mem_paging.o
> obj-$(CONFIG_MEM_SHARING) += mem_sharing.o
> -obj-$(CONFIG_HVM) += nested.o
> +obj-$(CONFIG_NESTED_VIRT) += nested.o
> obj-$(CONFIG_HVM) += p2m.o
> obj-y += p2m-basic.o
> obj-$(CONFIG_INTEL_VMX) += p2m-ept.o
> diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
> index 67c29b2162..efdc91ea82 100644
> --- a/xen/arch/x86/mm/hap/Makefile
> +++ b/xen/arch/x86/mm/hap/Makefile
> @@ -2,5 +2,6 @@ obj-y += hap.o
> obj-y += guest_walk_2.o
> obj-y += guest_walk_3.o
> obj-y += guest_walk_4.o
> -obj-y += nested_hap.o
> -obj-$(CONFIG_INTEL_VMX) += nested_ept.o
> +nested-y := nested_hap.o
> +nested-$(CONFIG_INTEL_VMX) += nested_ept.o
> +obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
Why not use plain filter?
-obj-y += nested_hap.o
+obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
-obj-$(CONFIG_INTEL_VMX) += nested_ept.o
+obj-$(filter $(CONFIG_NESTED_VIRT),$(CONFIG_INTEL_VMX)) += nested_ept.o
> diff --git a/xen/arch/x86/mm/p2m.h b/xen/arch/x86/mm/p2m.h
> index 635f5a7f45..63808dddcc 100644
> --- a/xen/arch/x86/mm/p2m.h
> +++ b/xen/arch/x86/mm/p2m.h
> @@ -25,9 +25,15 @@ void p2m_teardown_altp2m(struct domain *d);
> void p2m_flush_table_locked(struct p2m_domain *p2m);
> int __must_check p2m_remove_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
> unsigned int page_order);
> +#ifdef CONFIG_NESTED_VIRT
> void p2m_nestedp2m_init(struct p2m_domain *p2m);
> int p2m_init_nestedp2m(struct domain *d);
> void p2m_teardown_nestedp2m(struct domain *d);
> +#else
> +static inline void p2m_nestedp2m_init(struct p2m_domain *p2m) { }
> +static inline int p2m_init_nestedp2m(struct domain *d) { return 0; }
> +static inline void p2m_teardown_nestedp2m(struct domain *d) { }
> +#endif
Seeing how there's a single callsite I'd rather see those callsites check for
IS_ENABLED(), I think.
>
> int ept_p2m_init(struct p2m_domain *p2m);
> void ept_p2m_uninit(struct p2m_domain *p2m);
> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
> index 1b04947516..b1d865e1c8 100644
> --- a/xen/arch/x86/sysctl.c
> +++ b/xen/arch/x86/sysctl.c
> @@ -103,6 +103,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi)
> pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap;
> if ( IS_ENABLED(CONFIG_SHADOW_PAGING) )
> pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow;
> + if ( hvm_nested_virt_supported() )
> + pi->capabilities |= XEN_SYSCTL_PHYSCAP_nestedhvm;
> }
>
> long arch_do_sysctl(
> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> index 66c9b65465..b4bd1dd7b2 100644
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -100,9 +100,11 @@ struct xen_sysctl_tbuf_op {
> /* Xen supports the Grant v1 and/or v2 ABIs. */
> #define XEN_SYSCTL_PHYSCAP_gnttab_v1 (1u << 8)
> #define XEN_SYSCTL_PHYSCAP_gnttab_v2 (1u << 9)
> +/* The platform supports nested HVM. */
> +#define XEN_SYSCTL_PHYSCAP_nestedhvm (1u << 10)
>
> /* Max XEN_SYSCTL_PHYSCAP_* constant. Used for ABI checking. */
> -#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_gnttab_v2
> +#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_nestedhvm
>
> #if defined(__arm__) || defined(__aarch64__)
> #define XEN_SYSCTL_PHYSCAP_ARM_SVE_MASK (0x1FU)
Cheers,
Alejandro
On 16.02.2026 11:34, Alejandro Vallejo wrote: > On Fri Feb 13, 2026 at 11:02 PM CET, Stefano Stabellini wrote: >> --- a/xen/arch/x86/hvm/Kconfig >> +++ b/xen/arch/x86/hvm/Kconfig >> @@ -92,4 +92,11 @@ config MEM_SHARING >> bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED >> depends on INTEL_VMX >> >> +config NESTED_VIRT >> + bool "Nested virtualization support" >> + depends on HVM >> + help >> + Enable nested virtualization, allowing guests to run their own >> + hypervisors. This requires hardware support. > > nit: If we state above "allowing HVM guests..." rather than plain "guests" we can > then get rid of the "This requires hardware support line". What you probably > meant is that this is HVM-only and we don't allow PV nesting. > > "This requires hardware support" makes me (the user) think my hardware needs > something special to support nesting, when in reality I just need HVM support. When replying, I also initially meant to make this broad a statement, but then went to check: While indeed it ought to be possible to implement nested without further hw support, both demands HAP and SVM demands a few more advanced features (see start_nested_svm()). >> --- a/xen/arch/x86/mm/hap/Makefile >> +++ b/xen/arch/x86/mm/hap/Makefile >> @@ -2,5 +2,6 @@ obj-y += hap.o >> obj-y += guest_walk_2.o >> obj-y += guest_walk_3.o >> obj-y += guest_walk_4.o >> -obj-y += nested_hap.o >> -obj-$(CONFIG_INTEL_VMX) += nested_ept.o >> +nested-y := nested_hap.o >> +nested-$(CONFIG_INTEL_VMX) += nested_ept.o >> +obj-$(CONFIG_NESTED_VIRT) += $(nested-y) > > Why not use plain filter? > > -obj-y += nested_hap.o > +obj-$(CONFIG_NESTED_VIRT) += nested_hap.o > -obj-$(CONFIG_INTEL_VMX) += nested_ept.o > +obj-$(filter $(CONFIG_NESTED_VIRT),$(CONFIG_INTEL_VMX)) += nested_ept.o It may have been like this in v1. Problem being that this leads to long lines, which doesn't scale very well (and is - imo - harder to read). 
Especially when you consider what happens when it's more than two settings that need checking. The list approach easily scales to about anything (by using as many separate lists as you need). Jan
On Mon Feb 16, 2026 at 12:01 PM CET, Jan Beulich wrote: >>> --- a/xen/arch/x86/mm/hap/Makefile >>> +++ b/xen/arch/x86/mm/hap/Makefile >>> @@ -2,5 +2,6 @@ obj-y += hap.o >>> obj-y += guest_walk_2.o >>> obj-y += guest_walk_3.o >>> obj-y += guest_walk_4.o >>> -obj-y += nested_hap.o >>> -obj-$(CONFIG_INTEL_VMX) += nested_ept.o >>> +nested-y := nested_hap.o >>> +nested-$(CONFIG_INTEL_VMX) += nested_ept.o >>> +obj-$(CONFIG_NESTED_VIRT) += $(nested-y) >> >> Why not use plain filter? >> >> -obj-y += nested_hap.o >> +obj-$(CONFIG_NESTED_VIRT) += nested_hap.o >> -obj-$(CONFIG_INTEL_VMX) += nested_ept.o >> +obj-$(filter $(CONFIG_NESTED_VIRT),$(CONFIG_INTEL_VMX)) += nested_ept.o > > It may have been like this in v1. Problem being that this leads to long lines, > which doesn't scale very well (and is - imo - harder to read). Especially when > you consider what happens when it's more than two settings that need checking. > The list approach easily scales to about anything (by using as many separate > lists as you need). > > Jan I'd agree should we need more than 2 settings, long config parameter names or long filenames, but none of that applies here. It fits neatly in within 80 columns and the extra indirection bumps the cognitive load (subjectively speaking) way more than the single line does. Plus, it takes more vertical space. Even then, I'd rather have ifeq on the 3rd and/or 4th parameters and filter inside, which makes the group stand out much better and doesn't pollute the global namespace with even more names. And there's the matter of "filter" being in use very prevalently elsewhere. My .02, anyway. They are functionally equivalent, after all. Cheers, Alejandro
On 13.02.2026 23:02, Stefano Stabellini wrote:
> Introduce CONFIG_NESTED_VIRT (default n) to allow nested virtualization
> support to be disabled at build time. This is useful for embedded or
> safety-focused deployments where nested virtualization is not needed,
> reducing code size and attack surface.
>
> When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> - arch/x86/hvm/nestedhvm.c
> - arch/x86/hvm/svm/nestedsvm.c
> - arch/x86/hvm/vmx/vvmx.c
> - arch/x86/mm/nested.c
> - arch/x86/mm/hap/nested_hap.c
> - arch/x86/mm/hap/nested_ept.c
>
> Add inline stubs where needed in headers. Guard assembly code paths
> for nested virt with #ifdef CONFIG_NESTED_VIRT. Move exception
> injection for VMX/SVM instructions to the callers in vmx.c/svm.c to
> avoid header dependency issues in the stubs.
>
> No functional change when CONFIG_NESTED_VIRT=y.
>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com>
>
> ---
> Changes in v3:
> - Kconfig: Change "depends on AMD_SVM || INTEL_VMX" to "depends on HVM"
> - Kconfig: Remove redundant "default n" line
> - Kconfig: Remove "If unsure, say N." from help text
> - mm/hap/Makefile: Simplify using intermediate nested-y variable:
> nested-y := nested_hap.o
> nested-$(CONFIG_INTEL_VMX) += nested_ept.o
> obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
> - svm/nestedhvm.h: Remove #ifdef CONFIG_NESTED_VIRT stubs, keep only
> function declarations (the functions are only called from code that
> is already compiled out when nested virt is disabled)
> - svm/nestedhvm.h: Add CONFIG_NESTED_VIRT guard to nsvm_efer_svm_enabled
> macro to return false when nested virt is disabled
> - svm/svm.c: Move #UD injection for STGI/CLGI to the caller instead of
> stub functions, checking nestedhvm_enabled()/nsvm_efer_svm_enabled()
> - svm/svm.c: Mark svm_vmexit_do_vmrun/vmload/vmsave as __maybe_unused
> - svm/svm.c: Remove empty nsvm_vcpu_switch stub (now guarded in asm)
> - svm/entry.S: Add #ifdef CONFIG_NESTED_VIRT guards around nested virt
> specific code paths
> - vmx/vmx.c: Remove empty nvmx_switch_guest stub (now guarded in asm)
> - vmx/vmx.c: Move nvmx_enqueue_n2_exceptions and nvmx_vmexit_event to
> vvmx.c where they belong
> - vmx/vvmx.h: Add declarations for nvmx_vmexit_event and
> nvmx_enqueue_n2_exceptions
> - vmx/vvmx.h: Fix nvmx_msr_read_intercept stub comment
> - vmx/vvmx.h: nvmx_handle_vmx_insn stub returns X86EMUL_EXCEPTION with
> ASSERT_UNREACHABLE (caller handles injection)
> - vmx/vvmx.h: Convert get_vvmcs macro to inline function in stubs
> - vmx/entry.S: Add #ifdef CONFIG_NESTED_VIRT guard around nvmx_switch_guest
> - nestedhvm.h: Convert macro stubs to proper inline functions
Oh, wow, that's an almost complete re-write?
> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -92,4 +92,11 @@ config MEM_SHARING
> bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
> depends on INTEL_VMX
>
> +config NESTED_VIRT
> + bool "Nested virtualization support"
> + depends on HVM
> + help
> + Enable nested virtualization, allowing guests to run their own
> + hypervisors. This requires hardware support.
What's the last sentence about? HVM itself already requires hardware
support, yet that's about it especially for VMX (where only HAP is the
other requirement), isn't it? If this is about those advanced features,
perhaps this would then want to be more specific?
> --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> @@ -24,7 +24,7 @@
>
> /* True when l1 guest enabled SVM in EFER */
> #define nsvm_efer_svm_enabled(v) \
> - (!!((v)->arch.hvm.guest_efer & EFER_SVME))
> + (IS_ENABLED(CONFIG_NESTED_VIRT) && ((v)->arch.hvm.guest_efer & EFER_SVME))
Constructs like these are on the edge: Yes, passing in an expression with a side
effect isn't very likely here. Yet still, this being a widely visible macro, I
wonder if it wouldn't better guarantee v to be evaluated exactly once.
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -2165,7 +2165,7 @@ static void svm_vmexit_do_pause(struct cpu_user_regs *regs)
> vcpu_yield();
> }
>
> -static void
> +static void __maybe_unused
> svm_vmexit_do_vmrun(struct cpu_user_regs *regs,
> struct vcpu *v, uint64_t vmcbaddr)
> {
> @@ -2211,7 +2211,7 @@ nsvm_get_nvmcb_page(struct vcpu *v, uint64_t vmcbaddr)
> return page;
> }
>
> -static void
> +static void __maybe_unused
> svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
> struct cpu_user_regs *regs,
> struct vcpu *v, uint64_t vmcbaddr)
> @@ -2246,7 +2246,7 @@ svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
> __update_guest_eip(regs, inst_len);
> }
>
> -static void
> +static void __maybe_unused
> svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
> struct cpu_user_regs *regs,
> struct vcpu *v, uint64_t vmcbaddr)
Why are these needed? The call sites don't go away afaics.
If these are nevertheless needed, question is whether a suitable single #ifdef
might not be tidier.
> @@ -3011,10 +3013,16 @@ void asmlinkage svm_vmexit_handler(void)
> svm_vmexit_do_vmsave(vmcb, regs, v, regs->rax);
> break;
> case VMEXIT_STGI:
> - svm_vmexit_do_stgi(regs, v);
> + if ( !nestedhvm_enabled(v->domain) )
> + hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
> + else
> + svm_vmexit_do_stgi(regs, v);
> break;
> case VMEXIT_CLGI:
> - svm_vmexit_do_clgi(regs, v);
> + if ( !nsvm_efer_svm_enabled(v) )
> + hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
> + else
> + svm_vmexit_do_clgi(regs, v);
> break;
These render respective checks in the functions themselves dead, which in
particular means the bodies of those if()s there are then unreachable (a
Misra violation of a rule we did accept).
> @@ -2942,8 +2916,9 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
> .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
> .nhvm_intr_blocked = nvmx_intr_blocked,
> .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
> - .update_vlapic_mode = vmx_vlapic_msr_changed,
I realize the = wasn't properly padded here, but ...
> .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
> +#endif
> + .update_vlapic_mode = vmx_vlapic_msr_changed,
... can you please do so while moving the line?
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -2821,6 +2821,32 @@ void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
> __vmwrite(read_shadow_field, v->arch.hvm.nvcpu.guest_cr[cr]);
> }
>
> +void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> + unsigned long intr_fields, int error_code, uint8_t source)
While moving, can obvious style issues please be addressed? Bad indentation
here, ...
> +{
> + struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
> +
> + if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
... misplaced brace here, and ...
> + /* enqueue the exception till the VMCS switch back to L1 */
... malformed comment here.
> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -711,7 +711,7 @@ static inline bool hvm_altp2m_supported(void)
> /* Returns true if we have the minimum hardware requirements for nested virt */
> static inline bool hvm_nested_virt_supported(void)
> {
> - return hvm_funcs.caps.nested_virt;
> + return IS_ENABLED(CONFIG_NESTED_VIRT) && hvm_funcs.caps.nested_virt;
> }
Should the field itself perhaps become conditional?
> --- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> +++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> @@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
> /* Nested HVM on/off per domain */
> static inline bool nestedhvm_enabled(const struct domain *d)
> {
> - return IS_ENABLED(CONFIG_HVM) && (d->options & XEN_DOMCTL_CDF_nested_virt);
> + return IS_ENABLED(CONFIG_NESTED_VIRT) &&
> + (d->options & XEN_DOMCTL_CDF_nested_virt);
> }
>
> +/* Nested paging */
> +#define NESTEDHVM_PAGEFAULT_DONE 0
> +#define NESTEDHVM_PAGEFAULT_INJECT 1
> +#define NESTEDHVM_PAGEFAULT_L1_ERROR 2
> +#define NESTEDHVM_PAGEFAULT_L0_ERROR 3
> +#define NESTEDHVM_PAGEFAULT_MMIO 4
> +#define NESTEDHVM_PAGEFAULT_RETRY 5
> +#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> +
> +#ifdef CONFIG_NESTED_VIRT
In a reply to my comment on v1 (Or was it v2? This submission isn't tagged.),
you referred me to the stub nestedhvm_hap_nested_page_fault() using the
constant. However, why would that stub be needed when the sole call site of
the function lives in a conditional using nestedhvm_enabled() (which is
compile-time false when NESTED_VIRT=n)? All you need to make sure is that
the decl remains available. I then wonder for how many of the other stubs
which might be the case as well.
> @@ -199,5 +201,77 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
> uint64_t *exit_qual, uint32_t *exit_reason);
> int nvmx_cpu_up_prepare(unsigned int cpu);
> void nvmx_cpu_dead(unsigned int cpu);
> +int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event *event);
> +void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> + unsigned long intr_fields, int error_code, uint8_t source);
Nit: Bad indentation even copied here.
Jan
On 2/13/26 17:02, Stefano Stabellini wrote: > Introduce CONFIG_NESTED_VIRT (default n) to allow nested virtualization > support to be disabled at build time. This is useful for embedded or > safety-focused deployments where nested virtualization is not needed, > reducing code size and attack surface. > > When CONFIG_NESTED_VIRT=n, the following source files are excluded: > - arch/x86/hvm/nestedhvm.c > - arch/x86/hvm/svm/nestedsvm.c > - arch/x86/hvm/vmx/vvmx.c > - arch/x86/mm/nested.c > - arch/x86/mm/hap/nested_hap.c > - arch/x86/mm/hap/nested_ept.c > > Add inline stubs where needed in headers. Guard assembly code paths > for nested virt with #ifdef CONFIG_NESTED_VIRT. Move exception > injection for VMX/SVM instructions to the callers in vmx.c/svm.c to > avoid header dependency issues in the stubs. > > No functional change when CONFIG_NESTED_VIRT=y. > > Signed-off-by: Stefano Stabellini <stefano.stabellini@amd.com> > > --- > Changes in v3: > - Kconfig: Change "depends on AMD_SVM || INTEL_VMX" to "depends on HVM" > - Kconfig: Remove redundant "default n" line > - Kconfig: Remove "If unsure, say N." 
from help text > - mm/hap/Makefile: Simplify using intermediate nested-y variable: > nested-y := nested_hap.o > nested-$(CONFIG_INTEL_VMX) += nested_ept.o > obj-$(CONFIG_NESTED_VIRT) += $(nested-y) > - svm/nestedhvm.h: Remove #ifdef CONFIG_NESTED_VIRT stubs, keep only > function declarations (the functions are only called from code that > is already compiled out when nested virt is disabled) > - svm/nestedhvm.h: Add CONFIG_NESTED_VIRT guard to nsvm_efer_svm_enabled > macro to return false when nested virt is disabled > - svm/svm.c: Move #UD injection for STGI/CLGI to the caller instead of > stub functions, checking nestedhvm_enabled()/nsvm_efer_svm_enabled() > - svm/svm.c: Mark svm_vmexit_do_vmrun/vmload/vmsave as __maybe_unused > - svm/svm.c: Remove empty nsvm_vcpu_switch stub (now guarded in asm) > - svm/entry.S: Add #ifdef CONFIG_NESTED_VIRT guards around nested virt > specific code paths > - vmx/vmx.c: Remove empty nvmx_switch_guest stub (now guarded in asm) > - vmx/vmx.c: Move nvmx_enqueue_n2_exceptions and nvmx_vmexit_event to > vvmx.c where they belong > - vmx/vvmx.h: Add declarations for nvmx_vmexit_event and > nvmx_enqueue_n2_exceptions > - vmx/vvmx.h: Fix nvmx_msr_read_intercept stub comment > - vmx/vvmx.h: nvmx_handle_vmx_insn stub returns X86EMUL_EXCEPTION with > ASSERT_UNREACHABLE (caller handles injection) > - vmx/vvmx.h: Convert get_vvmcs macro to inline function in stubs > - vmx/entry.S: Add #ifdef CONFIG_NESTED_VIRT guard around nvmx_switch_guest > - nestedhvm.h: Convert macro stubs to proper inline functions > --- > xen/arch/x86/hvm/Kconfig | 7 +++ > xen/arch/x86/hvm/Makefile | 2 +- > xen/arch/x86/hvm/svm/Makefile | 2 +- > xen/arch/x86/hvm/svm/entry.S | 4 ++ > xen/arch/x86/hvm/svm/nestedhvm.h | 2 +- > xen/arch/x86/hvm/svm/svm.c | 18 ++++-- > xen/arch/x86/hvm/vmx/Makefile | 2 +- > xen/arch/x86/hvm/vmx/entry.S | 2 + > xen/arch/x86/hvm/vmx/vmx.c | 31 +--------- > xen/arch/x86/hvm/vmx/vvmx.c | 26 +++++++++ > xen/arch/x86/include/asm/hvm/hvm.h | 2 +- > 
xen/arch/x86/include/asm/hvm/nestedhvm.h | 64 +++++++++++++++++--- > xen/arch/x86/include/asm/hvm/vmx/vvmx.h | 74 ++++++++++++++++++++++++ > xen/arch/x86/mm/Makefile | 2 +- > xen/arch/x86/mm/hap/Makefile | 5 +- > xen/arch/x86/mm/p2m.h | 6 ++ > xen/arch/x86/sysctl.c | 2 + > xen/include/public/sysctl.h | 4 +- > 18 files changed, 204 insertions(+), 51 deletions(-) > > diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig > index f32bf5cbb7..af661385b5 100644 > --- a/xen/arch/x86/hvm/Kconfig > +++ b/xen/arch/x86/hvm/Kconfig > @@ -92,4 +92,11 @@ config MEM_SHARING > bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED > depends on INTEL_VMX > > +config NESTED_VIRT > + bool "Nested virtualization support> + depends on HVM > + help > + Enable nested virtualization, allowing guests to run their own > + hypervisors. This requires hardware support. Should this also come with a warning that allowing guests to use nested virtualization is insecure unless both L1 and L2 guests are trusted? -- Sincerely, Demi Marie Obenour (she/her/hers)
© 2016 - 2026 Red Hat, Inc.