[PATCH v2 3/7] x86/sev: add support for RMPOPT instruction

Ashish Kalra posted 7 patches 5 hours ago
[PATCH v2 3/7] x86/sev: add support for RMPOPT instruction
Posted by Ashish Kalra 5 hours ago
From: Ashish Kalra <ashish.kalra@amd.com>

As SEV-SNP is enabled by default on boot when an RMP table is
allocated by BIOS, the hypervisor and non-SNP guests are subject to
RMP write checks to provide integrity of SNP guest memory.

RMPOPT is a new instruction that minimizes the performance overhead of
RMP checks on the hypervisor and on non-SNP guests by allowing RMP
checks to be skipped for 1GB regions of memory that are known not to
contain any SEV-SNP guest memory.

Enable RMPOPT optimizations globally for all system RAM at RMP
initialization time. RMP checks can initially be skipped for 1GB memory
ranges that do not contain SEV-SNP guest memory (excluding preassigned
pages such as the RMP table and firmware pages). As SNP guests are
launched, RMPUPDATE will disable the corresponding RMPOPT optimizations.

Suggested-by: Thomas Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 arch/x86/virt/svm/sev.c | 78 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 405199c2f563..c99270dfe3b3 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -19,6 +19,7 @@
 #include <linux/iommu.h>
 #include <linux/amd-iommu.h>
 #include <linux/nospec.h>
+#include <linux/kthread.h>
 
 #include <asm/sev.h>
 #include <asm/processor.h>
@@ -122,6 +123,13 @@ static u64 rmp_cfg;
 
 static u64 probed_rmp_base, probed_rmp_size;
 
+enum rmpopt_function {
+	RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS,
+	RMPOPT_FUNC_REPORT_STATUS
+};
+
+static struct task_struct *rmpopt_task;
+
 static LIST_HEAD(snp_leaked_pages_list);
 static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
 
@@ -500,6 +508,61 @@ static bool __init setup_rmptable(void)
 	}
 }
 
+/*
+ * 'val' is a system physical address aligned to 1GB OR'ed with
+ * a function selection. Currently supported functions are 0
+ * (verify and report status) and 1 (report status).
+ */
+static void rmpopt(void *val)
+{
+	asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfc"
+		     : : "a" ((u64)val & PUD_MASK), "c" ((u64)val & 0x1)
+		     : "memory", "cc");
+}
+
+static int rmpopt_kthread(void *__unused)
+{
+	phys_addr_t pa_start, pa_end;
+
+	pa_start = ALIGN_DOWN(PFN_PHYS(min_low_pfn), PUD_SIZE);
+	pa_end = ALIGN(PFN_PHYS(max_pfn), PUD_SIZE);
+
+	/* Limit memory scanning to the first 2 TB of RAM */
+	pa_end = (pa_end - pa_start) <= SZ_2T ? pa_end : pa_start + SZ_2T;
+
+	while (!kthread_should_stop()) {
+		phys_addr_t pa;
+
+		pr_info("RMP optimizations enabled on physical address range @1GB alignment [0x%016llx - 0x%016llx]\n",
+			pa_start, pa_end);
+
+		/*
+		 * RMPOPT optimizations skip RMP checks at 1GB granularity if this range of
+		 * memory does not contain any SNP guest memory.
+		 */
+		for (pa = pa_start; pa < pa_end; pa += PUD_SIZE) {
+			/* Bit zero passes the function to the RMPOPT instruction. */
+			on_each_cpu_mask(cpu_online_mask, rmpopt,
+					 (void *)(pa | RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS),
+					 true);
+
+			 /* Give a chance for other threads to run */
+			cond_resched();
+		}
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+	}
+
+	return 0;
+}
+
+static void rmpopt_all_physmem(void)
+{
+	if (rmpopt_task)
+		wake_up_process(rmpopt_task);
+}
+
 static void __configure_rmpopt(void *val)
 {
 	u64 rmpopt_base = ((u64)val & PUD_MASK) | MSR_AMD64_RMPOPT_ENABLE;
@@ -533,6 +596,21 @@ static __init void configure_and_enable_rmpopt(void)
 	 * up to 2TB of system RAM on all CPUs.
 	 */
 	on_each_cpu_mask(cpu_online_mask, __configure_rmpopt, (void *)pa_start, true);
+
+	rmpopt_task = kthread_create(rmpopt_kthread, NULL, "rmpopt_kthread");
+	if (IS_ERR(rmpopt_task)) {
+		pr_warn("Unable to start RMPOPT kernel thread\n");
+		rmpopt_task = NULL;
+		return;
+	}
+
+	pr_info("RMPOPT worker thread created with PID %d\n", task_pid_nr(rmpopt_task));
+
+	/*
+	 * Once all per-CPU RMPOPT tables have been configured, enable RMPOPT
+	 * optimizations on all physical memory.
+	 */
+	rmpopt_all_physmem();
 }
 
 /*
-- 
2.43.0
Re: [PATCH v2 3/7] x86/sev: add support for RMPOPT instruction
Posted by Dave Hansen 4 hours ago
That subject could use a wee bit of work.

I'd probably talk about this adding a new kernel thread that does the
optimizations asynchronously.


On 3/2/26 13:36, Ashish Kalra wrote:
> From: Ashish Kalra <ashish.kalra@amd.com>
> 
> As SEV-SNP is enabled by default on boot when an RMP table is
> allocated by BIOS, the hypervisor and non-SNP guests are subject to
> RMP write checks to provide integrity of SNP guest memory.
> 
> RMPOPT is a new instruction that minimizes the performance overhead of
> RMP checks on the hypervisor and on non-SNP guests by allowing RMP
> checks to be skipped for 1GB regions of memory that are known not to
> contain any SEV-SNP guest memory.
> 
> Enable RMPOPT optimizations globally for all system RAM at RMP
> initialization time. RMP checks can initially be skipped for 1GB memory
> ranges that do not contain SEV-SNP guest memory (excluding preassigned
> pages such as the RMP table and firmware pages). As SNP guests are
> launched, RMPUPDATE will disable the corresponding RMPOPT optimizations.

This is heavy on the "what" and light on the "why" and "how".

> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> index 405199c2f563..c99270dfe3b3 100644
> --- a/arch/x86/virt/svm/sev.c
> +++ b/arch/x86/virt/svm/sev.c
> @@ -19,6 +19,7 @@
>  #include <linux/iommu.h>
>  #include <linux/amd-iommu.h>
>  #include <linux/nospec.h>
> +#include <linux/kthread.h>
>  
>  #include <asm/sev.h>
>  #include <asm/processor.h>
> @@ -122,6 +123,13 @@ static u64 rmp_cfg;
>  
>  static u64 probed_rmp_base, probed_rmp_size;
>  
> +enum rmpopt_function {
> +	RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS,
> +	RMPOPT_FUNC_REPORT_STATUS
> +};

Shouldn't these go by the instruction definition?

You could even call it rmpopt_rcx or something.

> +static struct task_struct *rmpopt_task;
> +
>  static LIST_HEAD(snp_leaked_pages_list);
>  static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
>  
> @@ -500,6 +508,61 @@ static bool __init setup_rmptable(void)
>  	}
>  }
>  
> +/*
> + * 'val' is a system physical address aligned to 1GB OR'ed with
> + * a function selection. Currently supported functions are 0
> + * (verify and report status) and 1 (report status).
> + */
> +static void rmpopt(void *val)
> +{
> +	asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfc"
> +		     : : "a" ((u64)val & PUD_MASK), "c" ((u64)val & 0x1)
> +		     : "memory", "cc");
> +}

Doesn't this belong in:

arch/x86/include/asm/special_insns.h

Also, it's not reporting *any* status here, right? So why even talk
about it if the kernel isn't doing any status checks? It just makes it
more confusing.

> +static int rmpopt_kthread(void *__unused)
> +{
> +	phys_addr_t pa_start, pa_end;
> +
> +	pa_start = ALIGN_DOWN(PFN_PHYS(min_low_pfn), PUD_SIZE);
> +	pa_end = ALIGN(PFN_PHYS(max_pfn), PUD_SIZE);

Needs vertical alignment:

	pa_start = ALIGN_DOWN(PFN_PHYS(min_low_pfn), PUD_SIZE);
	pa_end   = ALIGN(     PFN_PHYS(max_pfn),     PUD_SIZE);

Nit: the architecture says "1GB" regions, not PUD_SIZE. If we ever got
fancy and changed the page tables, this code would break. Why make it
harder on ourselves than it has to be?

> +	/* Limit memory scanning to the first 2 TB of RAM */
> +	pa_end = (pa_end - pa_start) <= SZ_2T ? pa_end : pa_start + SZ_2T;

That's a rather unfortunate use of ternary form. Isn't this a billion
times more clear?

	if (pa_end - pa_start > SZ_2T)
		pa_end = pa_start + SZ_2T;

> +	while (!kthread_should_stop()) {
> +		phys_addr_t pa;
> +
> +		pr_info("RMP optimizations enabled on physical address range @1GB alignment [0x%016llx - 0x%016llx]\n",
> +			pa_start, pa_end);

This isn't really enabling optimizations. It's trying to enable them,
right? It might fall on its face and fail every time, right?

> +		/*
> +		 * RMPOPT optimizations skip RMP checks at 1GB granularity if this range of
> +		 * memory does not contain any SNP guest memory.
> +		 */
> +		for (pa = pa_start; pa < pa_end; pa += PUD_SIZE) {
> +			/* Bit zero passes the function to the RMPOPT instruction. */
> +			on_each_cpu_mask(cpu_online_mask, rmpopt,
> +					 (void *)(pa | RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS),
> +					 true);
> +
> +			 /* Give a chance for other threads to run */
> +			cond_resched();
> +		}

Could you also put together some proper helpers, please? The
lowest-level helper should look a lot like the instruction reference:

void __rmpopt(u64 rax, u64 rcx)
{
	asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfc"
		     : : "a" (rax), "c" (rcx)
		     : "memory", "cc");
}

Then you can have a higher-level instruction that shows how you convert
the logical things "physical address" and "rmpopt_function" into the
register arguments:

void rmpopt(unsigned long pa)
{
	u64 rax = ALIGN_DOWN(pa, SZ_1GB);
	u64 rcx = RMPOPT_FUNC_VERIFY_AND_REPORT_STATUS;

	__rmpopt(rax, rcx);
}

There's no need right now to pack and unpack rax/rcx from a pointer. Why
even bother when rcx is a fixed value?

> +		set_current_state(TASK_INTERRUPTIBLE);
> +		schedule();
> +	}
> +
> +	return 0;
> +}
> +
> +static void rmpopt_all_physmem(void)
> +{
> +	if (rmpopt_task)
> +		wake_up_process(rmpopt_task);
> +}

Wait a sec, doesn't this just run all the time? It'll be doing an RMPOPT
on something forever.
Re: [PATCH v2 3/7] x86/sev: add support for RMPOPT instruction
Posted by Kalra, Ashish 4 hours ago
Hello Dave,

On 3/2/2026 4:57 PM, Dave Hansen wrote:

>> +		set_current_state(TASK_INTERRUPTIBLE);
>> +		schedule();
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static void rmpopt_all_physmem(void)
>> +{
>> +	if (rmpopt_task)
>> +		wake_up_process(rmpopt_task);
>> +}
> 
> Wait a sec, doesn't this just run all the time? It'll be doing an RMPOPT
> on something forever.

The rmpopt_kthread will be sleeping until it is explicitly woken by wake_up_process() here.

When the schedule() function is called with the state set to TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE,
an additional step is performed: the currently executing process is moved off the run queue before
another process is scheduled. The effect of this is that the executing process goes to sleep,
as it is no longer on the run queue. Hence, it is never scheduled by the scheduler.

The thread would then be woken up by calling wake_up_process().

I believe, this is probably the simplest way of sleeping and waking in the kernel.

Thanks,
Ashish
Re: [PATCH v2 3/7] x86/sev: add support for RMPOPT instruction
Posted by Dave Hansen 4 hours ago
On 3/2/26 15:09, Kalra, Ashish wrote:
>>> +static void rmpopt_all_physmem(void)
>>> +{
>>> +	if (rmpopt_task)
>>> +		wake_up_process(rmpopt_task);
>>> +}
>> Wait a sec, doesn't this just run all the time? It'll be doing an RMPOPT
>> on something forever.
> The rmpopt_kthread will be sleeping, till it is woken explicitly by wake_up_process() here.

Sorry for the noise, I totally parsed that bit wrong.