[PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
Posted by Tom Lendacky 3 weeks, 6 days ago
In preparation for support of a segmented RMP table, treat the contiguous
RMP table as a segmented RMP table with a single segment covering all
of memory. By treating a contiguous RMP table as a single segment, much
of the code that initializes and accesses the RMP can be re-used.

Segmented RMP tables can have up to 512 segment entries. Each segment
will have metadata associated with it to identify the segment location,
the segment size, etc. The segment data and the physical address are used
to determine the index of the segment within the table and then the RMP
entry within the segment. For an actual segmented RMP table environment,
much of the segment information will come from a configuration MSR. For
the contiguous RMP, though, much of the information will be statically
defined.
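
For illustration, the lookup then reduces to the following (a sketch using
the names this patch introduces, with the bounds checks omitted):

	rst_index     = RST_ENTRY_INDEX(paddr);  /* paddr >> segment shift */
	desc          = rmp_segment_table[rst_index];
	segment_index = RMP_ENTRY_INDEX(paddr);  /* PFN of (paddr & segment mask) */
	entry         = desc->rmp_entry + segment_index;

In the contiguous case the segment shift is 52, so rst_index is always 0 and
segment_index is simply the PFN of the address.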

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
Reviewed-by: Neeraj Upadhyay <Neeraj.Upadhyay@amd.com>
---
 arch/x86/virt/svm/sev.c | 193 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 174 insertions(+), 19 deletions(-)

diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 5871c924c0b2..37ff4f98e8d1 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -18,6 +18,7 @@
 #include <linux/cpumask.h>
 #include <linux/iommu.h>
 #include <linux/amd-iommu.h>
+#include <linux/nospec.h>
 
 #include <asm/sev.h>
 #include <asm/processor.h>
@@ -77,12 +78,42 @@ struct rmpentry_raw {
  */
 #define RMPTABLE_CPU_BOOKKEEPING_SZ	0x4000
 
+/*
+ * For a non-segmented RMP table, use the maximum physical addressing as the
+ * segment size in order to always arrive at index 0 in the table.
+ */
+#define RMPTABLE_NON_SEGMENTED_SHIFT	52
+
+struct rmp_segment_desc {
+	struct rmpentry_raw *rmp_entry;
+	u64 max_index;
+	u64 size;
+};
+
+/*
+ * Segmented RMP Table support.
+ *   - The segment size is used for two purposes:
+ *     - Identify the amount of memory covered by an RMP segment
+ *     - Quickly locate an RMP segment table entry for a physical address
+ *
+ *   - The RMP segment table contains pointers to an RMP table that covers
+ *     a specific portion of memory. There can be up to 512 8-byte entries,
+ *     one page's worth.
+ */
+static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
+static unsigned int rst_max_index __ro_after_init = 512;
+
+static u64 rmp_segment_size_max;
+static unsigned int rmp_segment_coverage_shift;
+static u64 rmp_segment_coverage_size;
+static u64 rmp_segment_coverage_mask;
+#define RST_ENTRY_INDEX(x)	((x) >> rmp_segment_coverage_shift)
+#define RMP_ENTRY_INDEX(x)	((u64)(PHYS_PFN((x) & rmp_segment_coverage_mask)))
+
 /* Mask to apply to a PFN to get the first PFN of a 2MB page */
 #define PFN_PMD_MASK	GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
 
 static u64 probed_rmp_base, probed_rmp_size;
-static struct rmpentry_raw *rmptable __ro_after_init;
-static u64 rmptable_max_pfn __ro_after_init;
 
 static LIST_HEAD(snp_leaked_pages_list);
 static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
@@ -190,6 +221,92 @@ static bool __init init_rmptable_bookkeeping(void)
 	return true;
 }
 
+static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa)
+{
+	struct rmp_segment_desc *desc;
+	void *rmp_segment;
+	u64 rst_index;
+
+	/* Validate the RMP segment size */
+	if (segment_size > rmp_segment_size_max) {
+		pr_err("Invalid RMP size (%#llx) for configured segment size (%#llx)\n",
+		       segment_size, rmp_segment_size_max);
+		return false;
+	}
+
+	/* Validate the RMP segment table index */
+	rst_index = RST_ENTRY_INDEX(pa);
+	if (rst_index >= rst_max_index) {
+		pr_err("Invalid RMP segment base address (%#llx) for configured segment size (%#llx)\n",
+		       pa, rmp_segment_coverage_size);
+		return false;
+	}
+	rst_index = array_index_nospec(rst_index, rst_max_index);
+
+	if (rmp_segment_table[rst_index]) {
+		pr_err("RMP segment descriptor already exists at index %llu\n", rst_index);
+		return false;
+	}
+
+	/* Map the RMP entries */
+	rmp_segment = memremap(segment_pa, segment_size, MEMREMAP_WB);
+	if (!rmp_segment) {
+		pr_err("Failed to map RMP segment addr 0x%llx size 0x%llx\n",
+		       segment_pa, segment_size);
+		return false;
+	}
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc) {
+		memunmap(rmp_segment);
+		return false;
+	}
+
+	desc->rmp_entry = rmp_segment;
+	desc->max_index = segment_size / sizeof(*desc->rmp_entry);
+	desc->size = segment_size;
+
+	/* Add the segment descriptor to the table */
+	rmp_segment_table[rst_index] = desc;
+
+	return true;
+}
+
+static void __init free_rmp_segment_table(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < rst_max_index; i++) {
+		struct rmp_segment_desc *desc;
+
+		desc = rmp_segment_table[i];
+		if (!desc)
+			continue;
+
+		memunmap(desc->rmp_entry);
+
+		kfree(desc);
+	}
+
+	free_page((unsigned long)rmp_segment_table);
+
+	rmp_segment_table = NULL;
+}
+
+static bool __init alloc_rmp_segment_table(void)
+{
+	struct page *page;
+
+	/* Allocate the table used to index into the RMP segments */
+	page = alloc_page(__GFP_ZERO);
+	if (!page)
+		return false;
+
+	rmp_segment_table = page_address(page);
+
+	return true;
+}
+
 /*
  * Do the necessary preparations which are verified by the firmware as
  * described in the SNP_INIT_EX firmware command description in the SNP
@@ -197,8 +314,8 @@ static bool __init init_rmptable_bookkeeping(void)
  */
 static int __init snp_rmptable_init(void)
 {
-	u64 max_rmp_pfn, calc_rmp_sz, rmptable_size, rmp_end, val;
-	void *rmptable_start;
+	u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end, val;
+	unsigned int i;
 
 	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
 		return 0;
@@ -227,17 +344,18 @@ static int __init snp_rmptable_init(void)
 		goto nosnp;
 	}
 
+	if (!alloc_rmp_segment_table())
+		goto nosnp;
+
 	/* Map only the RMP entries */
-	rmptable_start = memremap(probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ,
-				  probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ,
-				  MEMREMAP_WB);
-	if (!rmptable_start) {
-		pr_err("Failed to map RMP table\n");
+	rmptable_segment = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ;
+	rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
+
+	if (!alloc_rmp_segment_desc(rmptable_segment, rmptable_size, 0)) {
+		free_rmp_segment_table();
 		goto nosnp;
 	}
 
-	rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
-
 	/*
 	 * Check if SEV-SNP is already enabled, this can happen in case of
 	 * kexec boot.
@@ -248,12 +366,20 @@ static int __init snp_rmptable_init(void)
 
 	/* Zero out the RMP bookkeeping area */
 	if (!init_rmptable_bookkeeping()) {
-		memunmap(rmptable_start);
+		free_rmp_segment_table();
 		goto nosnp;
 	}
 
 	/* Zero out the RMP entries */
-	memset(rmptable_start, 0, rmptable_size);
+	for (i = 0; i < rst_max_index; i++) {
+		struct rmp_segment_desc *desc;
+
+		desc = rmp_segment_table[i];
+		if (!desc)
+			continue;
+
+		memset(desc->rmp_entry, 0, desc->size);
+	}
 
 	/* Flush the caches to ensure that data is written before SNP is enabled. */
 	wbinvd_on_all_cpus();
@@ -264,9 +390,6 @@ static int __init snp_rmptable_init(void)
 	on_each_cpu(snp_enable, NULL, 1);
 
 skip_enable:
-	rmptable = (struct rmpentry_raw *)rmptable_start;
-	rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry_raw) - 1;
-
 	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
 
 	/*
@@ -287,6 +410,17 @@ static int __init snp_rmptable_init(void)
  */
 device_initcall(snp_rmptable_init);
 
+static void set_rmp_segment_info(unsigned int segment_shift)
+{
+	rmp_segment_coverage_shift = segment_shift;
+	rmp_segment_coverage_size  = 1ULL << rmp_segment_coverage_shift;
+	rmp_segment_coverage_mask  = rmp_segment_coverage_size - 1;
+
+	/* Calculate the maximum size an RMP can be (16 bytes/page mapped) */
+	rmp_segment_size_max = PHYS_PFN(rmp_segment_coverage_size);
+	rmp_segment_size_max <<= 4;
+}
+
 #define RMP_ADDR_MASK GENMASK_ULL(51, 13)
 
 bool snp_probe_rmptable_info(void)
@@ -308,6 +442,11 @@ bool snp_probe_rmptable_info(void)
 
 	rmp_sz = rmp_end - rmp_base + 1;
 
+	/* Treat the contiguous RMP table as a single segment */
+	rst_max_index = 1;
+
+	set_rmp_segment_info(RMPTABLE_NON_SEGMENTED_SHIFT);
+
 	probed_rmp_base = rmp_base;
 	probed_rmp_size = rmp_sz;
 
@@ -319,13 +458,29 @@ bool snp_probe_rmptable_info(void)
 
 static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
 {
-	if (!rmptable)
+	struct rmp_segment_desc *desc;
+	u64 paddr, rst_index, segment_index;
+
+	if (!rmp_segment_table)
 		return ERR_PTR(-ENODEV);
 
-	if (unlikely(pfn > rmptable_max_pfn))
+	paddr = pfn << PAGE_SHIFT;
+
+	rst_index = RST_ENTRY_INDEX(paddr);
+	if (unlikely(rst_index >= rst_max_index))
+		return ERR_PTR(-EFAULT);
+	rst_index = array_index_nospec(rst_index, rst_max_index);
+
+	desc = rmp_segment_table[rst_index];
+	if (unlikely(!desc))
 		return ERR_PTR(-EFAULT);
 
-	return rmptable + pfn;
+	segment_index = RMP_ENTRY_INDEX(paddr);
+	if (unlikely(segment_index >= desc->max_index))
+		return ERR_PTR(-EFAULT);
+	segment_index = array_index_nospec(segment_index, desc->max_index);
+
+	return desc->rmp_entry + segment_index;
 }
 
 static int get_rmpentry(u64 pfn, struct rmpentry *e)
-- 
2.46.2
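
A quick sanity check of the non-segmented numbers (illustrative only, using
the patch's names): with a segment shift of 52, set_rmp_segment_info() yields

	rmp_segment_coverage_size = 1ULL << 52;		/* all of physical memory */
	rmp_segment_coverage_mask = (1ULL << 52) - 1;
	rmp_segment_size_max      = 1ULL << 44;		/* PHYS_PFN(1ULL << 52) << 4 */

so RST_ENTRY_INDEX() evaluates to 0 for every valid physical address and the
single descriptor at index 0 covers the entire contiguous RMP. The size check
in alloc_rmp_segment_desc() can then only trip if the probed RMP exceeds 16 TB.
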
Re: [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
Posted by Borislav Petkov 2 weeks, 6 days ago
On Mon, Oct 28, 2024 at 02:32:41PM -0500, Tom Lendacky wrote:
> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> index 5871c924c0b2..37ff4f98e8d1 100644
> --- a/arch/x86/virt/svm/sev.c
> +++ b/arch/x86/virt/svm/sev.c
> @@ -18,6 +18,7 @@
>  #include <linux/cpumask.h>
>  #include <linux/iommu.h>
>  #include <linux/amd-iommu.h>
> +#include <linux/nospec.h>

What's that include for?

For array_index_nospec() below?

It builds without it or did you trigger a build error with some funky .config?

>  #include <asm/sev.h>
>  #include <asm/processor.h>
> @@ -77,12 +78,42 @@ struct rmpentry_raw {
>   */
>  #define RMPTABLE_CPU_BOOKKEEPING_SZ	0x4000
>  
> +/*
> + * For a non-segmented RMP table, use the maximum physical addressing as the
> + * segment size in order to always arrive at index 0 in the table.
> + */
> +#define RMPTABLE_NON_SEGMENTED_SHIFT	52
> +
> +struct rmp_segment_desc {
> +	struct rmpentry_raw *rmp_entry;
> +	u64 max_index;
> +	u64 size;
> +};
> +
> +/*
> + * Segmented RMP Table support.
> + *   - The segment size is used for two purposes:
> + *     - Identify the amount of memory covered by an RMP segment
> + *     - Quickly locate an RMP segment table entry for a physical address
> + *
> + *   - The RMP segment table contains pointers to an RMP table that covers
> + *     a specific portion of memory. There can be up to 512 8-byte entries,
> + *     one page's worth.
> + */
> +static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
> +static unsigned int rst_max_index __ro_after_init = 512;
> +
> +static u64 rmp_segment_size_max;

This one is used in a single function. Generate it there pls.

> +static unsigned int rmp_segment_coverage_shift;
> +static u64 rmp_segment_coverage_size;
> +static u64 rmp_segment_coverage_mask;

That "_coverage_" in there looks redundant to me and could go and make those
variables shorter.

> +#define RST_ENTRY_INDEX(x)	((x) >> rmp_segment_coverage_shift)
> +#define RMP_ENTRY_INDEX(x)	((u64)(PHYS_PFN((x) & rmp_segment_coverage_mask)))
> +
>  /* Mask to apply to a PFN to get the first PFN of a 2MB page */
>  #define PFN_PMD_MASK	GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
>  
>  static u64 probed_rmp_base, probed_rmp_size;
> -static struct rmpentry_raw *rmptable __ro_after_init;
> -static u64 rmptable_max_pfn __ro_after_init;
>  
>  static LIST_HEAD(snp_leaked_pages_list);
>  static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
> @@ -190,6 +221,92 @@ static bool __init init_rmptable_bookkeeping(void)
>  	return true;
>  }
>  
> +static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa)
> +{
> +	struct rmp_segment_desc *desc;
> +	void *rmp_segment;
> +	u64 rst_index;
> +
> +	/* Validate the RMP segment size */
> +	if (segment_size > rmp_segment_size_max) {
> +		pr_err("Invalid RMP size (%#llx) for configured segment size (%#llx)\n",
> +		       segment_size, rmp_segment_size_max);
> +		return false;
> +	}
> +
> +	/* Validate the RMP segment table index */
> +	rst_index = RST_ENTRY_INDEX(pa);
> +	if (rst_index >= rst_max_index) {
> +		pr_err("Invalid RMP segment base address (%#llx) for configured segment size (%#llx)\n",
> +		       pa, rmp_segment_coverage_size);
> +		return false;
> +	}
> +	rst_index = array_index_nospec(rst_index, rst_max_index);

Why are we doing this here? Are you expecting some out-of-bounds
user-controlled values here?

AFAICT, this is all read from the hw/fw so why are speculative accesses
a problem?

> +	if (rmp_segment_table[rst_index]) {
> +		pr_err("RMP segment descriptor already exists at index %llu\n", rst_index);
> +		return false;
> +	}
> +
> +	/* Map the RMP entries */

Kinda obvious...

> +	rmp_segment = memremap(segment_pa, segment_size, MEMREMAP_WB);
> +	if (!rmp_segment) {
> +		pr_err("Failed to map RMP segment addr 0x%llx size 0x%llx\n",
> +		       segment_pa, segment_size);
> +		return false;
> +	}
> +
> +	desc = kzalloc(sizeof(*desc), GFP_KERNEL);

			sizeof(struct rmp_segment_desc)


> +	if (!desc) {
> +		memunmap(rmp_segment);
> +		return false;
> +	}
> +
> +	desc->rmp_entry = rmp_segment;
> +	desc->max_index = segment_size / sizeof(*desc->rmp_entry);
> +	desc->size = segment_size;
> +
> +	/* Add the segment descriptor to the table */
> +	rmp_segment_table[rst_index] = desc;
> +
> +	return true;
> +}

...

> @@ -248,12 +366,20 @@ static int __init snp_rmptable_init(void)
>  
>  	/* Zero out the RMP bookkeeping area */
>  	if (!init_rmptable_bookkeeping()) {
> -		memunmap(rmptable_start);
> +		free_rmp_segment_table();
>  		goto nosnp;
>  	}
>  
>  	/* Zero out the RMP entries */
> -	memset(rmptable_start, 0, rmptable_size);
> +	for (i = 0; i < rst_max_index; i++) {
> +		struct rmp_segment_desc *desc;
> +
> +		desc = rmp_segment_table[i];
> +		if (!desc)
> +			continue;
> +
> +		memset(desc->rmp_entry, 0, desc->size);
> +	}

Why isn't this zeroing out part of alloc_rmp_segment_table()?

> @@ -319,13 +458,29 @@ bool snp_probe_rmptable_info(void)
>  
>  static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
>  {
> -	if (!rmptable)
> +	struct rmp_segment_desc *desc;
> +	u64 paddr, rst_index, segment_index;

Reverse xmas tree order pls.

> +	if (!rmp_segment_table)
>  		return ERR_PTR(-ENODEV);
>  
> -	if (unlikely(pfn > rmptable_max_pfn))
> +	paddr = pfn << PAGE_SHIFT;
> +
> +	rst_index = RST_ENTRY_INDEX(paddr);
> +	if (unlikely(rst_index >= rst_max_index))
> +		return ERR_PTR(-EFAULT);
> +	rst_index = array_index_nospec(rst_index, rst_max_index);

Same question as above.

> +
> +	desc = rmp_segment_table[rst_index];
> +	if (unlikely(!desc))
>  		return ERR_PTR(-EFAULT);
>  
> -	return rmptable + pfn;
> +	segment_index = RMP_ENTRY_INDEX(paddr);
> +	if (unlikely(segment_index >= desc->max_index))
> +		return ERR_PTR(-EFAULT);
> +	segment_index = array_index_nospec(segment_index, desc->max_index);
> +
> +	return desc->rmp_entry + segment_index;
>  }

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
Posted by Tom Lendacky 2 weeks, 6 days ago
On 11/4/24 04:33, Borislav Petkov wrote:
> On Mon, Oct 28, 2024 at 02:32:41PM -0500, Tom Lendacky wrote:
>> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
>> index 5871c924c0b2..37ff4f98e8d1 100644
>> --- a/arch/x86/virt/svm/sev.c
>> +++ b/arch/x86/virt/svm/sev.c
>> @@ -18,6 +18,7 @@
>>  #include <linux/cpumask.h>
>>  #include <linux/iommu.h>
>>  #include <linux/amd-iommu.h>
>> +#include <linux/nospec.h>
> 
> What's that include for?
> 
> For array_index_nospec() below?
> 
> It builds without it or did you trigger a build error with some funky .config?

But the define is in linux/nospec.h, so it might only build without the
include as a side effect of another include.

> 
>>  #include <asm/sev.h>
>>  #include <asm/processor.h>
>> @@ -77,12 +78,42 @@ struct rmpentry_raw {
>>   */
>>  #define RMPTABLE_CPU_BOOKKEEPING_SZ	0x4000
>>  
>> +/*
>> + * For a non-segmented RMP table, use the maximum physical addressing as the
>> + * segment size in order to always arrive at index 0 in the table.
>> + */
>> +#define RMPTABLE_NON_SEGMENTED_SHIFT	52
>> +
>> +struct rmp_segment_desc {
>> +	struct rmpentry_raw *rmp_entry;
>> +	u64 max_index;
>> +	u64 size;
>> +};
>> +
>> +/*
>> + * Segmented RMP Table support.
>> + *   - The segment size is used for two purposes:
>> + *     - Identify the amount of memory covered by an RMP segment
>> + *     - Quickly locate an RMP segment table entry for a physical address
>> + *
>> + *   - The RMP segment table contains pointers to an RMP table that covers
>> + *     a specific portion of memory. There can be up to 512 8-byte entries,
>> + *     one page's worth.
>> + */
>> +static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
>> +static unsigned int rst_max_index __ro_after_init = 512;
>> +
>> +static u64 rmp_segment_size_max;
> 
> This one is used in a single function. Generate it there pls.

It's used in two functions, alloc_rmp_segment_desc() and
set_rmp_segment_info().

> 
>> +static unsigned int rmp_segment_coverage_shift;
>> +static u64 rmp_segment_coverage_size;
>> +static u64 rmp_segment_coverage_mask;
> 
> That "_coverage_" in there looks redundant to me and could go and make those
> variables shorter.

Sure, I can remove the _coverage_ portion.

> 
>> +#define RST_ENTRY_INDEX(x)	((x) >> rmp_segment_coverage_shift)
>> +#define RMP_ENTRY_INDEX(x)	((u64)(PHYS_PFN((x) & rmp_segment_coverage_mask)))
>> +
>>  /* Mask to apply to a PFN to get the first PFN of a 2MB page */
>>  #define PFN_PMD_MASK	GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
>>  
>>  static u64 probed_rmp_base, probed_rmp_size;
>> -static struct rmpentry_raw *rmptable __ro_after_init;
>> -static u64 rmptable_max_pfn __ro_after_init;
>>  
>>  static LIST_HEAD(snp_leaked_pages_list);
>>  static DEFINE_SPINLOCK(snp_leaked_pages_list_lock);
>> @@ -190,6 +221,92 @@ static bool __init init_rmptable_bookkeeping(void)
>>  	return true;
>>  }
>>  
>> +static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa)
>> +{
>> +	struct rmp_segment_desc *desc;
>> +	void *rmp_segment;
>> +	u64 rst_index;
>> +
>> +	/* Validate the RMP segment size */
>> +	if (segment_size > rmp_segment_size_max) {
>> +		pr_err("Invalid RMP size (%#llx) for configured segment size (%#llx)\n",
>> +		       segment_size, rmp_segment_size_max);
>> +		return false;
>> +	}
>> +
>> +	/* Validate the RMP segment table index */
>> +	rst_index = RST_ENTRY_INDEX(pa);
>> +	if (rst_index >= rst_max_index) {
>> +		pr_err("Invalid RMP segment base address (%#llx) for configured segment size (%#llx)\n",
>> +		       pa, rmp_segment_coverage_size);
>> +		return false;
>> +	}
>> +	rst_index = array_index_nospec(rst_index, rst_max_index);
> 
> Why are we doing this here? Are you expecting some out-of-bounds
> user-controlled values here?
> 
> AFAICT, this is all read from the hw/fw so why are speculative accesses
> a problem?

Yeah, not needed here since this is before userspace has even started.

> 
>> +	if (rmp_segment_table[rst_index]) {
>> +		pr_err("RMP segment descriptor already exists at index %llu\n", rst_index);
>> +		return false;
>> +	}
>> +
>> +	/* Map the RMP entries */
> 
> Kinda obvious...
> 
>> +	rmp_segment = memremap(segment_pa, segment_size, MEMREMAP_WB);
>> +	if (!rmp_segment) {
>> +		pr_err("Failed to map RMP segment addr 0x%llx size 0x%llx\n",
>> +		       segment_pa, segment_size);
>> +		return false;
>> +	}
>> +
>> +	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
> 
> 			sizeof(struct rmp_segment_desc)

Hmmm... I prefer to keep this as sizeof(*desc). If any changes are made
to the structure name, then this line doesn't need to be changed.

> 
> 
>> +	if (!desc) {
>> +		memunmap(rmp_segment);
>> +		return false;
>> +	}
>> +
>> +	desc->rmp_entry = rmp_segment;
>> +	desc->max_index = segment_size / sizeof(*desc->rmp_entry);
>> +	desc->size = segment_size;
>> +
>> +	/* Add the segment descriptor to the table */
>> +	rmp_segment_table[rst_index] = desc;
>> +
>> +	return true;
>> +}
> 
> ...
> 
>> @@ -248,12 +366,20 @@ static int __init snp_rmptable_init(void)
>>  
>>  	/* Zero out the RMP bookkeeping area */
>>  	if (!init_rmptable_bookkeeping()) {
>> -		memunmap(rmptable_start);
>> +		free_rmp_segment_table();
>>  		goto nosnp;
>>  	}
>>  
>>  	/* Zero out the RMP entries */
>> -	memset(rmptable_start, 0, rmptable_size);
>> +	for (i = 0; i < rst_max_index; i++) {
>> +		struct rmp_segment_desc *desc;
>> +
>> +		desc = rmp_segment_table[i];
>> +		if (!desc)
>> +			continue;
>> +
>> +		memset(desc->rmp_entry, 0, desc->size);
>> +	}
> 
> Why isn't this zeroing out part of alloc_rmp_segment_table()?

If the SNP_EN bit is set in the SYSCFG MSR, the code needs to skip the
zeroing of the RMP table since it no longer has write access to the
table (this happens with kexec).

This keeps the code consistent with the prior code as to where the
zeroing occurs. I can add another argument to alloc_rmp_segment_desc()
that tells it whether to clear it or not, if you prefer.
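
For reference, the flow being preserved is roughly this (a simplified sketch
of snp_rmptable_init(), error handling dropped, not the literal code):

	rdmsrl(MSR_AMD64_SYSCFG, val);
	if (val & MSR_AMD64_SYSCFG_SNP_EN)
		goto skip_enable;	/* kexec: RMP already live, not writable */

	/* Fresh boot only: zero the bookkeeping area, then each segment */
	init_rmptable_bookkeeping();
	for (i = 0; i < rst_max_index; i++)
		if (rmp_segment_table[i])
			memset(rmp_segment_table[i]->rmp_entry, 0,
			       rmp_segment_table[i]->size);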

> 
>> @@ -319,13 +458,29 @@ bool snp_probe_rmptable_info(void)
>>  
>>  static struct rmpentry_raw *get_raw_rmpentry(u64 pfn)
>>  {
>> -	if (!rmptable)
>> +	struct rmp_segment_desc *desc;
>> +	u64 paddr, rst_index, segment_index;
> 
> Reverse xmas tree order pls.

Right.

> 
>> +	if (!rmp_segment_table)
>>  		return ERR_PTR(-ENODEV);
>>  
>> -	if (unlikely(pfn > rmptable_max_pfn))
>> +	paddr = pfn << PAGE_SHIFT;
>> +
>> +	rst_index = RST_ENTRY_INDEX(paddr);
>> +	if (unlikely(rst_index >= rst_max_index))
>> +		return ERR_PTR(-EFAULT);
>> +	rst_index = array_index_nospec(rst_index, rst_max_index);
> 
> Same question as above.

This is where I was worried about the VMM/guest being able to get into
this routine with a bad PFN value.

This function is invoked from dump_rmpentry(), which can be invoked from:

rmpupdate() - I think this is safe because the adjust_direct_map() will
fail if the PFN isn't valid, before the RMP is accessed.

snp_leak_pages() - I think this is safe because the PFN is based on an
actual allocation.

snp_dump_hva_rmpentry() - This is called from the page fault handler. I
think this invocation is safe for now because it is only called
if the fault type is an RMP fault type, which implies that the RMP is
involved. But as an external function, there's no guarantee as to the
situation it can be called from in the future.

I can remove it for now if you think it will be safe going forward.

Thanks,
Tom

> 
>> +
>> +	desc = rmp_segment_table[rst_index];
>> +	if (unlikely(!desc))
>>  		return ERR_PTR(-EFAULT);
>>  
>> -	return rmptable + pfn;
>> +	segment_index = RMP_ENTRY_INDEX(paddr);
>> +	if (unlikely(segment_index >= desc->max_index))
>> +		return ERR_PTR(-EFAULT);
>> +	segment_index = array_index_nospec(segment_index, desc->max_index);
>> +
>> +	return desc->rmp_entry + segment_index;
>>  }
>
Re: [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
Posted by Borislav Petkov 2 weeks, 2 days ago
On Mon, Nov 04, 2024 at 10:03:22AM -0600, Tom Lendacky wrote:
> >> +	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
> > 
> > 			sizeof(struct rmp_segment_desc)
> 
> Hmmm... I prefer to keep this as sizeof(*desc). If any changes are made
> to the structure name, then this line doesn't need to be changed.

Oh boy, we really do that:

from Documentation/process/coding-style.rst

"The preferred form for passing a size of a struct is the following:

	p = kmalloc(sizeof(*p), ...);

The alternative form where struct name is spelled out hurts readability and
introduces an opportunity for a bug when the pointer variable type is changed
but the corresponding sizeof that is passed to a memory allocator is not."

Well, my problem with using the variable name as sizeof() argument is that you
need to go and look at what type it is to know what you're sizing. And it
doesn't really hurt readability - on the contrary - it makes it more readable.

/facepalm.

> > Why isn't this zeroing out part of alloc_rmp_segment_table()?

First of all:

diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 37ff4f98e8d1..1ae782194626 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -293,18 +293,16 @@ static void __init free_rmp_segment_table(void)
 	rmp_segment_table = NULL;
 }
 
-static bool __init alloc_rmp_segment_table(void)
+static struct rmp_segment_desc ** __init alloc_rmp_segment_table(void)
 {
 	struct page *page;
 
 	/* Allocate the table used to index into the RMP segments */
 	page = alloc_page(__GFP_ZERO);
 	if (!page)
-		return false;
-
-	rmp_segment_table = page_address(page);
+		return NULL;
 
-	return true;
+	return page_address(page);
 }
 
 /*
@@ -344,7 +342,8 @@ static int __init snp_rmptable_init(void)
 		goto nosnp;
 	}
 
-	if (!alloc_rmp_segment_table())
+	rmp_segment_table = alloc_rmp_segment_table();
+	if (!rmp_segment_table)
 		goto nosnp;
 
 	/* Map only the RMP entries */
---

which makes the code a lot more readable and natural.

> If the SNP_EN bit is set in the SYSCFG MSR, the code needs to skip the
> zeroing of the RMP table since it no longer has write access to the
> table (this happens with kexec).

So we allocate a rmp_segment_table page when we kexec but we can't write the
entries? Why?

This sounds like some weird limitation. Or maybe I'm missing something.

> This keeps the code consistent with the prior code as to where the
> zeroing occurs. I can add another argument to alloc_rmp_segment_desc()
> that tells it whether to clear it or not, if you prefer.

You can also merge that init_rmptable_bookkeeping() and the zeroing of the RMP
entries into one function because they happen back-to-back.

In any case the placement of this whole dance still doesn't look completely
optimal to me.

> This is where I was worried about the VMM/guest being able to get into
> this routine with a bad PFN value.
> 
> This function is invoked from dump_rmpentry(), which can be invoked from:
> 
> rmpupdate() - I think this is safe because the adjust_direct_map() will
> fail if the PFN isn't valid, before the RMP is accessed.
> 
> snp_leak_pages() - I think this is safe because the PFN is based on an
> actual allocation.
> 
> snp_dump_hva_rmpentry() - This is called from the page fault handler. I
> think this invocation is safe for now because it is only called
> if the fault type is an RMP fault type, which implies that the RMP is
> involved. But as an external function, there's no guarantee as to the
> situation it can be called from in the future.
> 
> I can remove it for now if you think it will be safe going forward.

I like being cautious and you probably should put this deliberation above the
array_index_nospec() so that it is clear to future generations reading this
code.

And yeah, it might be safe but it is not like it costs much compared to the
remaining operations happening here, so maybe, out of an abundance of caution,
we should keep it there and explain why we do so.

Hmmm?
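
For instance, the hardening site could carry the rationale directly (a sketch
against the v5 code; the comment text is just an example):

	rst_index = RST_ENTRY_INDEX(paddr);
	if (unlikely(rst_index >= rst_max_index))
		return ERR_PTR(-EFAULT);

	/*
	 * All current callers pass a validated PFN, but this helper can be
	 * reached from the #PF path via snp_dump_hva_rmpentry(), so harden
	 * the index against speculative out-of-bounds access out of an
	 * abundance of caution.
	 */
	rst_index = array_index_nospec(rst_index, rst_max_index);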

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
Posted by Borislav Petkov 2 weeks, 6 days ago
On Mon, Nov 04, 2024 at 10:03:22AM -0600, Tom Lendacky wrote:
> >> +static u64 rmp_segment_size_max;
> > 
> > This one is used in a single function. Generate it there pls.
> 
> It's used in two functions, alloc_rmp_segment_desc() and
> set_rmp_segment_info().

You can always lift it up later if it is needed more than now:

diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 37ff4f98e8d1..77bdf4d6a8f4 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -103,7 +103,6 @@ struct rmp_segment_desc {
 static struct rmp_segment_desc **rmp_segment_table __ro_after_init;
 static unsigned int rst_max_index __ro_after_init = 512;
 
-static u64 rmp_segment_size_max;
 static unsigned int rmp_segment_coverage_shift;
 static u64 rmp_segment_coverage_size;
 static u64 rmp_segment_coverage_mask;
@@ -223,9 +222,13 @@ static bool __init init_rmptable_bookkeeping(void)
 
 static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa)
 {
+	u64 rst_index, rmp_segment_size_max;
 	struct rmp_segment_desc *desc;
 	void *rmp_segment;
-	u64 rst_index;
+
+	/* Calculate the maximum size an RMP can be (16 bytes/page mapped) */
+	rmp_segment_size_max = PHYS_PFN(rmp_segment_coverage_size);
+	rmp_segment_size_max <<= 4;
 
 	/* Validate the RMP segment size */
 	if (segment_size > rmp_segment_size_max) {
@@ -415,10 +418,6 @@ static void set_rmp_segment_info(unsigned int segment_shift)
 	rmp_segment_coverage_shift = segment_shift;
 	rmp_segment_coverage_size  = 1ULL << rmp_segment_coverage_shift;
 	rmp_segment_coverage_mask  = rmp_segment_coverage_size - 1;
-
-	/* Calculate the maximum size an RMP can be (16 bytes/page mapped) */
-	rmp_segment_size_max = PHYS_PFN(rmp_segment_coverage_size);
-	rmp_segment_size_max <<= 4;
 }
 
 #define RMP_ADDR_MASK GENMASK_ULL(51, 13)


-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH v5 6/8] x86/sev: Treat the contiguous RMP table as a single RMP segment
Posted by Tom Lendacky 2 weeks, 6 days ago
On 11/4/24 11:05, Borislav Petkov wrote:
> On Mon, Nov 04, 2024 at 10:03:22AM -0600, Tom Lendacky wrote:
>>>> +static u64 rmp_segment_size_max;
>>>
>>> This one is used in a single function. Generate it there pls.
>>
>> It's used in two functions, alloc_rmp_segment_desc() and
>> set_rmp_segment_info().
> 
> You can always lift it up later if it is needed more than now:

Will do.

Thanks,
Tom

>