[PATCH v3 1/8] x86/sev: Prepare for using the RMPREAD instruction to access the RMP

Posted by Tom Lendacky 1 month, 4 weeks ago
The RMPREAD instruction returns an RMP entry in an architecture-defined
format. This is the preferred method for examining RMP entries.
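
For reference, invoking RMPREAD boils down to something like the sketch
below (hypothetical here; the actual use comes later in this series). The
instruction is emitted via its opcode bytes, F2 0F 01 FD per the AMD APM,
since assemblers may not know the mnemonic yet:

	/*
	 * Sketch only: RMPREAD takes the system physical address of the
	 * page in rAX and the address of the output buffer in rCX, and
	 * returns a status code in rAX (0 on success).
	 */
	static int rmpread(u64 paddr, struct rmpentry *entry)
	{
		int ret;

		asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfd"
			     : "=a" (ret)
			     : "a" (paddr), "c" (entry)
			     : "memory", "cc");

		return ret;
	}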

In preparation for using the RMPREAD instruction, convert the existing
code that directly accesses the RMP to map the raw RMP information into
the architecture-defined format.

RMPREAD output includes a status bit for the 2MB region containing the
input page. If the input page address is 2MB aligned and any other page
within the 2MB region is assigned, then the 2MB region status bit will be
set to 1. Otherwise, the 2MB region status bit will be set to 0. For
systems that do not support RMPREAD, calculating this value would require
looping over all of the RMP table entries within that range until one is
found with the assigned bit set. Since this bit is not defined in the
current format, and so is not used today, do not incur the overhead
associated with calculating it.
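
For systems without RMPREAD support, the avoided calculation would look
roughly like the hypothetical sketch below (not implemented by this
patch; helper names are those introduced here):

	/*
	 * Hypothetical only - deliberately not implemented. Computing the
	 * 2MB region status without RMPREAD means scanning the other 511
	 * 4K RMP entries covering the 2MB region for an assigned page.
	 */
	static bool rmp_2mb_region_assigned(u64 pfn)
	{
		u64 base = pfn & PFN_PMD_MASK;
		u64 i;

		for (i = base + 1; i < base + (PMD_SIZE >> PAGE_SHIFT); i++) {
			struct rmpentry_raw *e = __get_rmpentry(i);

			if (!IS_ERR(e) && e->assigned)
				return true;
		}

		return false;
	}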

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/virt/svm/sev.c | 141 ++++++++++++++++++++++++++++------------
 1 file changed, 98 insertions(+), 43 deletions(-)

diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 0ce17766c0e5..103a2dd6e81d 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -30,11 +30,27 @@
 #include <asm/cmdline.h>
 #include <asm/iommu.h>
 
+/*
+ * The RMP entry format as returned by the RMPREAD instruction.
+ */
+struct rmpentry {
+	u64 gpa;
+	u8  assigned		:1,
+	    rsvd1		:7;
+	u8  pagesize		:1,
+	    hpage_region_status	:1,
+	    rsvd2		:6;
+	u8  immutable		:1,
+	    rsvd3		:7;
+	u8  rsvd4;
+	u32 asid;
+} __packed;
+
 /*
  * The RMP entry format is not architectural. The format is defined in PPR
  * Family 19h Model 01h, Rev B1 processor.
  */
-struct rmpentry {
+struct rmpentry_raw {
 	union {
 		struct {
 			u64 assigned	: 1,
@@ -62,7 +78,7 @@ struct rmpentry {
 #define PFN_PMD_MASK	GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
 
 static u64 probed_rmp_base, probed_rmp_size;
-static struct rmpentry *rmptable __ro_after_init;
+static struct rmpentry_raw *rmptable __ro_after_init;
 static u64 rmptable_max_pfn __ro_after_init;
 
 static LIST_HEAD(snp_leaked_pages_list);
@@ -247,8 +263,8 @@ static int __init snp_rmptable_init(void)
 	rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
 	rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
 
-	rmptable = (struct rmpentry *)rmptable_start;
-	rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1;
+	rmptable = (struct rmpentry_raw *)rmptable_start;
+	rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry_raw) - 1;
 
 	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
 
@@ -270,48 +286,77 @@ static int __init snp_rmptable_init(void)
  */
 device_initcall(snp_rmptable_init);
 
-static struct rmpentry *get_rmpentry(u64 pfn)
+static struct rmpentry_raw *__get_rmpentry(unsigned long pfn)
 {
-	if (WARN_ON_ONCE(pfn > rmptable_max_pfn))
-		return ERR_PTR(-EFAULT);
-
-	return &rmptable[pfn];
-}
-
-static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level)
-{
-	struct rmpentry *large_entry, *entry;
-
-	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+	if (!rmptable)
 		return ERR_PTR(-ENODEV);
 
-	entry = get_rmpentry(pfn);
-	if (IS_ERR(entry))
-		return entry;
+	if (unlikely(pfn > rmptable_max_pfn))
+		return ERR_PTR(-EFAULT);
+
+	return rmptable + pfn;
+}
+
+static int get_rmpentry(u64 pfn, struct rmpentry *entry)
+{
+	struct rmpentry_raw *e;
+
+	e = __get_rmpentry(pfn);
+	if (IS_ERR(e))
+		return PTR_ERR(e);
+
+	/*
+	 * Map the RMP table entry onto the RMPREAD output format.
+	 * The 2MB region status indicator (hpage_region_status field) is not
+	 * calculated, since the overhead could be significant and the field
+	 * is not used.
+	 */
+	memset(entry, 0, sizeof(*entry));
+	entry->gpa       = e->gpa << PAGE_SHIFT;
+	entry->asid      = e->asid;
+	entry->assigned  = e->assigned;
+	entry->pagesize  = e->pagesize;
+	entry->immutable = e->immutable;
+
+	return 0;
+}
+
+static int __snp_lookup_rmpentry(u64 pfn, struct rmpentry *entry, int *level)
+{
+	struct rmpentry large_entry;
+	int ret;
+
+	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+		return -ENODEV;
+
+	ret = get_rmpentry(pfn, entry);
+	if (ret)
+		return ret;
 
 	/*
 	 * Find the authoritative RMP entry for a PFN. This can be either a 4K
 	 * RMP entry or a special large RMP entry that is authoritative for a
 	 * whole 2M area.
 	 */
-	large_entry = get_rmpentry(pfn & PFN_PMD_MASK);
-	if (IS_ERR(large_entry))
-		return large_entry;
+	ret = get_rmpentry(pfn & PFN_PMD_MASK, &large_entry);
+	if (ret)
+		return ret;
 
-	*level = RMP_TO_PG_LEVEL(large_entry->pagesize);
+	*level = RMP_TO_PG_LEVEL(large_entry.pagesize);
 
-	return entry;
+	return 0;
 }
 
 int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level)
 {
-	struct rmpentry *e;
+	struct rmpentry e;
+	int ret;
 
-	e = __snp_lookup_rmpentry(pfn, level);
-	if (IS_ERR(e))
-		return PTR_ERR(e);
+	ret = __snp_lookup_rmpentry(pfn, &e, level);
+	if (ret)
+		return ret;
 
-	*assigned = !!e->assigned;
+	*assigned = !!e.assigned;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
@@ -324,20 +369,28 @@ EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
  */
 static void dump_rmpentry(u64 pfn)
 {
+	struct rmpentry_raw *e_raw;
 	u64 pfn_i, pfn_end;
-	struct rmpentry *e;
-	int level;
+	struct rmpentry e;
+	int level, ret;
 
-	e = __snp_lookup_rmpentry(pfn, &level);
-	if (IS_ERR(e)) {
-		pr_err("Failed to read RMP entry for PFN 0x%llx, error %ld\n",
-		       pfn, PTR_ERR(e));
+	ret = __snp_lookup_rmpentry(pfn, &e, &level);
+	if (ret) {
+		pr_err("Failed to read RMP entry for PFN 0x%llx, error %d\n",
+		       pfn, ret);
 		return;
 	}
 
-	if (e->assigned) {
+	if (e.assigned) {
+		e_raw = __get_rmpentry(pfn);
+		if (IS_ERR(e_raw)) {
+			pr_err("Failed to read RMP contents for PFN 0x%llx, error %ld\n",
+			       pfn, PTR_ERR(e_raw));
+			return;
+		}
+
 		pr_info("PFN 0x%llx, RMP entry: [0x%016llx - 0x%016llx]\n",
-			pfn, e->lo, e->hi);
+			pfn, e_raw->lo, e_raw->hi);
 		return;
 	}
 
@@ -356,16 +409,18 @@ static void dump_rmpentry(u64 pfn)
 		pfn, pfn_i, pfn_end);
 
 	while (pfn_i < pfn_end) {
-		e = __snp_lookup_rmpentry(pfn_i, &level);
-		if (IS_ERR(e)) {
-			pr_err("Error %ld reading RMP entry for PFN 0x%llx\n",
-			       PTR_ERR(e), pfn_i);
+		e_raw = __get_rmpentry(pfn_i);
+		if (IS_ERR(e_raw)) {
+			pr_err("Error %ld reading RMP contents for PFN 0x%llx\n",
+			       PTR_ERR(e_raw), pfn_i);
 			pfn_i++;
 			continue;
 		}
 
-		if (e->lo || e->hi)
-			pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e->lo, e->hi);
+		if (e_raw->lo || e_raw->hi)
+			pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n",
+				pfn_i, e_raw->lo, e_raw->hi);
+
 		pfn_i++;
 	}
 }
-- 
2.43.2
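
For context, a minimal usage sketch of the exported helper from this
patch (hypothetical caller, not part of the series):

	/* Hypothetical: is this PFN covered by an assigned 2MB RMP entry? */
	static bool pfn_is_2mb_assigned(u64 pfn)
	{
		bool assigned;
		int level;

		if (snp_lookup_rmpentry(pfn, &assigned, &level))
			return false;

		return assigned && level == PG_LEVEL_2M;
	}
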
Re: [PATCH v3 1/8] x86/sev: Prepare for using the RMPREAD instruction to access the RMP
Posted by Neeraj Upadhyay 1 month, 1 week ago

...

> +
> +static int get_rmpentry(u64 pfn, struct rmpentry *entry)
> +{
> +	struct rmpentry_raw *e;
> +
> +	e = __get_rmpentry(pfn);
> +	if (IS_ERR(e))
> +		return PTR_ERR(e);
> +
> +	/*
> +	 * Map the RMP table entry onto the RMPREAD output format.
> +	 * The 2MB region status indicator (hpage_region_status field) is not
> +	 * calculated, since the overhead could be significant and the field
> +	 * is not used.
> +	 */
> +	memset(entry, 0, sizeof(*entry));
> +	entry->gpa       = e->gpa << PAGE_SHIFT;

Nit: Do we need to use PAGE_SHIFT here or hard-code the shift to 12?


- Neeraj

> +	entry->asid      = e->asid;
> +	entry->assigned  = e->assigned;
> +	entry->pagesize  = e->pagesize;
> +	entry->immutable = e->immutable;
> +
> +	return 0;
> +}
> +
Re: [PATCH v3 1/8] x86/sev: Prepare for using the RMPREAD instruction to access the RMP
Posted by Nikunj A. Dadhania 1 month, 1 week ago
On 9/30/2024 8:52 PM, Tom Lendacky wrote:
> The RMPREAD instruction returns an RMP entry in an architecture-defined
> format. This is the preferred method for examining RMP entries.
> 
> In preparation for using the RMPREAD instruction, convert the existing
> code that directly accesses the RMP to map the raw RMP information into
> the architecture-defined format.
> 
> RMPREAD output includes a status bit for the 2MB region containing the
> input page. If the input page address is 2MB aligned and any other page
> within the 2MB region is assigned, then the 2MB region status bit will be
> set to 1. Otherwise, the 2MB region status bit will be set to 0. For
> systems that do not support RMPREAD, calculating this value would require
> looping over all of the RMP table entries within that range until one is
> found with the assigned bit set. Since this bit is not defined in the
> current format, and so is not used today, do not incur the overhead
> associated with calculating it.
> 
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> ---
>  arch/x86/virt/svm/sev.c | 141 ++++++++++++++++++++++++++++------------
>  1 file changed, 98 insertions(+), 43 deletions(-)
> 
> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> index 0ce17766c0e5..103a2dd6e81d 100644
> --- a/arch/x86/virt/svm/sev.c
> +++ b/arch/x86/virt/svm/sev.c
> @@ -30,11 +30,27 @@
>  #include <asm/cmdline.h>
>  #include <asm/iommu.h>
>  
> +/*
> + * The RMP entry format as returned by the RMPREAD instruction.
> + */
> +struct rmpentry {
> +	u64 gpa;
> +	u8  assigned		:1,
> +	    rsvd1		:7;
> +	u8  pagesize		:1,
> +	    hpage_region_status	:1,
> +	    rsvd2		:6;
> +	u8  immutable		:1,
> +	    rsvd3		:7;
> +	u8  rsvd4;
> +	u32 asid;
> +} __packed;
> +
>  /*
>   * The RMP entry format is not architectural. The format is defined in PPR
>   * Family 19h Model 01h, Rev B1 processor.
>   */
> -struct rmpentry {
> +struct rmpentry_raw {
>  	union {
>  		struct {
>  			u64 assigned	: 1,
> @@ -62,7 +78,7 @@ struct rmpentry {
>  #define PFN_PMD_MASK	GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT)
>  
>  static u64 probed_rmp_base, probed_rmp_size;
> -static struct rmpentry *rmptable __ro_after_init;
> +static struct rmpentry_raw *rmptable __ro_after_init;
>  static u64 rmptable_max_pfn __ro_after_init;
>  
>  static LIST_HEAD(snp_leaked_pages_list);
> @@ -247,8 +263,8 @@ static int __init snp_rmptable_init(void)
>  	rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ;
>  	rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ;
>  
> -	rmptable = (struct rmpentry *)rmptable_start;
> -	rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1;
> +	rmptable = (struct rmpentry_raw *)rmptable_start;
> +	rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry_raw) - 1;
>  
>  	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
>  
> @@ -270,48 +286,77 @@ static int __init snp_rmptable_init(void)
>   */
>  device_initcall(snp_rmptable_init);
>  
> -static struct rmpentry *get_rmpentry(u64 pfn)
> +static struct rmpentry_raw *__get_rmpentry(unsigned long pfn)

pfn type has changed from u64 => unsigned long, is this intentional?

>  {
> -	if (WARN_ON_ONCE(pfn > rmptable_max_pfn))
> -		return ERR_PTR(-EFAULT);
> -
> -	return &rmptable[pfn];
> -}
> -
> -static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level)
> -{
> -	struct rmpentry *large_entry, *entry;
> -
> -	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
> +	if (!rmptable)
>  		return ERR_PTR(-ENODEV);
>  
> -	entry = get_rmpentry(pfn);
> -	if (IS_ERR(entry))
> -		return entry;
> +	if (unlikely(pfn > rmptable_max_pfn))
> +		return ERR_PTR(-EFAULT);
> +
> +	return rmptable + pfn;
> +}
> +
> +static int get_rmpentry(u64 pfn, struct rmpentry *entry)
> +{
> +	struct rmpentry_raw *e;
> +
> +	e = __get_rmpentry(pfn);
> +	if (IS_ERR(e))
> +		return PTR_ERR(e);
> +
> +	/*
> +	 * Map the RMP table entry onto the RMPREAD output format.
> +	 * The 2MB region status indicator (hpage_region_status field) is not
> +	 * calculated, since the overhead could be significant and the field
> +	 * is not used.
> +	 */
> +	memset(entry, 0, sizeof(*entry));
> +	entry->gpa       = e->gpa << PAGE_SHIFT;
> +	entry->asid      = e->asid;
> +	entry->assigned  = e->assigned;
> +	entry->pagesize  = e->pagesize;
> +	entry->immutable = e->immutable;
> +
> +	return 0;
> +}
> +
> +static int __snp_lookup_rmpentry(u64 pfn, struct rmpentry *entry, int *level)
> +{
> +	struct rmpentry large_entry;
> +	int ret;
> +
> +	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
> +		return -ENODEV;

Can we rely on the rmp_table check in __get_rmpentry() and remove the above check?
If rmp_table is NULL, CC_ATTR_HOST_SEV_SNP is always cleared.

> +
> +	ret = get_rmpentry(pfn, entry);
> +	if (ret)
> +		return ret;

Regards
Nikunj
Re: [PATCH v3 1/8] x86/sev: Prepare for using the RMPREAD instruction to access the RMP
Posted by Tom Lendacky 1 month, 1 week ago
On 10/16/24 03:52, Nikunj A. Dadhania wrote:
> On 9/30/2024 8:52 PM, Tom Lendacky wrote:
>> The RMPREAD instruction returns an RMP entry in an architecture-defined
>> format. This is the preferred method for examining RMP entries.
>>
>> In preparation for using the RMPREAD instruction, convert the existing
>> code that directly accesses the RMP to map the raw RMP information into
>> the architecture-defined format.
>>
>> RMPREAD output includes a status bit for the 2MB region containing the
>> input page. If the input page address is 2MB aligned and any other page
>> within the 2MB region is assigned, then the 2MB region status bit will be
>> set to 1. Otherwise, the 2MB region status bit will be set to 0. For
>> systems that do not support RMPREAD, calculating this value would require
>> looping over all of the RMP table entries within that range until one is
>> found with the assigned bit set. Since this bit is not defined in the
>> current format, and so is not used today, do not incur the overhead
>> associated with calculating it.
>>
>> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
>> ---
>>  arch/x86/virt/svm/sev.c | 141 ++++++++++++++++++++++++++++------------
>>  1 file changed, 98 insertions(+), 43 deletions(-)
>>

>> -static struct rmpentry *get_rmpentry(u64 pfn)
>> +static struct rmpentry_raw *__get_rmpentry(unsigned long pfn)
> 
> pfn type has changed from u64 => unsigned long, is this intentional?

No, not intentional. I'm just used to pfns being unsigned longs... good
catch.

> 

>> +static int __snp_lookup_rmpentry(u64 pfn, struct rmpentry *entry, int *level)
>> +{
>> +	struct rmpentry large_entry;
>> +	int ret;
>> +
>> +	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
>> +		return -ENODEV;
> 
> Can we rely on the rmp_table check in __get_rmpentry() and remove the above check?
> If rmp_table is NULL, CC_ATTR_HOST_SEV_SNP is always cleared.

I'm trying not to change the logic and just add the new struct usage.
Once RMPREAD is used, there is no checking of the table address, and if
SNP is not enabled in the SYSCFG MSR, the instruction will #UD.

The table address check is just to ensure that, if this function is ever
called without checking CC_ATTR_HOST_SEV_SNP in the future, we avoid a
possible crash. If anything, I can remove the table address check that I
added here, but I would like to keep it just to be safe.
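
To illustrate, the gating condition amounts to roughly this (a sketch
only, using the SYSCFG bit definitions from msr-index.h):

	u64 syscfg;

	/* RMPREAD will #UD unless SNP is enabled in SYSCFG */
	rdmsrl(MSR_AMD64_SYSCFG, syscfg);
	if (!(syscfg & MSR_AMD64_SYSCFG_SNP_EN))
		return -ENODEV;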

Thanks,
Tom

> 
>> +
>> +	ret = get_rmpentry(pfn, entry);
>> +	if (ret)
>> +		return ret;
> 
> Regards
> Nikunj
Re: [PATCH v3 1/8] x86/sev: Prepare for using the RMPREAD instruction to access the RMP
Posted by Nikunj A. Dadhania 1 month, 1 week ago

On 10/16/2024 8:13 PM, Tom Lendacky wrote:
> On 10/16/24 03:52, Nikunj A. Dadhania wrote:
>> On 9/30/2024 8:52 PM, Tom Lendacky wrote:
>>> The RMPREAD instruction returns an RMP entry in an architecture-defined
>>> format. This is the preferred method for examining RMP entries.
>>>
 
>>> +static int __snp_lookup_rmpentry(u64 pfn, struct rmpentry *entry, int *level)
>>> +{
>>> +	struct rmpentry large_entry;
>>> +	int ret;
>>> +
>>> +	if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
>>> +		return -ENODEV;
>>
>> Can we rely on the rmp_table check in __get_rmpentry() and remove the above check?
>> If rmp_table is NULL, CC_ATTR_HOST_SEV_SNP is always cleared.
> 
> I'm trying not to change the logic and just add the new struct usage.
> Once RMPREAD is used, there is no checking of the table address, and if
> SNP is not enabled in the SYSCFG MSR, the instruction will #UD.

Sure, makes sense.
 
> The table address check is just to ensure that, if this function is ever
> called without checking CC_ATTR_HOST_SEV_SNP in the future, we avoid a
> possible crash. If anything, I can remove the table address check that I
> added here, but I would like to keep it just to be safe.

Regards
Nikunj