[PATCH v2] x86/AMD: make HT range dynamic for Fam17 and up

Jan Beulich posted 1 patch 2 years, 10 months ago
Test gitlab-ci failed
Failed in applying to current master (apply log)
[PATCH v2] x86/AMD: make HT range dynamic for Fam17 and up
Posted by Jan Beulich 2 years, 10 months ago
At the time of d838ac2539cf ("x86: don't allow Dom0 access to the HT
address range") documentation correctly stated that the range was
completely fixed. For Fam17 and newer, it lives at the top of physical
address space, though.

To correctly determine the top of physical address space, we need to
account for the physical address reduction caused by SME on these
families, hence the calculation of paddr_bits also gets adjusted.

While for paddr_bits < 40 the HT range is completely hidden, there's no
need to suppress the range insertion in that case: It'll just have no
real meaning.

Reported-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Move adjustment last, to leave hap_paddr_bits unaffected. Add
    comment.

--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -349,16 +349,23 @@ void __init early_cpu_init(void)
 
 	eax = cpuid_eax(0x80000000);
 	if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
+		ebx = eax >= 0x8000001f ? cpuid_ebx(0x8000001f) : 0;
 		eax = cpuid_eax(0x80000008);
+
 		paddr_bits = eax & 0xff;
 		if (paddr_bits > PADDR_BITS)
 			paddr_bits = PADDR_BITS;
+
 		vaddr_bits = (eax >> 8) & 0xff;
 		if (vaddr_bits > VADDR_BITS)
 			vaddr_bits = VADDR_BITS;
+
 		hap_paddr_bits = ((eax >> 16) & 0xff) ?: paddr_bits;
 		if (hap_paddr_bits > PADDR_BITS)
 			hap_paddr_bits = PADDR_BITS;
+
+		/* Account for SME's physical address space reduction. */
+		paddr_bits -= (ebx >> 6) & 0x3f;
 	}
 
 	if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)))
--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -524,8 +524,11 @@ int __init dom0_setup_permissions(struct
                                          MSI_ADDR_DEST_ID_MASK));
     /* HyperTransport range. */
     if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
-        rc |= iomem_deny_access(d, paddr_to_pfn(0xfdULL << 32),
-                                paddr_to_pfn((1ULL << 40) - 1));
+    {
+        mfn = paddr_to_pfn(1UL <<
+                           (boot_cpu_data.x86 < 0x17 ? 40 : paddr_bits));
+        rc |= iomem_deny_access(d, mfn - paddr_to_pfn(3UL << 32), mfn - 1);
+    }
 
     /* Remove access to E820_UNUSABLE I/O regions above 1MB. */
     for ( i = 0; i < e820.nr_map; i++ )


Ping: [PATCH v2] x86/AMD: make HT range dynamic for Fam17 and up
Posted by Jan Beulich 2 years, 6 months ago
On 28.06.2021 13:48, Jan Beulich wrote:
> At the time of d838ac2539cf ("x86: don't allow Dom0 access to the HT
> address range") documentation correctly stated that the range was
> completely fixed. For Fam17 and newer, it lives at the top of physical
> address space, though.
> 
> To correctly determine the top of physical address space, we need to
> account for their physical address reduction, hence the calculation of
> paddr_bits also gets adjusted.
> 
> While for paddr_bits < 40 the HT range is completely hidden, there's no
> need to suppress the range insertion in that case: It'll just have no
> real meaning.
> 
> Reported-by: Igor Druzhinin <igor.druzhinin@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v2: Move adjustment last, to leave hap_paddr_bits unaffected. Add
>     comment.

If there are any further adjustments needed here (or if the whole
situation needs to be addressed differently altogether), could
respective requests please be voiced? Or else may I please ask for
an ack?

Jan

> --- a/xen/arch/x86/cpu/common.c
> +++ b/xen/arch/x86/cpu/common.c
> @@ -349,16 +349,23 @@ void __init early_cpu_init(void)
>  
>  	eax = cpuid_eax(0x80000000);
>  	if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
> +		ebx = eax >= 0x8000001f ? cpuid_ebx(0x8000001f) : 0;
>  		eax = cpuid_eax(0x80000008);
> +
>  		paddr_bits = eax & 0xff;
>  		if (paddr_bits > PADDR_BITS)
>  			paddr_bits = PADDR_BITS;
> +
>  		vaddr_bits = (eax >> 8) & 0xff;
>  		if (vaddr_bits > VADDR_BITS)
>  			vaddr_bits = VADDR_BITS;
> +
>  		hap_paddr_bits = ((eax >> 16) & 0xff) ?: paddr_bits;
>  		if (hap_paddr_bits > PADDR_BITS)
>  			hap_paddr_bits = PADDR_BITS;
> +
> +		/* Account for SME's physical address space reduction. */
> +		paddr_bits -= (ebx >> 6) & 0x3f;
>  	}
>  
>  	if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)))
> --- a/xen/arch/x86/dom0_build.c
> +++ b/xen/arch/x86/dom0_build.c
> @@ -524,8 +524,11 @@ int __init dom0_setup_permissions(struct
>                                           MSI_ADDR_DEST_ID_MASK));
>      /* HyperTransport range. */
>      if ( boot_cpu_data.x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON) )
> -        rc |= iomem_deny_access(d, paddr_to_pfn(0xfdULL << 32),
> -                                paddr_to_pfn((1ULL << 40) - 1));
> +    {
> +        mfn = paddr_to_pfn(1UL <<
> +                           (boot_cpu_data.x86 < 0x17 ? 40 : paddr_bits));
> +        rc |= iomem_deny_access(d, mfn - paddr_to_pfn(3UL << 32), mfn - 1);
> +    }
>  
>      /* Remove access to E820_UNUSABLE I/O regions above 1MB. */
>      for ( i = 0; i < e820.nr_map; i++ )
> 
> 


Re: [PATCH v2] x86/AMD: make HT range dynamic for Fam17 and up
Posted by Roger Pau Monné 2 years, 6 months ago
On Mon, Jun 28, 2021 at 01:48:53PM +0200, Jan Beulich wrote:
> At the time of d838ac2539cf ("x86: don't allow Dom0 access to the HT
> address range") documentation correctly stated that the range was
> completely fixed. For Fam17 and newer, it lives at the top of physical
> address space, though.
> 
> To correctly determine the top of physical address space, we need to
> account for their physical address reduction, hence the calculation of
> paddr_bits also gets adjusted.
> 
> While for paddr_bits < 40 the HT range is completely hidden, there's no
> need to suppress the range insertion in that case: It'll just have no
> real meaning.
> 
> Reported-by: Igor Druzhinin <igor.druzhinin@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

> ---
> v2: Move adjustment last, to leave hap_paddr_bits unaffected. Add
>     comment.
> 
> --- a/xen/arch/x86/cpu/common.c
> +++ b/xen/arch/x86/cpu/common.c
> @@ -349,16 +349,23 @@ void __init early_cpu_init(void)
>  
>  	eax = cpuid_eax(0x80000000);
>  	if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
> +		ebx = eax >= 0x8000001f ? cpuid_ebx(0x8000001f) : 0;
>  		eax = cpuid_eax(0x80000008);
> +
>  		paddr_bits = eax & 0xff;
>  		if (paddr_bits > PADDR_BITS)
>  			paddr_bits = PADDR_BITS;
> +
>  		vaddr_bits = (eax >> 8) & 0xff;
>  		if (vaddr_bits > VADDR_BITS)
>  			vaddr_bits = VADDR_BITS;
> +
>  		hap_paddr_bits = ((eax >> 16) & 0xff) ?: paddr_bits;
>  		if (hap_paddr_bits > PADDR_BITS)
>  			hap_paddr_bits = PADDR_BITS;
> +
> +		/* Account for SME's physical address space reduction. */
> +		paddr_bits -= (ebx >> 6) & 0x3f;

Does it make sense to check for 0x8000001f[eax] bit 0 in order to
assert that there's support for SME, or is assuming that the reduction
is != 0 in the other cpuid leaf enough?

Is it possible for firmware vendors to disable advertising the SME
support bit and leave the physical address space reduction one in
place?

Thanks, Roger.

Re: [PATCH v2] x86/AMD: make HT range dynamic for Fam17 and up
Posted by Jan Beulich 2 years, 6 months ago
On 18.10.2021 11:41, Roger Pau Monné wrote:
> On Mon, Jun 28, 2021 at 01:48:53PM +0200, Jan Beulich wrote:
>> At the time of d838ac2539cf ("x86: don't allow Dom0 access to the HT
>> address range") documentation correctly stated that the range was
>> completely fixed. For Fam17 and newer, it lives at the top of physical
>> address space, though.
>>
>> To correctly determine the top of physical address space, we need to
>> account for their physical address reduction, hence the calculation of
>> paddr_bits also gets adjusted.
>>
>> While for paddr_bits < 40 the HT range is completely hidden, there's no
>> need to suppress the range insertion in that case: It'll just have no
>> real meaning.
>>
>> Reported-by: Igor Druzhinin <igor.druzhinin@citrix.com>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks, but before applying this I'd prefer to resolve your concern
voiced below.

>> --- a/xen/arch/x86/cpu/common.c
>> +++ b/xen/arch/x86/cpu/common.c
>> @@ -349,16 +349,23 @@ void __init early_cpu_init(void)
>>  
>>  	eax = cpuid_eax(0x80000000);
>>  	if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
>> +		ebx = eax >= 0x8000001f ? cpuid_ebx(0x8000001f) : 0;
>>  		eax = cpuid_eax(0x80000008);
>> +
>>  		paddr_bits = eax & 0xff;
>>  		if (paddr_bits > PADDR_BITS)
>>  			paddr_bits = PADDR_BITS;
>> +
>>  		vaddr_bits = (eax >> 8) & 0xff;
>>  		if (vaddr_bits > VADDR_BITS)
>>  			vaddr_bits = VADDR_BITS;
>> +
>>  		hap_paddr_bits = ((eax >> 16) & 0xff) ?: paddr_bits;
>>  		if (hap_paddr_bits > PADDR_BITS)
>>  			hap_paddr_bits = PADDR_BITS;
>> +
>> +		/* Account for SME's physical address space reduction. */
>> +		paddr_bits -= (ebx >> 6) & 0x3f;
> 
> Does it make sense to check for 0x8000001f[eax] bit 0 in order to
> assert that there's support for SME, or is assuming that the reduction
> is != 0 in the other cpuid leaf enough?

Documentation doesn't really tie them together afaics, so I thought
I wouldn't either. I read this lack of an explicit connection as
leaving open the possibility that address space reduction could also,
hypothetically at this point, apply to other features.

> Is it possible for firmware vendors to disable advertising the SME
> support bit and leave the physical address space reduction one in
> place?

I don't know if it's possible (I'm unaware of e.g. MSR-level control
allowing these to be modified independently), but if it is I'd consider
it inconsistent if one but not the other was zapped. I'm unconvinced
that we really would need to deal with such inconsistencies, all the
more so because it's not really clear what the inconsistent setting
would really mean for the placement of the HT range.

While writing this, there was one more thing I came to think of:
Should we perhaps suppress the iomem_deny_access() altogether when
running virtualized ourselves?

Jan