[PATCH] x86/kvm: Override default caching mode for SEV-SNP and TDX

Kirill A. Shutemov posted 1 patch 1 year, 3 months ago
arch/x86/kernel/kvm.c | 4 ++++
1 file changed, 4 insertions(+)
[PATCH] x86/kvm: Override default caching mode for SEV-SNP and TDX
Posted by Kirill A. Shutemov 1 year, 3 months ago
AMD SEV-SNP and Intel TDX have limited access to MTRR: either it is not
advertised in CPUID or it cannot be programmed (on TDX, due to #VE on
CR0.CD clear).

This results in guests using uncached mappings where it shouldn't and
pmd/pud_set_huge() failures due to non-uniform memory type reported by
mtrr_type_lookup().

Override MTRR state, making it WB by default as the kernel does for
Hyper-V guests.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Suggested-by: Binbin Wu <binbin.wu@intel.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/kernel/kvm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 263f8aed4e2c..21e9e4845354 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -37,6 +37,7 @@
 #include <asm/apic.h>
 #include <asm/apicdef.h>
 #include <asm/hypervisor.h>
+#include <asm/mtrr.h>
 #include <asm/tlb.h>
 #include <asm/cpuidle_haltpoll.h>
 #include <asm/ptrace.h>
@@ -980,6 +981,9 @@ static void __init kvm_init_platform(void)
 	}
 	kvmclock_init();
 	x86_platform.apic_post_init = kvm_apic_init;
+
+	/* Set WB as the default cache mode for SEV-SNP and TDX */
+	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
 }
 
 #if defined(CONFIG_AMD_MEM_ENCRYPT)
-- 
2.45.2
Re: [PATCH] x86/kvm: Override default caching mode for SEV-SNP and TDX
Posted by Sean Christopherson 1 year ago
On Tue, Oct 15, 2024, Kirill A. Shutemov wrote:
> AMD SEV-SNP and Intel TDX have limited access to MTRR: either it is not
> advertised in CPUID or it cannot be programmed (on TDX, due to #VE on
> CR0.CD clear).
> 
> This results in guests using uncached mappings where it shouldn't and
> pmd/pud_set_huge() failures due to non-uniform memory type reported by
> mtrr_type_lookup().
> 
> Override MTRR state, making it WB by default as the kernel does for
> Hyper-V guests.

In a turn of events that should have shocked no one, simply overriding the default
memtype also results in breakage.

The insanity that is ACPI relies on UC MTRR mappings to force memory that is very
obviously not RAM to use non-WB mappings.  During acpi_init(), acpi_os_map_iomem()
will attempt to map devices as WB:

  static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
					      acpi_size size)
  {
       return ioremap_cache(phys, size);
  }

If acpi_init() runs before the corresponding device driver is probed, ACPI's WB
mapping will "win", and result in the driver's ioremap() failing because the
existing WB mapping isn't compatible with the requested UC-.

[    1.730459] ioremap error for 0xfed40000-0xfed45000, requested 0x2, got 0x0
[    1.732780] tpm_tis MSFT0101:00: probe with driver tpm_tis failed with error -12


Note, the '0x2' and '0x0' values refer to "enum page_cache_mode", not x86's
memtypes (which frustratingly are an almost pure inversion; 2 == WB, 0 == UC).

The above trace is from a Google-VMM based VM, but the same behavior happens with
a QEMU based VM, so unless QEMU is also building bad ACPI tables, I don't think
this is a VMM problem.

E.g. tracing mapping requests for HPET under QEMU yields

   Mapping HPET, req_type = 0
   WARNING: CPU: 5 PID: 1 at arch/x86/mm/pat/memtype.c:528 memtype_reserve+0x22f/0x3f0
   Call Trace:
    <TASK>
    __ioremap_caller.constprop.0+0xd6/0x330
    acpi_os_map_iomem+0x195/0x1b0
    acpi_ex_system_memory_space_handler+0x11c/0x2f0
    acpi_ev_address_space_dispatch+0x168/0x3b0
    acpi_ex_access_region+0xd7/0x280
    acpi_ex_field_datum_io+0x73/0x210
    acpi_ex_extract_from_field+0x267/0x2a0
    acpi_ex_read_data_from_field+0x8e/0x220
    acpi_ex_resolve_node_to_value+0xe2/0x2b0
    acpi_ds_evaluate_name_path+0xa9/0x100
    acpi_ds_exec_end_op+0x21f/0x4c0
    acpi_ps_parse_loop+0xf4/0x670
    acpi_ps_parse_aml+0x17b/0x3d0
    acpi_ps_execute_method+0x137/0x260
    acpi_ns_evaluate+0x1f0/0x2d0
    acpi_evaluate_object+0x13d/0x2e0
    acpi_evaluate_integer+0x50/0xe0
    acpi_bus_get_status+0x7b/0x140
    acpi_add_single_object+0x3f8/0x750
    acpi_bus_check_add+0xb2/0x340
    acpi_ns_walk_namespace+0xfe/0x200
    acpi_walk_namespace+0xbb/0xe0
    acpi_bus_scan+0x1b5/0x1d0
    acpi_scan_init+0xd5/0x290
    acpi_init+0x1fc/0x520
    do_one_initcall+0x41/0x1d0
    kernel_init_freeable+0x164/0x260
    kernel_init+0x16/0x1a0
    ret_from_fork+0x2d/0x50
    ret_from_fork_asm+0x11/0x20
   ---[ end trace 0000000000000000 ]---

The only reason this doesn't cause problems for HPET is because HPET gets special
treatment via x86_init.timers.timer_init(), and so gets a chance to creat its UC-
mapping before acpi_init() clobbers things.

E.g. modifying the horrid CoCo MTRR hack to apply to all VM types, and then disabling
the early call to hpet_time_init() yields the same behavior for HPET:

[    0.318264] ioremap error for 0xfed00000-0xfed01000, requested 0x2, got 0x0

---
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 7b29ebda024f..8b58024611e5 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -456,11 +456,13 @@ void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
         * - when running as SEV-SNP or TDX guest to avoid unnecessary
         *   VMM communication/Virtualization exceptions (#VC, #VE)
         */
+#if 0
        if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP) &&
            !hv_is_isolation_supported() &&
            !cpu_feature_enabled(X86_FEATURE_XENPV) &&
            !cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
                return;
+#endif
 
        /* Disable MTRR in order to disable MTRR modifications. */
        setup_clear_cpu_cap(X86_FEATURE_MTRR);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 21e9e4845354..11f08aba1b8c 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -982,6 +982,8 @@ static void __init kvm_init_platform(void)
        kvmclock_init();
        x86_platform.apic_post_init = kvm_apic_init;
 
+       x86_init.timers.timer_init = x86_init_noop;
+
        /* Set WB as the default cache mode for SEV-SNP and TDX */
        mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
 }
---

This is all honestly beyond ridiculous.  As of commit 0a7b73559b39 ("KVM: x86:
Remove VMX support for virtualizing guest MTRR memtypes"), MTRRs are never
virtualized under KVM, i.e. the memtype in the MTRR has *zero* impact on the
effective memtype.  And even before that commit, KVM only virtualized MTRRs for
VMs running on Intel hosts with passthrough non-coherent DMA devices.

Even worse, the regions that are typically covered by the default MTRR are either
host MMIO or emulated MMIO.  For host MMIO, KVM darn well needs to ensure that
memory is UC.  And for emulated MMIO, even if KVM did virtualize MTRRs, there
would *still* be zero impact on the effective memtype.

In other words, irrespective of SNP and TDX, programming the MTRRs under KVM
does nothing more than give the kernel warm fuzzies.  But the _software_ tracking
of what the kernel thinks are the requisite memtypes obviously matters.

As a stopgap, rather than carry this CoCo hack, what if we add a synthetic feature
flag that says it's ok to modify MTRRs without disabling caching?  I think that'll
make TDX happy, and should avoid a long game of whack-a-mole.  Then longterm,
figure out a clean way to eliminate accessing the "real" MTRRs entirely.

This is safe even under older KVM, because KVM obviously isn't writing the real
MTRRs, and KVM invalidates all affected EPT entries (where KVM shoves the guest's
desired memtype) on an MTRR update.  If that's a concern we could do this only
for "special" guests, and/or add a PV CPUID flag to KVM to announce that MTRRs
are only emulated.

E.g.

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 645aa360628d..b5699eeaef5d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -236,6 +236,7 @@
 #define X86_FEATURE_PVUNLOCK           ( 8*32+20) /* PV unlock function */
 #define X86_FEATURE_VCPUPREEMPT                ( 8*32+21) /* PV vcpu_is_preempted function */
 #define X86_FEATURE_TDX_GUEST          ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */
+#define X86_FEATURE_EMULATED_MTRRS     ( 8*32+22) /* MTRRs are emulated and can be modified while caching is enabled */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index e6fa03ed9172..c668032d1dc1 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -1053,6 +1053,9 @@ void cache_disable(void) __acquires(cache_disable_lock)
 {
        unsigned long cr0;
 
+       if (cpu_feature_enabled(X86_FEATURE_EMULATED_MTRRS))
+               return;
+
        /*
         * Note that this is not ideal
         * since the cache is only flushed/disabled for this CPU while the
@@ -1095,6 +1098,9 @@ void cache_disable(void) __acquires(cache_disable_lock)
 
 void cache_enable(void) __releases(cache_disable_lock)
 {
+       if (cpu_feature_enabled(X86_FEATURE_EMULATED_MTRRS))
+               return;
+
        /* Flush TLBs (no need to flush caches - they are disabled) */
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        flush_tlb_local();
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 21e9e4845354..6266b132e359 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -982,8 +982,7 @@ static void __init kvm_init_platform(void)
        kvmclock_init();
        x86_platform.apic_post_init = kvm_apic_init;
 
-       /* Set WB as the default cache mode for SEV-SNP and TDX */
-       mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
+       setup_force_cpu_cap(X86_FEATURE_EMULATED_MTRRS);
 }
 
 #if defined(CONFIG_AMD_MEM_ENCRYPT)
Re: [PATCH] x86/kvm: Override default caching mode for SEV-SNP and TDX
Posted by Kirill A. Shutemov 1 year ago
On Fri, Jan 24, 2025 at 12:59:50PM -0800, Sean Christopherson wrote:
> As a stopgap, rather than carry this CoCo hack, what if we add a synthetic feature
> flag that says it's ok to modify MTRRs without disabling caching?  I think that'll
> make TDX happy, and should avoid a long game of whack-a-mole.  Then longterm,
> figure out a clean way to eliminate accessing the "real" MTRRs entirely.

That's kina make sense to me. But I obviously didn't understand scale of the
mess around MTRRs and can only hope it will not break anything else. :/

-- 
  Kiryl Shutsemau / Kirill A. Shutemov
Re: [PATCH] x86/kvm: Override default caching mode for SEV-SNP and TDX
Posted by Paolo Bonzini 1 year, 3 months ago
On 10/15/24 11:58, Kirill A. Shutemov wrote:
> AMD SEV-SNP and Intel TDX have limited access to MTRR: either it is not
> advertised in CPUID or it cannot be programmed (on TDX, due to #VE on
> CR0.CD clear).
> 
> This results in guests using uncached mappings where it shouldn't and
> pmd/pud_set_huge() failures due to non-uniform memory type reported by
> mtrr_type_lookup().
> 
> Override MTRR state, making it WB by default as the kernel does for
> Hyper-V guests.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Suggested-by: Binbin Wu <binbin.wu@intel.com>
> Cc: Juergen Gross <jgross@suse.com>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>

Queued, thanks.  I'll leave the follow up to the owners of the tip tree.

Paolo

> ---
>   arch/x86/kernel/kvm.c | 4 ++++
>   1 file changed, 4 insertions(+)
> 
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 263f8aed4e2c..21e9e4845354 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -37,6 +37,7 @@
>   #include <asm/apic.h>
>   #include <asm/apicdef.h>
>   #include <asm/hypervisor.h>
> +#include <asm/mtrr.h>
>   #include <asm/tlb.h>
>   #include <asm/cpuidle_haltpoll.h>
>   #include <asm/ptrace.h>
> @@ -980,6 +981,9 @@ static void __init kvm_init_platform(void)
>   	}
>   	kvmclock_init();
>   	x86_platform.apic_post_init = kvm_apic_init;
> +
> +	/* Set WB as the default cache mode for SEV-SNP and TDX */
> +	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
>   }
>   
>   #if defined(CONFIG_AMD_MEM_ENCRYPT)
Re: [PATCH] x86/kvm: Override default caching mode for SEV-SNP and TDX
Posted by Jürgen Groß 1 year, 3 months ago
On 15.10.24 11:58, Kirill A. Shutemov wrote:
> AMD SEV-SNP and Intel TDX have limited access to MTRR: either it is not
> advertised in CPUID or it cannot be programmed (on TDX, due to #VE on
> CR0.CD clear).
> 
> This results in guests using uncached mappings where it shouldn't and
> pmd/pud_set_huge() failures due to non-uniform memory type reported by
> mtrr_type_lookup().
> 
> Override MTRR state, making it WB by default as the kernel does for
> Hyper-V guests.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Suggested-by: Binbin Wu <binbin.wu@intel.com>
> Cc: Juergen Gross <jgross@suse.com>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> ---
>   arch/x86/kernel/kvm.c | 4 ++++
>   1 file changed, 4 insertions(+)
> 
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 263f8aed4e2c..21e9e4845354 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -37,6 +37,7 @@
>   #include <asm/apic.h>
>   #include <asm/apicdef.h>
>   #include <asm/hypervisor.h>
> +#include <asm/mtrr.h>
>   #include <asm/tlb.h>
>   #include <asm/cpuidle_haltpoll.h>
>   #include <asm/ptrace.h>
> @@ -980,6 +981,9 @@ static void __init kvm_init_platform(void)
>   	}
>   	kvmclock_init();
>   	x86_platform.apic_post_init = kvm_apic_init;
> +
> +	/* Set WB as the default cache mode for SEV-SNP and TDX */
> +	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);

Do you really want to do this for _all_ KVM guests?

I'd expect this call to be conditional on TDX or SEV-SNP.


Juergen
Re: [PATCH] x86/kvm: Override default caching mode for SEV-SNP and TDX
Posted by Dave Hansen 1 year, 3 months ago
On 10/15/24 03:12, Jürgen Groß wrote:
>>
>> +    /* Set WB as the default cache mode for SEV-SNP and TDX */
>> +    mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
> 
> Do you really want to do this for _all_ KVM guests?
> 
> I'd expect this call to be conditional on TDX or SEV-SNP.

I was confused by this as well.

Shouldn't mtrr_overwrite_state() be named something more like:

	guest_force_mtrr_state()

or something?

The mtrr_overwrite_state() comment is pretty good, but it looks quite
confusing from the caller.
Re: [PATCH] x86/kvm: Override default caching mode for SEV-SNP and TDX
Posted by Kirill A. Shutemov 1 year, 3 months ago
On Tue, Oct 15, 2024 at 06:14:29AM -0700, Dave Hansen wrote:
> On 10/15/24 03:12, Jürgen Groß wrote:
> >>
> >> +    /* Set WB as the default cache mode for SEV-SNP and TDX */
> >> +    mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
> > 
> > Do you really want to do this for _all_ KVM guests?
> > 
> > I'd expect this call to be conditional on TDX or SEV-SNP.
> 
> I was confused by this as well.
> 
> Shouldn't mtrr_overwrite_state() be named something more like:
> 
> 	guest_force_mtrr_state()
> 
> or something?
> 
> The mtrr_overwrite_state() comment is pretty good, but it looks quite
> confusing from the caller.

I can submit a following up patch with rename if it is fine.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov
Re: [PATCH] x86/kvm: Override default caching mode for SEV-SNP and TDX
Posted by Juergen Gross 1 year, 3 months ago
On 15.10.24 15:54, Kirill A. Shutemov wrote:
> On Tue, Oct 15, 2024 at 06:14:29AM -0700, Dave Hansen wrote:
>> On 10/15/24 03:12, Jürgen Groß wrote:
>>>>
>>>> +    /* Set WB as the default cache mode for SEV-SNP and TDX */
>>>> +    mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
>>>
>>> Do you really want to do this for _all_ KVM guests?
>>>
>>> I'd expect this call to be conditional on TDX or SEV-SNP.
>>
>> I was confused by this as well.
>>
>> Shouldn't mtrr_overwrite_state() be named something more like:
>>
>> 	guest_force_mtrr_state()
>>
>> or something?
>>
>> The mtrr_overwrite_state() comment is pretty good, but it looks quite
>> confusing from the caller.
> 
> I can submit a following up patch with rename if it is fine.
> 

Fine with me.


Juergen
Re: [PATCH] x86/kvm: Override default caching mode for SEV-SNP and TDX
Posted by Kirill A. Shutemov 1 year, 3 months ago
On Tue, Oct 15, 2024 at 12:12:51PM +0200, Jürgen Groß wrote:
> On 15.10.24 11:58, Kirill A. Shutemov wrote:
> > AMD SEV-SNP and Intel TDX have limited access to MTRR: either it is not
> > advertised in CPUID or it cannot be programmed (on TDX, due to #VE on
> > CR0.CD clear).
> > 
> > This results in guests using uncached mappings where it shouldn't and
> > pmd/pud_set_huge() failures due to non-uniform memory type reported by
> > mtrr_type_lookup().
> > 
> > Override MTRR state, making it WB by default as the kernel does for
> > Hyper-V guests.
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > Suggested-by: Binbin Wu <binbin.wu@intel.com>
> > Cc: Juergen Gross <jgross@suse.com>
> > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > ---
> >   arch/x86/kernel/kvm.c | 4 ++++
> >   1 file changed, 4 insertions(+)
> > 
> > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> > index 263f8aed4e2c..21e9e4845354 100644
> > --- a/arch/x86/kernel/kvm.c
> > +++ b/arch/x86/kernel/kvm.c
> > @@ -37,6 +37,7 @@
> >   #include <asm/apic.h>
> >   #include <asm/apicdef.h>
> >   #include <asm/hypervisor.h>
> > +#include <asm/mtrr.h>
> >   #include <asm/tlb.h>
> >   #include <asm/cpuidle_haltpoll.h>
> >   #include <asm/ptrace.h>
> > @@ -980,6 +981,9 @@ static void __init kvm_init_platform(void)
> >   	}
> >   	kvmclock_init();
> >   	x86_platform.apic_post_init = kvm_apic_init;
> > +
> > +	/* Set WB as the default cache mode for SEV-SNP and TDX */
> > +	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
> 
> Do you really want to do this for _all_ KVM guests?
> 
> I'd expect this call to be conditional on TDX or SEV-SNP.

mtrr_overwrite_state() checks it internally.


-- 
  Kiryl Shutsemau / Kirill A. Shutemov
Re: [PATCH] x86/kvm: Override default caching mode for SEV-SNP and TDX
Posted by Jürgen Groß 1 year, 3 months ago
On 15.10.24 14:31, Kirill A. Shutemov wrote:
> On Tue, Oct 15, 2024 at 12:12:51PM +0200, Jürgen Groß wrote:
>> On 15.10.24 11:58, Kirill A. Shutemov wrote:
>>> AMD SEV-SNP and Intel TDX have limited access to MTRR: either it is not
>>> advertised in CPUID or it cannot be programmed (on TDX, due to #VE on
>>> CR0.CD clear).
>>>
>>> This results in guests using uncached mappings where it shouldn't and
>>> pmd/pud_set_huge() failures due to non-uniform memory type reported by
>>> mtrr_type_lookup().
>>>
>>> Override MTRR state, making it WB by default as the kernel does for
>>> Hyper-V guests.
>>>
>>> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
>>> Suggested-by: Binbin Wu <binbin.wu@intel.com>
>>> Cc: Juergen Gross <jgross@suse.com>
>>> Cc: Tom Lendacky <thomas.lendacky@amd.com>
>>> ---
>>>    arch/x86/kernel/kvm.c | 4 ++++
>>>    1 file changed, 4 insertions(+)
>>>
>>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>>> index 263f8aed4e2c..21e9e4845354 100644
>>> --- a/arch/x86/kernel/kvm.c
>>> +++ b/arch/x86/kernel/kvm.c
>>> @@ -37,6 +37,7 @@
>>>    #include <asm/apic.h>
>>>    #include <asm/apicdef.h>
>>>    #include <asm/hypervisor.h>
>>> +#include <asm/mtrr.h>
>>>    #include <asm/tlb.h>
>>>    #include <asm/cpuidle_haltpoll.h>
>>>    #include <asm/ptrace.h>
>>> @@ -980,6 +981,9 @@ static void __init kvm_init_platform(void)
>>>    	}
>>>    	kvmclock_init();
>>>    	x86_platform.apic_post_init = kvm_apic_init;
>>> +
>>> +	/* Set WB as the default cache mode for SEV-SNP and TDX */
>>> +	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
>>
>> Do you really want to do this for _all_ KVM guests?
>>
>> I'd expect this call to be conditional on TDX or SEV-SNP.
> 
> mtrr_overwrite_state() checks it internally.

Ah, right, I forgot I added that check on request by Boris. :-)

Reviewed-by: Juergen Gross <jgross@suse.com>


Juergen

[PATCH] x86/mtrr: Rename mtrr_overwrite_state() to guest_force_mtrr_state()
Posted by Kirill A. Shutemov 1 year, 3 months ago
Rename the helper to better reflect its function.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/hyperv/ivm.c              |  2 +-
 arch/x86/include/asm/mtrr.h        | 10 +++++-----
 arch/x86/kernel/cpu/mtrr/generic.c |  6 +++---
 arch/x86/kernel/cpu/mtrr/mtrr.c    |  2 +-
 arch/x86/kernel/kvm.c              |  2 +-
 arch/x86/xen/enlighten_pv.c        |  4 ++--
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 60fc3ed72830..90aabe1fd3b6 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -664,7 +664,7 @@ void __init hv_vtom_init(void)
 	x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
 
 	/* Set WB as the default cache mode. */
-	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
+	guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
 }
 
 #endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 4218248083d9..c69e269937c5 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -58,8 +58,8 @@ struct mtrr_state_type {
  */
 # ifdef CONFIG_MTRR
 void mtrr_bp_init(void);
-void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
-			  mtrr_type def_type);
+void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var,
+			    mtrr_type def_type);
 extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform);
 extern void mtrr_save_fixed_ranges(void *);
 extern void mtrr_save_state(void);
@@ -75,9 +75,9 @@ void mtrr_disable(void);
 void mtrr_enable(void);
 void mtrr_generic_set_state(void);
 #  else
-static inline void mtrr_overwrite_state(struct mtrr_var_range *var,
-					unsigned int num_var,
-					mtrr_type def_type)
+static inline void guest_force_mtrr_state(struct mtrr_var_range *var,
+					  unsigned int num_var,
+					  mtrr_type def_type)
 {
 }
 
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 7b29ebda024f..2fdfda2b60e4 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -423,7 +423,7 @@ void __init mtrr_copy_map(void)
 }
 
 /**
- * mtrr_overwrite_state - set static MTRR state
+ * guest_force_mtrr_state - set static MTRR state for a guest
  *
  * Used to set MTRR state via different means (e.g. with data obtained from
  * a hypervisor).
@@ -436,8 +436,8 @@ void __init mtrr_copy_map(void)
  * @num_var: length of the @var array
  * @def_type: default caching type
  */
-void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
-			  mtrr_type def_type)
+void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var,
+			    mtrr_type def_type)
 {
 	unsigned int i;
 
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 989d368be04f..ecbda0341a8a 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -625,7 +625,7 @@ void mtrr_save_state(void)
 static int __init mtrr_init_finalize(void)
 {
 	/*
-	 * Map might exist if mtrr_overwrite_state() has been called or if
+	 * Map might exist if guest_force_mtrr_state() has been called or if
 	 * mtrr_enabled() returns true.
 	 */
 	mtrr_copy_map();
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 21e9e4845354..7a422a6c5983 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -983,7 +983,7 @@ static void __init kvm_init_platform(void)
 	x86_platform.apic_post_init = kvm_apic_init;
 
 	/* Set WB as the default cache mode for SEV-SNP and TDX */
-	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
+	guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
 }
 
 #if defined(CONFIG_AMD_MEM_ENCRYPT)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index d6818c6cafda..633469fab536 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -171,7 +171,7 @@ static void __init xen_set_mtrr_data(void)
 
 	/* Only overwrite MTRR state if any MTRR could be got from Xen. */
 	if (reg)
-		mtrr_overwrite_state(var, reg, MTRR_TYPE_UNCACHABLE);
+		guest_force_mtrr_state(var, reg, MTRR_TYPE_UNCACHABLE);
 #endif
 }
 
@@ -195,7 +195,7 @@ static void __init xen_pv_init_platform(void)
 	if (xen_initial_domain())
 		xen_set_mtrr_data();
 	else
-		mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
+		guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
 
 	/* Adjust nr_cpu_ids before "enumeration" happens */
 	xen_smp_count_cpus();
-- 
2.45.2
Re: [PATCH] x86/mtrr: Rename mtrr_overwrite_state() to guest_force_mtrr_state()
Posted by Kirill A. Shutemov 1 year, 3 months ago
On Wed, Oct 16, 2024 at 01:50:48PM +0300, Kirill A. Shutemov wrote:
> Rename the helper to better reflect its function.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Suggested-by: Dave Hansen <dave.hansen@intel.com>

KVM patch is Linus' tree.

Dave, can you take this one?

-- 
  Kiryl Shutsemau / Kirill A. Shutemov
Re: [PATCH] x86/mtrr: Rename mtrr_overwrite_state() to guest_force_mtrr_state()
Posted by Dave Hansen 1 year, 3 months ago
On 10/29/24 08:13, Kirill A. Shutemov wrote:
> On Wed, Oct 16, 2024 at 01:50:48PM +0300, Kirill A. Shutemov wrote:
>> Rename the helper to better reflect its function.
>>
>> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
>> Suggested-by: Dave Hansen <dave.hansen@intel.com>
> 
> KVM patch is Linus' tree.
> 
> Dave, can you take this one?

Not easily without a merge of Paolo's KVM bits.  The confusion that
might cause isn't quite worth it for a rename.  I can either stash this
somewhere or I'm also fine if Paolo takes it on top of your other patch:

Acked-by: Dave Hansen <dave.hansen@intel.com>
Re: [PATCH] x86/mtrr: Rename mtrr_overwrite_state() to guest_force_mtrr_state()
Posted by Kirill A. Shutemov 1 year, 3 months ago
On Tue, Oct 29, 2024 at 10:37:07AM -0700, Dave Hansen wrote:
> On 10/29/24 08:13, Kirill A. Shutemov wrote:
> > On Wed, Oct 16, 2024 at 01:50:48PM +0300, Kirill A. Shutemov wrote:
> >> Rename the helper to better reflect its function.
> >>
> >> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> >> Suggested-by: Dave Hansen <dave.hansen@intel.com>
> > 
> > KVM patch is Linus' tree.
> > 
> > Dave, can you take this one?
> 
> Not easily without a merge of Paolo's KVM bits.  The confusion that
> might cause isn't quite worth it for a rename.  I can either stash this
> somewhere or I'm also fine if Paolo takes it on top of your other patch:
> 
> Acked-by: Dave Hansen <dave.hansen@intel.com>

I don't follow what is the problem.

As I said KVM patch is already in Linus' tree -- v6.12-rc5 -- and tip tree
already uses the tag as basis for x86/urgent.

Hm?

-- 
  Kiryl Shutsemau / Kirill A. Shutemov
RE: [PATCH] x86/mtrr: Rename mtrr_overwrite_state() to guest_force_mtrr_state()
Posted by Michael Kelley 1 year, 3 months ago
From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Sent: Wednesday, October 16, 2024 3:51 AM
> 
> Rename the helper to better reflect its function.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Suggested-by: Dave Hansen <dave.hansen@intel.com>
> ---
>  arch/x86/hyperv/ivm.c              |  2 +-
>  arch/x86/include/asm/mtrr.h        | 10 +++++-----
>  arch/x86/kernel/cpu/mtrr/generic.c |  6 +++---
>  arch/x86/kernel/cpu/mtrr/mtrr.c    |  2 +-
>  arch/x86/kernel/kvm.c              |  2 +-
>  arch/x86/xen/enlighten_pv.c        |  4 ++--
>  6 files changed, 13 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 60fc3ed72830..90aabe1fd3b6 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -664,7 +664,7 @@ void __init hv_vtom_init(void)
>  	x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
> 
>  	/* Set WB as the default cache mode. */
> -	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
> +	guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
>  }
> 
>  #endif /* defined(CONFIG_AMD_MEM_ENCRYPT) ||
> defined(CONFIG_INTEL_TDX_GUEST) */
> diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
> index 4218248083d9..c69e269937c5 100644
> --- a/arch/x86/include/asm/mtrr.h
> +++ b/arch/x86/include/asm/mtrr.h
> @@ -58,8 +58,8 @@ struct mtrr_state_type {
>   */
>  # ifdef CONFIG_MTRR
>  void mtrr_bp_init(void);
> -void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
> -			  mtrr_type def_type);
> +void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var,
> +			    mtrr_type def_type);
>  extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform);
>  extern void mtrr_save_fixed_ranges(void *);
>  extern void mtrr_save_state(void);
> @@ -75,9 +75,9 @@ void mtrr_disable(void);
>  void mtrr_enable(void);
>  void mtrr_generic_set_state(void);
>  #  else
> -static inline void mtrr_overwrite_state(struct mtrr_var_range *var,
> -					unsigned int num_var,
> -					mtrr_type def_type)
> +static inline void guest_force_mtrr_state(struct mtrr_var_range *var,
> +					  unsigned int num_var,
> +					  mtrr_type def_type)
>  {
>  }
> 
> diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
> index 7b29ebda024f..2fdfda2b60e4 100644
> --- a/arch/x86/kernel/cpu/mtrr/generic.c
> +++ b/arch/x86/kernel/cpu/mtrr/generic.c
> @@ -423,7 +423,7 @@ void __init mtrr_copy_map(void)
>  }
> 
>  /**
> - * mtrr_overwrite_state - set static MTRR state
> + * guest_force_mtrr_state - set static MTRR state for a guest
>   *
>   * Used to set MTRR state via different means (e.g. with data obtained from
>   * a hypervisor).
> @@ -436,8 +436,8 @@ void __init mtrr_copy_map(void)
>   * @num_var: length of the @var array
>   * @def_type: default caching type
>   */
> -void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
> -			  mtrr_type def_type)
> +void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var,
> +			    mtrr_type def_type)
>  {
>  	unsigned int i;
> 
> diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
> index 989d368be04f..ecbda0341a8a 100644
> --- a/arch/x86/kernel/cpu/mtrr/mtrr.c
> +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
> @@ -625,7 +625,7 @@ void mtrr_save_state(void)
>  static int __init mtrr_init_finalize(void)
>  {
>  	/*
> -	 * Map might exist if mtrr_overwrite_state() has been called or if
> +	 * Map might exist if guest_force_mtrr_state() has been called or if
>  	 * mtrr_enabled() returns true.
>  	 */
>  	mtrr_copy_map();
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 21e9e4845354..7a422a6c5983 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -983,7 +983,7 @@ static void __init kvm_init_platform(void)
>  	x86_platform.apic_post_init = kvm_apic_init;
> 
>  	/* Set WB as the default cache mode for SEV-SNP and TDX */
> -	mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
> +	guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
>  }
> 
>  #if defined(CONFIG_AMD_MEM_ENCRYPT)
> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
> index d6818c6cafda..633469fab536 100644
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -171,7 +171,7 @@ static void __init xen_set_mtrr_data(void)
> 
>  	/* Only overwrite MTRR state if any MTRR could be got from Xen. */
>  	if (reg)
> -		mtrr_overwrite_state(var, reg, MTRR_TYPE_UNCACHABLE);
> +		guest_force_mtrr_state(var, reg, MTRR_TYPE_UNCACHABLE);
>  #endif
>  }
> 
> @@ -195,7 +195,7 @@ static void __init xen_pv_init_platform(void)
>  	if (xen_initial_domain())
>  		xen_set_mtrr_data();
>  	else
> -		mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
> +		guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
> 
>  	/* Adjust nr_cpu_ids before "enumeration" happens */
>  	xen_smp_count_cpus();
> --
> 2.45.2
> 

LGTM
Reviewed-by: Michael Kelley <mhklinux@outlook.com>