[PATCH v2] KVM: x86: Unify L1TF flushing under per-CPU variable
Posted by Brendan Jackman 2 months ago
Currently the need to flush L1D for L1TF is tracked by two bits: one
per-CPU and one per-vCPU.

The per-vCPU bit is always set when the vCPU shows up on a core, so
there is no interesting state that's truly per-vCPU. Indeed, this is a
requirement, since L1D is a part of the physical CPU.

So simplify this by combining the two bits.

The vCPU bit was being written from preemption-enabled regions. For
those cases, use raw_cpu_write() (via a variant of the setter function)
to avoid DEBUG_PREEMPT failures. If the vCPU is getting migrated, the
CPU that gets its bit set in these paths is not important; vcpu_load()
must always set it on the destination CPU before the guest is resumed.
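
To make that concrete, here is an illustrative worst-case interleaving
(a sketch of the reasoning, not an observed trace):

	/* vCPU task on CPU0, preemption enabled */
	kvm_set_cpu_l1tf_flush_l1d_raw();  /* bit may land on CPU0... */
	/* ...task is migrated to CPU1 before the next VM-entry... */
	kvm_arch_vcpu_load(vcpu, 1);       /* ...but this sets CPU1's bit */
	/* VM-entry on CPU1 sees the bit and flushes L1D */

The worst outcome is a stale bit left on CPU0, costing at most one
spurious L1D flush there.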

Signed-off-by: Brendan Jackman <jackmanb@google.com>
---
Changes in v2:
- Moved the bit back to irq_stat
- Fixed DEBUG_PREEMPT issues by adding a _raw variant
- Link to v1: https://lore.kernel.org/r/20251013-b4-l1tf-percpu-v1-1-d65c5366ea1a@google.com
---
 arch/x86/include/asm/hardirq.h  |  6 ++++++
 arch/x86/include/asm/kvm_host.h |  3 ---
 arch/x86/kvm/mmu/mmu.c          |  2 +-
 arch/x86/kvm/vmx/nested.c       |  2 +-
 arch/x86/kvm/vmx/vmx.c          | 20 +++++---------------
 arch/x86/kvm/x86.c              |  6 +++---
 6 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index f00c09ffe6a95f07342bb0c6cea3769d71eecfa9..8a5c5deadb5912cc9ae080740c8a7372e6ef7577 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_X86_HARDIRQ_H
 #define _ASM_X86_HARDIRQ_H
 
+#include <linux/percpu.h>
 #include <linux/threads.h>
 
 typedef struct {
@@ -78,6 +79,11 @@ static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
 	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
 }
 
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d_raw(void)
+{
+	raw_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
+}
+
 static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void)
 {
 	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 48598d017d6f3f07263a2ffffe670be2658eb9cb..fcdc65ab13d8383018577aacf19e832e6c4ceb0b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1055,9 +1055,6 @@ struct kvm_vcpu_arch {
 	/* be preempted when it's in kernel-mode(cpl=0) */
 	bool preempted_in_kernel;
 
-	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
-	bool l1tf_flush_l1d;
-
 	/* Host CPU on which VM-entry was most recently attempted */
 	int last_vmentry_cpu;
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 667d66cf76d5e52c22f9517914307244ae868eea..8c0dce401a42d977756ca82d249bb33c858b9c9f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4859,7 +4859,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 	 */
 	BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK));
 
-	vcpu->arch.l1tf_flush_l1d = true;
+	kvm_set_cpu_l1tf_flush_l1d();
 	if (!flags) {
 		trace_kvm_page_fault(vcpu, fault_address, error_code);
 
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 76271962cb7083b475de6d7d24bf9cb918050650..1d376b4e6aa4abc475c1aac2ee937dbedb834cb1 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3880,7 +3880,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		goto vmentry_failed;
 
 	/* Hide L1D cache contents from the nested guest.  */
-	vmx->vcpu.arch.l1tf_flush_l1d = true;
+	kvm_set_cpu_l1tf_flush_l1d_raw();
 
 	/*
 	 * Must happen outside of nested_vmx_enter_non_root_mode() as it will
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 546272a5d34da301710df1d89414f41fc9b24a1f..6515beefa1fc8da042c0b66c207250ccf79c888e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6673,26 +6673,16 @@ static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 	 * 'always'
 	 */
 	if (static_branch_likely(&vmx_l1d_flush_cond)) {
-		bool flush_l1d;
-
 		/*
-		 * Clear the per-vcpu flush bit, it gets set again if the vCPU
+		 * Clear the per-cpu flush bit, it gets set again if the vCPU
 		 * is reloaded, i.e. if the vCPU is scheduled out or if KVM
 		 * exits to userspace, or if KVM reaches one of the unsafe
-		 * VMEXIT handlers, e.g. if KVM calls into the emulator.
+		 * VMEXIT handlers, e.g. if KVM calls into the emulator,
+		 * or from the interrupt handlers.
 		 */
-		flush_l1d = vcpu->arch.l1tf_flush_l1d;
-		vcpu->arch.l1tf_flush_l1d = false;
-
-		/*
-		 * Clear the per-cpu flush bit, it gets set again from
-		 * the interrupt handlers.
-		 */
-		flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
-		kvm_clear_cpu_l1tf_flush_l1d();
-
-		if (!flush_l1d)
+		if (!kvm_get_cpu_l1tf_flush_l1d())
 			return;
+		kvm_clear_cpu_l1tf_flush_l1d();
 	}
 
 	vcpu->stat.l1d_flush++;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4b8138bd48572fd161eda73d2dbdc1dcd0bcbcac..dc886c4b9b1fe3d63a4c255ed4fc533d20fd1962 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5190,7 +5190,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
-	vcpu->arch.l1tf_flush_l1d = true;
+	kvm_set_cpu_l1tf_flush_l1d();
 
 	if (vcpu->scheduled_out && pmu->version && pmu->event_count) {
 		pmu->need_cleanup = true;
@@ -8000,7 +8000,7 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
 				unsigned int bytes, struct x86_exception *exception)
 {
 	/* kvm_write_guest_virt_system can pull in tons of pages. */
-	vcpu->arch.l1tf_flush_l1d = true;
+	kvm_set_cpu_l1tf_flush_l1d_raw();
 
 	return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
 					   PFERR_WRITE_MASK, exception);
@@ -9396,7 +9396,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 		return handle_emulation_failure(vcpu, emulation_type);
 	}
 
-	vcpu->arch.l1tf_flush_l1d = true;
+	kvm_set_cpu_l1tf_flush_l1d_raw();
 
 	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
 		kvm_clear_exception_queue(vcpu);

---
base-commit: 6b36119b94d0b2bb8cea9d512017efafd461d6ac
change-id: 20251013-b4-l1tf-percpu-793181fa5884

Best regards,
-- 
Brendan Jackman <jackmanb@google.com>
Re: [PATCH v2] KVM: x86: Unify L1TF flushing under per-CPU variable
Posted by Sean Christopherson 2 months ago
On Wed, Oct 15, 2025, Brendan Jackman wrote:
> Currently the need to flush L1D for L1TF is tracked by two bits: one
> per-CPU and one per-vCPU.
> 
> The per-vCPU bit is always set when the vCPU shows up on a core, so
> there is no interesting state that's truly per-vCPU. Indeed, this is a
> requirement, since L1D is a part of the physical CPU.
> 
> So simplify this by combining the two bits.
> 
> The vCPU bit was being written from preemption-enabled regions. For
> those cases, use raw_cpu_write() (via a variant of the setter function)
> to avoid DEBUG_PREEMPT failures. If the vCPU is getting migrated, the
> CPU that gets its bit set in these paths is not important; vcpu_load()
> must always set it on the destination CPU before the guest is resumed.
> 
> Signed-off-by: Brendan Jackman <jackmanb@google.com>
> ---

...

> @@ -78,6 +79,11 @@ static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
>  	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
>  }
>  
> +static __always_inline void kvm_set_cpu_l1tf_flush_l1d_raw(void)
> +{
> +	raw_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
> +}

TL;DR: I'll post a v3 with a slightly tweaked version of this patch at the end.

Rather than add a "raw" variant, I would rather have a wrapper in arch/x86/kvm/x86.h
that disables preemption, with a comment explaining why it's ok to enable preemption
after setting the per-CPU flag.  Without such a comment, choosing between the two
variants looks entirely random.

Alternatively, all writes could be raw, but that
feels wrong/weird, and in practice disabling preemption in the relevant paths is a
complete non-issue.

<me rummages around>

Gah, I followed a tangential thought about the "cost" of disabling/enabling preemption
and ended up with a 4-patch series.  All of this code really should be conditioned
on CONFIG_CPU_MITIGATIONS=y.  With that, the wrapper can be:

static __always_inline void kvm_request_l1tf_flush_l1d(void)
{
#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
	/*
	 * Temporarily disable preemption (if necessary) as the tracking is
	 * per-CPU.  If the current vCPU task is migrated to a different CPU
	 * before the next VM-Entry, then kvm_arch_vcpu_load() will pend a
	 * flush on the new CPU.
	 */
	guard(preempt)();
	kvm_set_cpu_l1tf_flush_l1d();
#endif
}

and kvm_set_cpu_l1tf_flush_l1d() and irq_cpustat_t.kvm_cpu_l1tf_flush_l1d can
likewise be gated on CONFIG_CPU_MITIGATIONS && CONFIG_KVM_INTEL.
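
For illustration, that gating might look like the following in hardirq.h
(a sketch, assuming the field keeps its current u8 form and the file's
existing stub-in-#else pattern):

typedef struct {
#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
	u8 kvm_cpu_l1tf_flush_l1d;
#endif
	unsigned int __nmi_count;	/* arch dependent */
	/* ... */
} ____cacheline_aligned irq_cpustat_t;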


> +
>  static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void)
>  {
>  	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0);
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 48598d017d6f3f07263a2ffffe670be2658eb9cb..fcdc65ab13d8383018577aacf19e832e6c4ceb0b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1055,9 +1055,6 @@ struct kvm_vcpu_arch {
>  	/* be preempted when it's in kernel-mode(cpl=0) */
>  	bool preempted_in_kernel;
>  
> -	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
> -	bool l1tf_flush_l1d;
> -
>  	/* Host CPU on which VM-entry was most recently attempted */
>  	int last_vmentry_cpu;
>  
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 667d66cf76d5e52c22f9517914307244ae868eea..8c0dce401a42d977756ca82d249bb33c858b9c9f 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4859,7 +4859,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
>  	 */
>  	BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK));
>  
> -	vcpu->arch.l1tf_flush_l1d = true;
> +	kvm_set_cpu_l1tf_flush_l1d();

This is wrong, kvm_handle_page_fault() runs with preemption enabled.
Re: [PATCH v2] KVM: x86: Unify L1TF flushing under per-CPU variable
Posted by Brendan Jackman 2 months ago
On Thu Oct 16, 2025 at 3:50 PM UTC, Sean Christopherson wrote:
> On Wed, Oct 15, 2025, Brendan Jackman wrote:
>> Currently the need to flush L1D for L1TF is tracked by two bits: one
>> per-CPU and one per-vCPU.
>> 
>> The per-vCPU bit is always set when the vCPU shows up on a core, so
>> there is no interesting state that's truly per-vCPU. Indeed, this is a
>> requirement, since L1D is a part of the physical CPU.
>> 
>> So simplify this by combining the two bits.
>> 
>> The vCPU bit was being written from preemption-enabled regions. For
>> those cases, use raw_cpu_write() (via a variant of the setter function)
>> to avoid DEBUG_PREEMPT failures. If the vCPU is getting migrated, the
>> CPU that gets its bit set in these paths is not important; vcpu_load()
>> must always set it on the destination CPU before the guest is resumed.
>> 
>> Signed-off-by: Brendan Jackman <jackmanb@google.com>
>> ---
>
> ...
>
>> @@ -78,6 +79,11 @@ static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
>>  	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
>>  }
>>  
>> +static __always_inline void kvm_set_cpu_l1tf_flush_l1d_raw(void)
>> +{
>> +	raw_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
>> +}
>
> TL;DR: I'll post a v3 with a slightly tweaked version of this patch at the end.
>
> Rather than add a "raw" variant, I would rather have a wrapper in arch/x86/kvm/x86.h
> that disables preemption, with a comment explaining why it's ok to enable preemption
> after setting the per-CPU flag.  Without such a comment, choosing between the two
> variants looks entirely random.
>
> Alternatively, all writes could be raw, but that
> feels wrong/weird, and in practice disabling preemption in the relevant paths is a
> complete non-issue.

Hm, why does making every write _raw feel weird but adding
preempt_disable() to every write doesn't? Both feel equally weird to me.
But the latter has the additional weirdness of using preempt_disable()
as a way to signal "I know what I'm doing", when that signal is already
explicitly documented as the purpose of raw_cpu_write().
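
For reference, the documented contracts of the three variants
(paraphrasing Documentation/core-api/this_cpu_ops.rst):

	this_cpu_write(pcp, val)	/* safe against preemption and IRQs */
	__this_cpu_write(pcp, val)	/* caller must already have preemption
					   disabled; CONFIG_DEBUG_PREEMPT warns
					   otherwise */
	raw_cpu_write(pcp, val)		/* no checks at all; the caller accepts
					   that the write may land on a "stale"
					   CPU */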

> <me rummages around>
>
> Gah, I followed a tangential thought about the "cost" of disabling/enabling preemption
> and ended up with a 4-patch series.  All of this code really should be conditioned
> on CONFIG_CPU_MITIGATIONS=y.  With that, the wrapper can be:
>
> static __always_inline void kvm_request_l1tf_flush_l1d(void)
> {
> #if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
> 	/*
> 	 * Temporarily disable preemption (if necessary) as the tracking is
> 	 * per-CPU.  If the current vCPU task is migrated to a different CPU
> 	 * before the next VM-Entry, then kvm_arch_vcpu_load() will pend a
> 	 * flush on the new CPU.
> 	 */
> 	guard(preempt)();
> 	kvm_set_cpu_l1tf_flush_l1d();
> #endif
> }

Having a nice place to hang the comment definitely seems worthwhile, but
couldn't we just put it in the _raw variant?

> and kvm_set_cpu_l1tf_flush_l1d() and irq_cpustat_t.kvm_cpu_l1tf_flush_l1d can
> likewise be gated on CONFIG_CPU_MITIGATIONS && CONFIG_KVM_INTEL.
>
>
>> +
>>  static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void)
>>  {
>>  	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0);
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 48598d017d6f3f07263a2ffffe670be2658eb9cb..fcdc65ab13d8383018577aacf19e832e6c4ceb0b 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -1055,9 +1055,6 @@ struct kvm_vcpu_arch {
>>  	/* be preempted when it's in kernel-mode(cpl=0) */
>>  	bool preempted_in_kernel;
>>  
>> -	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
>> -	bool l1tf_flush_l1d;
>> -
>>  	/* Host CPU on which VM-entry was most recently attempted */
>>  	int last_vmentry_cpu;
>>  
>> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
>> index 667d66cf76d5e52c22f9517914307244ae868eea..8c0dce401a42d977756ca82d249bb33c858b9c9f 100644
>> --- a/arch/x86/kvm/mmu/mmu.c
>> +++ b/arch/x86/kvm/mmu/mmu.c
>> @@ -4859,7 +4859,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
>>  	 */
>>  	BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK));
>>  
>> -	vcpu->arch.l1tf_flush_l1d = true;
>> +	kvm_set_cpu_l1tf_flush_l1d();
>
> This is wrong, kvm_handle_page_fault() runs with preemption enabled.

Ah, thanks.
Re: [PATCH v2] KVM: x86: Unify L1TF flushing under per-CPU variable
Posted by Sean Christopherson 2 months ago
On Thu, Oct 16, 2025, Brendan Jackman wrote:
> On Thu Oct 16, 2025 at 3:50 PM UTC, Sean Christopherson wrote:
> > On Wed, Oct 15, 2025, Brendan Jackman wrote:
> >> Currently the need to flush L1D for L1TF is tracked by two bits: one
> >> per-CPU and one per-vCPU.
> >> 
> >> The per-vCPU bit is always set when the vCPU shows up on a core, so
> >> there is no interesting state that's truly per-vCPU. Indeed, this is a
> >> requirement, since L1D is a part of the physical CPU.
> >> 
> >> So simplify this by combining the two bits.
> >> 
> >> The vCPU bit was being written from preemption-enabled regions. For
> >> those cases, use raw_cpu_write() (via a variant of the setter function)
> >> to avoid DEBUG_PREEMPT failures. If the vCPU is getting migrated, the
> >> CPU that gets its bit set in these paths is not important; vcpu_load()
> >> must always set it on the destination CPU before the guest is resumed.
> >> 
> >> Signed-off-by: Brendan Jackman <jackmanb@google.com>
> >> ---
> >
> > ...
> >
> >> @@ -78,6 +79,11 @@ static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
> >>  	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
> >>  }
> >>  
> >> +static __always_inline void kvm_set_cpu_l1tf_flush_l1d_raw(void)
> >> +{
> >> +	raw_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
> >> +}
> >
> > TL;DR: I'll post a v3 with a slightly tweaked version of this patch at the end.
> >
> > Rather than add a "raw" variant, I would rather have a wrapper in arch/x86/kvm/x86.h
> > that disables preemption, with a comment explaining why it's ok to enable preemption
> > after setting the per-CPU flag.  Without such a comment, choosing between the two
> > variants looks entirely random.
> >
> > Alternatively, all writes could be raw, but that
> > feels wrong/weird, and in practice disabling preemption in the relevant paths is a
> > complete non-issue.
> 
> Hm, why does making every write _raw feel weird but adding
> preempt_disable() to every write doesn't? Both feel equally weird to me.

I completely agree that both approaches are odd/weird.

> But the latter has the additional weirdness of using preempt_disable()
> as a way to signal "I know what I'm doing", when that signal is already
> explicitly documented as the purpose of raw_cpu_write().

True.  Aha!

With the #ifdefs in place, KVM doesn't need arch/x86/include/asm/hardirq.h to
provide a wrapper.  irq_stat is already exported, the wrapper exists purely so
that kvm_set_cpu_l1tf_flush_l1d() can be invoked without callers having to check
CONFIG_KVM_INTEL.

Not yet tested, but how about this?

static __always_inline void kvm_request_l1tf_flush_l1d(void)
{
#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
	/*
	 * Use a raw write to set the per-CPU flag, as KVM will ensure a flush
	 * even if preemption is currently enabled.  If the current vCPU task
	 * is migrated to a different CPU (or userspace runs the vCPU on a
	 * different task) before the next VM-Entry, then kvm_arch_vcpu_load()
	 * will request a flush on the new CPU.
	 */
	raw_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
#endif
}
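
Call sites would then be uniform regardless of the caller's preemption
state. For example, a sketch based on the kvm_write_guest_virt_system()
hunk above, with the wrapper swapped in:

int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
				unsigned int bytes, struct x86_exception *exception)
{
	/* kvm_write_guest_virt_system can pull in tons of pages. */
	kvm_request_l1tf_flush_l1d();

	return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
					   PFERR_WRITE_MASK, exception);
}
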
Re: [PATCH v2] KVM: x86: Unify L1TF flushing under per-CPU variable
Posted by Brendan Jackman 2 months ago
On Thu Oct 16, 2025 at 4:41 PM UTC, Sean Christopherson wrote:
> On Thu, Oct 16, 2025, Brendan Jackman wrote:
>> On Thu Oct 16, 2025 at 3:50 PM UTC, Sean Christopherson wrote:
>> > On Wed, Oct 15, 2025, Brendan Jackman wrote:
>> >> Currently the need to flush L1D for L1TF is tracked by two bits: one
>> >> per-CPU and one per-vCPU.
>> >> 
>> >> The per-vCPU bit is always set when the vCPU shows up on a core, so
>> >> there is no interesting state that's truly per-vCPU. Indeed, this is a
>> >> requirement, since L1D is a part of the physical CPU.
>> >> 
>> >> So simplify this by combining the two bits.
>> >> 
>> >> The vCPU bit was being written from preemption-enabled regions. For
>> >> those cases, use raw_cpu_write() (via a variant of the setter function)
>> >> to avoid DEBUG_PREEMPT failures. If the vCPU is getting migrated, the
>> >> CPU that gets its bit set in these paths is not important; vcpu_load()
>> >> must always set it on the destination CPU before the guest is resumed.
>> >> 
>> >> Signed-off-by: Brendan Jackman <jackmanb@google.com>
>> >> ---
>> >
>> > ...
>> >
>> >> @@ -78,6 +79,11 @@ static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
>> >>  	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
>> >>  }
>> >>  
>> >> +static __always_inline void kvm_set_cpu_l1tf_flush_l1d_raw(void)
>> >> +{
>> >> +	raw_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
>> >> +}
>> >
>> > TL;DR: I'll post a v3 with a slightly tweaked version of this patch at the end.
>> >
>> > Rather than add a "raw" variant, I would rather have a wrapper in arch/x86/kvm/x86.h
>> > that disables preemption, with a comment explaining why it's ok to enable preemption
>> > after setting the per-CPU flag.  Without such a comment, choosing between the two
> >> > variants looks entirely random.
>> >
>> > Alternatively, all writes could be raw, but that
>> > feels wrong/weird, and in practice disabling preemption in the relevant paths is a
>> > complete non-issue.
>> 
>> Hm, why does making every write _raw feel weird but adding
>> preempt_disable() to every write doesn't? Both feel equally weird to me.
>
> I completely agree that both approaches are odd/weird.
>
>> But the latter has the additional weirdness of using preempt_disable()
>> as a way to signal "I know what I'm doing", when that signal is already
>> explicitly documented as the purpose of raw_cpu_write().
>
> True.  Aha!
>
> With the #ifdefs in place, KVM doesn't need arch/x86/include/asm/hardirq.h to
> provide a wrapper.  irq_stat is already exported; the wrapper exists purely so
> that kvm_set_cpu_l1tf_flush_l1d() can be invoked without callers having to check
> CONFIG_KVM_INTEL.
>
> Not yet tested, but how about this?
>
> static __always_inline void kvm_request_l1tf_flush_l1d(void)
> {
> #if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
> 	/*
> 	 * Use a raw write to set the per-CPU flag, as KVM will ensure a flush
> 	 * even if preemption is currently enabled.  If the current vCPU task
> 	 * is migrated to a different CPU (or userspace runs the vCPU on a
> 	 * different task) before the next VM-Entry, then kvm_arch_vcpu_load()
> 	 * will request a flush on the new CPU.
> 	 */
> 	raw_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
> #endif
> }

Yeah, just poking irq_stat directly seems fine to me.
Re: [PATCH v2] KVM: x86: Unify L1TF flushing under per-CPU variable
Posted by kernel test robot 2 months ago
Hi Brendan,

kernel test robot noticed the following build errors:

[auto build test ERROR on 6b36119b94d0b2bb8cea9d512017efafd461d6ac]

url:    https://github.com/intel-lab-lkp/linux/commits/Brendan-Jackman/KVM-x86-Unify-L1TF-flushing-under-per-CPU-variable/20251016-011539
base:   6b36119b94d0b2bb8cea9d512017efafd461d6ac
patch link:    https://lore.kernel.org/r/20251015-b4-l1tf-percpu-v2-1-6d7a8d3d40e9%40google.com
patch subject: [PATCH v2] KVM: x86: Unify L1TF flushing under per-CPU variable
config: i386-buildonly-randconfig-006-20251016 (https://download.01.org/0day-ci/archive/20251016/202510161649.QbfhDLy3-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251016/202510161649.QbfhDLy3-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202510161649.QbfhDLy3-lkp@intel.com/

All errors (new ones prefixed by >>):

   arch/x86/kvm/x86.c: In function 'kvm_write_guest_virt_system':
>> arch/x86/kvm/x86.c:8003:9: error: implicit declaration of function 'kvm_set_cpu_l1tf_flush_l1d_raw'; did you mean 'kvm_set_cpu_l1tf_flush_l1d'? [-Wimplicit-function-declaration]
    8003 |         kvm_set_cpu_l1tf_flush_l1d_raw();
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |         kvm_set_cpu_l1tf_flush_l1d


vim +8003 arch/x86/kvm/x86.c

  7998	
  7999	int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
  8000					unsigned int bytes, struct x86_exception *exception)
  8001	{
  8002		/* kvm_write_guest_virt_system can pull in tons of pages. */
> 8003		kvm_set_cpu_l1tf_flush_l1d_raw();
  8004	
  8005		return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
  8006						   PFERR_WRITE_MASK, exception);
  8007	}
  8008	EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_write_guest_virt_system);
  8009	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH v2] KVM: x86: Unify L1TF flushing under per-CPU variable
Posted by kernel test robot 2 months ago
Hi Brendan,

kernel test robot noticed the following build errors:

[auto build test ERROR on 6b36119b94d0b2bb8cea9d512017efafd461d6ac]

url:    https://github.com/intel-lab-lkp/linux/commits/Brendan-Jackman/KVM-x86-Unify-L1TF-flushing-under-per-CPU-variable/20251016-011539
base:   6b36119b94d0b2bb8cea9d512017efafd461d6ac
patch link:    https://lore.kernel.org/r/20251015-b4-l1tf-percpu-v2-1-6d7a8d3d40e9%40google.com
patch subject: [PATCH v2] KVM: x86: Unify L1TF flushing under per-CPU variable
config: i386-buildonly-randconfig-005-20251016 (https://download.01.org/0day-ci/archive/20251016/202510161648.7Ere0Lfz-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251016/202510161648.7Ere0Lfz-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202510161648.7Ere0Lfz-lkp@intel.com/

All errors (new ones prefixed by >>):

>> arch/x86/kvm/x86.c:8003:2: error: call to undeclared function 'kvm_set_cpu_l1tf_flush_l1d_raw'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
    8003 |         kvm_set_cpu_l1tf_flush_l1d_raw();
         |         ^
   arch/x86/kvm/x86.c:8003:2: note: did you mean 'kvm_set_cpu_l1tf_flush_l1d'?
   arch/x86/include/asm/hardirq.h:97:29: note: 'kvm_set_cpu_l1tf_flush_l1d' declared here
      97 | static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
         |                             ^
   arch/x86/kvm/x86.c:9399:2: error: call to undeclared function 'kvm_set_cpu_l1tf_flush_l1d_raw'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
    9399 |         kvm_set_cpu_l1tf_flush_l1d_raw();
         |         ^
   2 errors generated.


vim +/kvm_set_cpu_l1tf_flush_l1d_raw +8003 arch/x86/kvm/x86.c

  7998	
  7999	int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
  8000					unsigned int bytes, struct x86_exception *exception)
  8001	{
  8002		/* kvm_write_guest_virt_system can pull in tons of pages. */
> 8003		kvm_set_cpu_l1tf_flush_l1d_raw();
  8004	
  8005		return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
  8006						   PFERR_WRITE_MASK, exception);
  8007	}
  8008	EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_write_guest_virt_system);
  8009	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
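
Both robot reports point at the same root cause (an inference from the
hardirq.h:97 note above, which shows the empty !CONFIG_KVM_INTEL stub):
the v2 patch adds kvm_set_cpu_l1tf_flush_l1d_raw() only inside the
IS_ENABLED(CONFIG_KVM_INTEL) branch, so configs that build KVM without
KVM_INTEL never see a declaration. A minimal fix would be a matching
stub, sketched as:

#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
static __always_inline void kvm_set_cpu_l1tf_flush_l1d_raw(void) { }
#endif

Sean's proposed kvm_request_l1tf_flush_l1d() wrapper sidesteps this
entirely by keeping the config check inside a single helper.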