[PATCH v3 4/4] LoongArch: KVM: Add FPU delay load support

Bibo Mao posted 4 patches 6 days, 17 hours ago
[PATCH v3 4/4] LoongArch: KVM: Add FPU delay load support
Posted by Bibo Mao 6 days, 17 hours ago
FPU is lazily enabled under the KVM hypervisor. After the FPU is enabled
and loaded, the vCPU can be preempted and the FPU context will be lost
again, causing unnecessary FPU exception, load and store processing.
Here FPU loading is delayed until just before guest entry.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
---
 arch/loongarch/include/asm/kvm_host.h |  2 ++
 arch/loongarch/kvm/exit.c             | 21 ++++++++++-----
 arch/loongarch/kvm/vcpu.c             | 37 ++++++++++++++++++---------
 3 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index e4fe5b8e8149..902ff7bc0e35 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -37,6 +37,7 @@
 #define KVM_REQ_TLB_FLUSH_GPA		KVM_ARCH_REQ(0)
 #define KVM_REQ_STEAL_UPDATE		KVM_ARCH_REQ(1)
 #define KVM_REQ_PMU			KVM_ARCH_REQ(2)
+#define KVM_REQ_FPU_LOAD		KVM_ARCH_REQ(3)
 
 #define KVM_GUESTDBG_SW_BP_MASK		\
 	(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
@@ -234,6 +235,7 @@ struct kvm_vcpu_arch {
 	u64 vpid;
 	gpa_t flush_gpa;
 
+	int fpu_load_type;
 	/* Frequency of stable timer in Hz */
 	u64 timer_mhz;
 	ktime_t expire;
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index 65ec10a7245a..62403c7c6f9a 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -754,7 +754,8 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
 		return RESUME_HOST;
 	}
 
-	kvm_own_fpu(vcpu);
+	vcpu->arch.fpu_load_type = KVM_LARCH_FPU;
+	kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
 
 	return RESUME_GUEST;
 }
@@ -794,8 +795,10 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
 {
 	if (!kvm_guest_has_lsx(&vcpu->arch))
 		kvm_queue_exception(vcpu, EXCCODE_INE, 0);
-	else
-		kvm_own_lsx(vcpu);
+	else {
+		vcpu->arch.fpu_load_type = KVM_LARCH_LSX;
+		kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
+	}
 
 	return RESUME_GUEST;
 }
@@ -812,8 +815,10 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
 {
 	if (!kvm_guest_has_lasx(&vcpu->arch))
 		kvm_queue_exception(vcpu, EXCCODE_INE, 0);
-	else
-		kvm_own_lasx(vcpu);
+	else {
+		vcpu->arch.fpu_load_type = KVM_LARCH_LASX;
+		kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
+	}
 
 	return RESUME_GUEST;
 }
@@ -822,8 +827,10 @@ static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu, int ecode)
 {
 	if (!kvm_guest_has_lbt(&vcpu->arch))
 		kvm_queue_exception(vcpu, EXCCODE_INE, 0);
-	else
-		kvm_own_lbt(vcpu);
+	else {
+		vcpu->arch.fpu_load_type = KVM_LARCH_LBT;
+		kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
+	}
 
 	return RESUME_GUEST;
 }
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 995461d724b5..d05fe6c8f456 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -232,6 +232,31 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
 			kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
 			vcpu->arch.flush_gpa = INVALID_GPA;
 		}
+
+	if (kvm_check_request(KVM_REQ_FPU_LOAD, vcpu)) {
+		switch (vcpu->arch.fpu_load_type) {
+		case KVM_LARCH_FPU:
+			kvm_own_fpu(vcpu);
+			break;
+
+		case KVM_LARCH_LSX:
+			kvm_own_lsx(vcpu);
+			break;
+
+		case KVM_LARCH_LASX:
+			kvm_own_lasx(vcpu);
+			break;
+
+		case KVM_LARCH_LBT:
+			kvm_own_lbt(vcpu);
+			break;
+
+		default:
+			break;
+		}
+
+		vcpu->arch.fpu_load_type = 0;
+	}
 }
 
 /*
@@ -1286,13 +1311,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 #ifdef CONFIG_CPU_HAS_LBT
 int kvm_own_lbt(struct kvm_vcpu *vcpu)
 {
-	preempt_disable();
 	if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
 		set_csr_euen(CSR_EUEN_LBTEN);
 		_restore_lbt(&vcpu->arch.lbt);
 		vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
 	}
-	preempt_enable();
 
 	return 0;
 }
@@ -1335,8 +1358,6 @@ static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
 /* Enable FPU and restore context */
 void kvm_own_fpu(struct kvm_vcpu *vcpu)
 {
-	preempt_disable();
-
 	/*
 	 * Enable FPU for guest
 	 * Set FR and FRE according to guest context
@@ -1347,16 +1368,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
 	kvm_restore_fpu(&vcpu->arch.fpu);
 	vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
 	trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
-
-	preempt_enable();
 }
 
 #ifdef CONFIG_CPU_HAS_LSX
 /* Enable LSX and restore context */
 int kvm_own_lsx(struct kvm_vcpu *vcpu)
 {
-	preempt_disable();
-
 	/* Enable LSX for guest */
 	kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
 	set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
@@ -1378,7 +1395,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
 
 	trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
 	vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
-	preempt_enable();
 
 	return 0;
 }
@@ -1388,8 +1404,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
 /* Enable LASX and restore context */
 int kvm_own_lasx(struct kvm_vcpu *vcpu)
 {
-	preempt_disable();
-
 	kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
 	set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
 	switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
@@ -1411,7 +1425,6 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
 
 	trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
 	vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
-	preempt_enable();
 
 	return 0;
 }
-- 
2.39.3
Re: [PATCH v3 4/4] LoongArch: KVM: Add FPU delay load support
Posted by Huacai Chen 6 days, 16 hours ago
Hi, Bibo,

On Tue, Feb 3, 2026 at 11:31 AM Bibo Mao <maobibo@loongson.cn> wrote:
>
> FPU is lazy enabled with KVM hypervisor. After FPU is enabled and
> loaded, vCPU can be preempted and FPU will be lost again, there will
> be unnecessary FPU exception, load and store process. Here FPU is
> delay load until guest enter entry.
Calling LSX/LASX as FPU is a little strange, but somewhat reasonable.
Calling LBT as FPU is very strange. So I still like the V1 logic.

If you insist on this version, please rename KVM_REQ_FPU_LOAD to
KVM_REQ_AUX_LOAD and rename fpu_load_type to aux_type, which is
similar to aux_inuse.

Huacai

>
> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
> ---
>  arch/loongarch/include/asm/kvm_host.h |  2 ++
>  arch/loongarch/kvm/exit.c             | 21 ++++++++++-----
>  arch/loongarch/kvm/vcpu.c             | 37 ++++++++++++++++++---------
>  3 files changed, 41 insertions(+), 19 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> index e4fe5b8e8149..902ff7bc0e35 100644
> --- a/arch/loongarch/include/asm/kvm_host.h
> +++ b/arch/loongarch/include/asm/kvm_host.h
> @@ -37,6 +37,7 @@
>  #define KVM_REQ_TLB_FLUSH_GPA          KVM_ARCH_REQ(0)
>  #define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
>  #define KVM_REQ_PMU                    KVM_ARCH_REQ(2)
> +#define KVM_REQ_FPU_LOAD               KVM_ARCH_REQ(3)
>
>  #define KVM_GUESTDBG_SW_BP_MASK                \
>         (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
> @@ -234,6 +235,7 @@ struct kvm_vcpu_arch {
>         u64 vpid;
>         gpa_t flush_gpa;
>
> +       int fpu_load_type;
>         /* Frequency of stable timer in Hz */
>         u64 timer_mhz;
>         ktime_t expire;
> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
> index 65ec10a7245a..62403c7c6f9a 100644
> --- a/arch/loongarch/kvm/exit.c
> +++ b/arch/loongarch/kvm/exit.c
> @@ -754,7 +754,8 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
>                 return RESUME_HOST;
>         }
>
> -       kvm_own_fpu(vcpu);
> +       vcpu->arch.fpu_load_type = KVM_LARCH_FPU;
> +       kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>
>         return RESUME_GUEST;
>  }
> @@ -794,8 +795,10 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
>  {
>         if (!kvm_guest_has_lsx(&vcpu->arch))
>                 kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> -       else
> -               kvm_own_lsx(vcpu);
> +       else {
> +               vcpu->arch.fpu_load_type = KVM_LARCH_LSX;
> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> +       }
>
>         return RESUME_GUEST;
>  }
> @@ -812,8 +815,10 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
>  {
>         if (!kvm_guest_has_lasx(&vcpu->arch))
>                 kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> -       else
> -               kvm_own_lasx(vcpu);
> +       else {
> +               vcpu->arch.fpu_load_type = KVM_LARCH_LASX;
> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> +       }
>
>         return RESUME_GUEST;
>  }
> @@ -822,8 +827,10 @@ static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu, int ecode)
>  {
>         if (!kvm_guest_has_lbt(&vcpu->arch))
>                 kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> -       else
> -               kvm_own_lbt(vcpu);
> +       else {
> +               vcpu->arch.fpu_load_type = KVM_LARCH_LBT;
> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> +       }
>
>         return RESUME_GUEST;
>  }
> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> index 995461d724b5..d05fe6c8f456 100644
> --- a/arch/loongarch/kvm/vcpu.c
> +++ b/arch/loongarch/kvm/vcpu.c
> @@ -232,6 +232,31 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
>                         kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
>                         vcpu->arch.flush_gpa = INVALID_GPA;
>                 }
> +
> +       if (kvm_check_request(KVM_REQ_FPU_LOAD, vcpu)) {
> +               switch (vcpu->arch.fpu_load_type) {
> +               case KVM_LARCH_FPU:
> +                       kvm_own_fpu(vcpu);
> +                       break;
> +
> +               case KVM_LARCH_LSX:
> +                       kvm_own_lsx(vcpu);
> +                       break;
> +
> +               case KVM_LARCH_LASX:
> +                       kvm_own_lasx(vcpu);
> +                       break;
> +
> +               case KVM_LARCH_LBT:
> +                       kvm_own_lbt(vcpu);
> +                       break;
> +
> +               default:
> +                       break;
> +               }
> +
> +               vcpu->arch.fpu_load_type = 0;
> +       }
>  }
>
>  /*
> @@ -1286,13 +1311,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>  #ifdef CONFIG_CPU_HAS_LBT
>  int kvm_own_lbt(struct kvm_vcpu *vcpu)
>  {
> -       preempt_disable();
>         if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
>                 set_csr_euen(CSR_EUEN_LBTEN);
>                 _restore_lbt(&vcpu->arch.lbt);
>                 vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
>         }
> -       preempt_enable();
>
>         return 0;
>  }
> @@ -1335,8 +1358,6 @@ static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
>  /* Enable FPU and restore context */
>  void kvm_own_fpu(struct kvm_vcpu *vcpu)
>  {
> -       preempt_disable();
> -
>         /*
>          * Enable FPU for guest
>          * Set FR and FRE according to guest context
> @@ -1347,16 +1368,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
>         kvm_restore_fpu(&vcpu->arch.fpu);
>         vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
>         trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
> -
> -       preempt_enable();
>  }
>
>  #ifdef CONFIG_CPU_HAS_LSX
>  /* Enable LSX and restore context */
>  int kvm_own_lsx(struct kvm_vcpu *vcpu)
>  {
> -       preempt_disable();
> -
>         /* Enable LSX for guest */
>         kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>         set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
> @@ -1378,7 +1395,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>
>         trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
>         vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
> -       preempt_enable();
>
>         return 0;
>  }
> @@ -1388,8 +1404,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>  /* Enable LASX and restore context */
>  int kvm_own_lasx(struct kvm_vcpu *vcpu)
>  {
> -       preempt_disable();
> -
>         kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>         set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
>         switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
> @@ -1411,7 +1425,6 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
>
>         trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
>         vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
> -       preempt_enable();
>
>         return 0;
>  }
> --
> 2.39.3
>
>
Re: [PATCH v3 4/4] LoongArch: KVM: Add FPU delay load support
Posted by Bibo Mao 6 days, 14 hours ago

On 2026/2/3 下午12:15, Huacai Chen wrote:
> Hi, Bibo,
> 
> On Tue, Feb 3, 2026 at 11:31 AM Bibo Mao <maobibo@loongson.cn> wrote:
>>
>> FPU is lazy enabled with KVM hypervisor. After FPU is enabled and
>> loaded, vCPU can be preempted and FPU will be lost again, there will
>> be unnecessary FPU exception, load and store process. Here FPU is
>> delay load until guest enter entry.
> Calling LSX/LASX as FPU is a little strange, but somewhat reasonable.
> Calling LBT as FPU is very strange. So I still like the V1 logic.
Yes, LBT can use another separate bit, distinct from FPU. It is
actually common to use one bit plus an FPU-type variable to represent
the different FPU load requirements, such as
TIF_FOREIGN_FPSTATE/TIF_NEED_FPU_LOAD on other architectures.

I think it is better to put the int fpu_load_type in structure loongarch_fpu.

And there will be another optimization to avoid loading the FPU again if 
the FPU HW is already owned by the current thread/vCPU; that will also add 
an int-type last_cpu member in structure loongarch_fpu.

Regards
Bibo Mao
> 
> If you insist on this version, please rename KVM_REQ_FPU_LOAD to
> KVM_REQ_AUX_LOAD and rename fpu_load_type to aux_type, which is
> similar to aux_inuse.
> 
> Huacai
> 
>>
>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
>> ---
>>   arch/loongarch/include/asm/kvm_host.h |  2 ++
>>   arch/loongarch/kvm/exit.c             | 21 ++++++++++-----
>>   arch/loongarch/kvm/vcpu.c             | 37 ++++++++++++++++++---------
>>   3 files changed, 41 insertions(+), 19 deletions(-)
>>
>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>> index e4fe5b8e8149..902ff7bc0e35 100644
>> --- a/arch/loongarch/include/asm/kvm_host.h
>> +++ b/arch/loongarch/include/asm/kvm_host.h
>> @@ -37,6 +37,7 @@
>>   #define KVM_REQ_TLB_FLUSH_GPA          KVM_ARCH_REQ(0)
>>   #define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
>>   #define KVM_REQ_PMU                    KVM_ARCH_REQ(2)
>> +#define KVM_REQ_FPU_LOAD               KVM_ARCH_REQ(3)
>>
>>   #define KVM_GUESTDBG_SW_BP_MASK                \
>>          (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
>> @@ -234,6 +235,7 @@ struct kvm_vcpu_arch {
>>          u64 vpid;
>>          gpa_t flush_gpa;
>>
>> +       int fpu_load_type;
>>          /* Frequency of stable timer in Hz */
>>          u64 timer_mhz;
>>          ktime_t expire;
>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
>> index 65ec10a7245a..62403c7c6f9a 100644
>> --- a/arch/loongarch/kvm/exit.c
>> +++ b/arch/loongarch/kvm/exit.c
>> @@ -754,7 +754,8 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
>>                  return RESUME_HOST;
>>          }
>>
>> -       kvm_own_fpu(vcpu);
>> +       vcpu->arch.fpu_load_type = KVM_LARCH_FPU;
>> +       kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>
>>          return RESUME_GUEST;
>>   }
>> @@ -794,8 +795,10 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
>>   {
>>          if (!kvm_guest_has_lsx(&vcpu->arch))
>>                  kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>> -       else
>> -               kvm_own_lsx(vcpu);
>> +       else {
>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LSX;
>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>> +       }
>>
>>          return RESUME_GUEST;
>>   }
>> @@ -812,8 +815,10 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
>>   {
>>          if (!kvm_guest_has_lasx(&vcpu->arch))
>>                  kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>> -       else
>> -               kvm_own_lasx(vcpu);
>> +       else {
>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LASX;
>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>> +       }
>>
>>          return RESUME_GUEST;
>>   }
>> @@ -822,8 +827,10 @@ static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu, int ecode)
>>   {
>>          if (!kvm_guest_has_lbt(&vcpu->arch))
>>                  kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>> -       else
>> -               kvm_own_lbt(vcpu);
>> +       else {
>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LBT;
>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>> +       }
>>
>>          return RESUME_GUEST;
>>   }
>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>> index 995461d724b5..d05fe6c8f456 100644
>> --- a/arch/loongarch/kvm/vcpu.c
>> +++ b/arch/loongarch/kvm/vcpu.c
>> @@ -232,6 +232,31 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
>>                          kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
>>                          vcpu->arch.flush_gpa = INVALID_GPA;
>>                  }
>> +
>> +       if (kvm_check_request(KVM_REQ_FPU_LOAD, vcpu)) {
>> +               switch (vcpu->arch.fpu_load_type) {
>> +               case KVM_LARCH_FPU:
>> +                       kvm_own_fpu(vcpu);
>> +                       break;
>> +
>> +               case KVM_LARCH_LSX:
>> +                       kvm_own_lsx(vcpu);
>> +                       break;
>> +
>> +               case KVM_LARCH_LASX:
>> +                       kvm_own_lasx(vcpu);
>> +                       break;
>> +
>> +               case KVM_LARCH_LBT:
>> +                       kvm_own_lbt(vcpu);
>> +                       break;
>> +
>> +               default:
>> +                       break;
>> +               }
>> +
>> +               vcpu->arch.fpu_load_type = 0;
>> +       }
>>   }
>>
>>   /*
>> @@ -1286,13 +1311,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>>   #ifdef CONFIG_CPU_HAS_LBT
>>   int kvm_own_lbt(struct kvm_vcpu *vcpu)
>>   {
>> -       preempt_disable();
>>          if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
>>                  set_csr_euen(CSR_EUEN_LBTEN);
>>                  _restore_lbt(&vcpu->arch.lbt);
>>                  vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
>>          }
>> -       preempt_enable();
>>
>>          return 0;
>>   }
>> @@ -1335,8 +1358,6 @@ static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
>>   /* Enable FPU and restore context */
>>   void kvm_own_fpu(struct kvm_vcpu *vcpu)
>>   {
>> -       preempt_disable();
>> -
>>          /*
>>           * Enable FPU for guest
>>           * Set FR and FRE according to guest context
>> @@ -1347,16 +1368,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
>>          kvm_restore_fpu(&vcpu->arch.fpu);
>>          vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
>>          trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
>> -
>> -       preempt_enable();
>>   }
>>
>>   #ifdef CONFIG_CPU_HAS_LSX
>>   /* Enable LSX and restore context */
>>   int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>   {
>> -       preempt_disable();
>> -
>>          /* Enable LSX for guest */
>>          kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>>          set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
>> @@ -1378,7 +1395,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>
>>          trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
>>          vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
>> -       preempt_enable();
>>
>>          return 0;
>>   }
>> @@ -1388,8 +1404,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>   /* Enable LASX and restore context */
>>   int kvm_own_lasx(struct kvm_vcpu *vcpu)
>>   {
>> -       preempt_disable();
>> -
>>          kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>>          set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
>>          switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
>> @@ -1411,7 +1425,6 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
>>
>>          trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
>>          vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
>> -       preempt_enable();
>>
>>          return 0;
>>   }
>> --
>> 2.39.3
>>
>>

Re: [PATCH v3 4/4] LoongArch: KVM: Add FPU delay load support
Posted by Huacai Chen 6 days, 13 hours ago
On Tue, Feb 3, 2026 at 2:48 PM Bibo Mao <maobibo@loongson.cn> wrote:
>
>
>
> On 2026/2/3 下午12:15, Huacai Chen wrote:
> > Hi, Bibo,
> >
> > On Tue, Feb 3, 2026 at 11:31 AM Bibo Mao <maobibo@loongson.cn> wrote:
> >>
> >> FPU is lazy enabled with KVM hypervisor. After FPU is enabled and
> >> loaded, vCPU can be preempted and FPU will be lost again, there will
> >> be unnecessary FPU exception, load and store process. Here FPU is
> >> delay load until guest enter entry.
> > Calling LSX/LASX as FPU is a little strange, but somewhat reasonable.
> > Calling LBT as FPU is very strange. So I still like the V1 logic.
> yeap, LBT can use another different BIT and separate with FPU. It is
> actually normal use one bit + fpu type variant to represent different
> different FPU load requirement, such as
> TIF_FOREIGN_FPSTATE/TIF_NEED_FPU_LOAD on other architectures.
>
> I think it is better to put int fpu_load_type in structure loongarch_fpu.
>
> And there will be another optimization to avoid load FPU again if FPU HW
> is owned by current thread/vCPU, that will add last_cpu int type in
> structure loongarch_fpu also.
>
> Regards
> Bibo Mao
> >
> > If you insist on this version, please rename KVM_REQ_FPU_LOAD to
> > KVM_REQ_AUX_LOAD and rename fpu_load_type to aux_type, which is
> > similar to aux_inuse.
Then why not consider this?

Huacai

> >
> > Huacai
> >
> >>
> >> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
> >> ---
> >>   arch/loongarch/include/asm/kvm_host.h |  2 ++
> >>   arch/loongarch/kvm/exit.c             | 21 ++++++++++-----
> >>   arch/loongarch/kvm/vcpu.c             | 37 ++++++++++++++++++---------
> >>   3 files changed, 41 insertions(+), 19 deletions(-)
> >>
> >> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> >> index e4fe5b8e8149..902ff7bc0e35 100644
> >> --- a/arch/loongarch/include/asm/kvm_host.h
> >> +++ b/arch/loongarch/include/asm/kvm_host.h
> >> @@ -37,6 +37,7 @@
> >>   #define KVM_REQ_TLB_FLUSH_GPA          KVM_ARCH_REQ(0)
> >>   #define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
> >>   #define KVM_REQ_PMU                    KVM_ARCH_REQ(2)
> >> +#define KVM_REQ_FPU_LOAD               KVM_ARCH_REQ(3)
> >>
> >>   #define KVM_GUESTDBG_SW_BP_MASK                \
> >>          (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
> >> @@ -234,6 +235,7 @@ struct kvm_vcpu_arch {
> >>          u64 vpid;
> >>          gpa_t flush_gpa;
> >>
> >> +       int fpu_load_type;
> >>          /* Frequency of stable timer in Hz */
> >>          u64 timer_mhz;
> >>          ktime_t expire;
> >> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
> >> index 65ec10a7245a..62403c7c6f9a 100644
> >> --- a/arch/loongarch/kvm/exit.c
> >> +++ b/arch/loongarch/kvm/exit.c
> >> @@ -754,7 +754,8 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>                  return RESUME_HOST;
> >>          }
> >>
> >> -       kvm_own_fpu(vcpu);
> >> +       vcpu->arch.fpu_load_type = KVM_LARCH_FPU;
> >> +       kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >>
> >>          return RESUME_GUEST;
> >>   }
> >> @@ -794,8 +795,10 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>   {
> >>          if (!kvm_guest_has_lsx(&vcpu->arch))
> >>                  kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> >> -       else
> >> -               kvm_own_lsx(vcpu);
> >> +       else {
> >> +               vcpu->arch.fpu_load_type = KVM_LARCH_LSX;
> >> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >> +       }
> >>
> >>          return RESUME_GUEST;
> >>   }
> >> @@ -812,8 +815,10 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>   {
> >>          if (!kvm_guest_has_lasx(&vcpu->arch))
> >>                  kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> >> -       else
> >> -               kvm_own_lasx(vcpu);
> >> +       else {
> >> +               vcpu->arch.fpu_load_type = KVM_LARCH_LASX;
> >> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >> +       }
> >>
> >>          return RESUME_GUEST;
> >>   }
> >> @@ -822,8 +827,10 @@ static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>   {
> >>          if (!kvm_guest_has_lbt(&vcpu->arch))
> >>                  kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> >> -       else
> >> -               kvm_own_lbt(vcpu);
> >> +       else {
> >> +               vcpu->arch.fpu_load_type = KVM_LARCH_LBT;
> >> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >> +       }
> >>
> >>          return RESUME_GUEST;
> >>   }
> >> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> >> index 995461d724b5..d05fe6c8f456 100644
> >> --- a/arch/loongarch/kvm/vcpu.c
> >> +++ b/arch/loongarch/kvm/vcpu.c
> >> @@ -232,6 +232,31 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
> >>                          kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
> >>                          vcpu->arch.flush_gpa = INVALID_GPA;
> >>                  }
> >> +
> >> +       if (kvm_check_request(KVM_REQ_FPU_LOAD, vcpu)) {
> >> +               switch (vcpu->arch.fpu_load_type) {
> >> +               case KVM_LARCH_FPU:
> >> +                       kvm_own_fpu(vcpu);
> >> +                       break;
> >> +
> >> +               case KVM_LARCH_LSX:
> >> +                       kvm_own_lsx(vcpu);
> >> +                       break;
> >> +
> >> +               case KVM_LARCH_LASX:
> >> +                       kvm_own_lasx(vcpu);
> >> +                       break;
> >> +
> >> +               case KVM_LARCH_LBT:
> >> +                       kvm_own_lbt(vcpu);
> >> +                       break;
> >> +
> >> +               default:
> >> +                       break;
> >> +               }
> >> +
> >> +               vcpu->arch.fpu_load_type = 0;
> >> +       }
> >>   }
> >>
> >>   /*
> >> @@ -1286,13 +1311,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> >>   #ifdef CONFIG_CPU_HAS_LBT
> >>   int kvm_own_lbt(struct kvm_vcpu *vcpu)
> >>   {
> >> -       preempt_disable();
> >>          if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
> >>                  set_csr_euen(CSR_EUEN_LBTEN);
> >>                  _restore_lbt(&vcpu->arch.lbt);
> >>                  vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
> >>          }
> >> -       preempt_enable();
> >>
> >>          return 0;
> >>   }
> >> @@ -1335,8 +1358,6 @@ static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
> >>   /* Enable FPU and restore context */
> >>   void kvm_own_fpu(struct kvm_vcpu *vcpu)
> >>   {
> >> -       preempt_disable();
> >> -
> >>          /*
> >>           * Enable FPU for guest
> >>           * Set FR and FRE according to guest context
> >> @@ -1347,16 +1368,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
> >>          kvm_restore_fpu(&vcpu->arch.fpu);
> >>          vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
> >>          trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
> >> -
> >> -       preempt_enable();
> >>   }
> >>
> >>   #ifdef CONFIG_CPU_HAS_LSX
> >>   /* Enable LSX and restore context */
> >>   int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>   {
> >> -       preempt_disable();
> >> -
> >>          /* Enable LSX for guest */
> >>          kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
> >>          set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
> >> @@ -1378,7 +1395,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>
> >>          trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
> >>          vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
> >> -       preempt_enable();
> >>
> >>          return 0;
> >>   }
> >> @@ -1388,8 +1404,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>   /* Enable LASX and restore context */
> >>   int kvm_own_lasx(struct kvm_vcpu *vcpu)
> >>   {
> >> -       preempt_disable();
> >> -
> >>          kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
> >>          set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
> >>          switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
> >> @@ -1411,7 +1425,6 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
> >>
> >>          trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
> >>          vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
> >> -       preempt_enable();
> >>
> >>          return 0;
> >>   }
> >> --
> >> 2.39.3
> >>
> >>
>
>
Re: [PATCH v3 4/4] LoongArch: KVM: Add FPU delay load support
Posted by Bibo Mao 6 days, 13 hours ago

On 2026/2/3 下午3:34, Huacai Chen wrote:
> On Tue, Feb 3, 2026 at 2:48 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>
>>
>>
>> On 2026/2/3 下午12:15, Huacai Chen wrote:
>>> Hi, Bibo,
>>>
>>> On Tue, Feb 3, 2026 at 11:31 AM Bibo Mao <maobibo@loongson.cn> wrote:
>>>>
>>>> FPU is lazy enabled with KVM hypervisor. After FPU is enabled and
>>>> loaded, vCPU can be preempted and FPU will be lost again, there will
>>>> be unnecessary FPU exception, load and store process. Here FPU is
>>>> delay load until guest enter entry.
>>> Calling LSX/LASX as FPU is a little strange, but somewhat reasonable.
>>> Calling LBT as FPU is very strange. So I still like the V1 logic.
>> yeap, LBT can use another different BIT and separate with FPU. It is
>> actually normal use one bit + fpu type variant to represent different
>> different FPU load requirement, such as
>> TIF_FOREIGN_FPSTATE/TIF_NEED_FPU_LOAD on other architectures.
>>
>> I think it is better to put int fpu_load_type in structure loongarch_fpu.
>>
>> And there will be another optimization to avoid load FPU again if FPU HW
>> is owned by current thread/vCPU, that will add last_cpu int type in
>> structure loongarch_fpu also.
>>
>> Regards
>> Bibo Mao
>>>
>>> If you insist on this version, please rename KVM_REQ_FPU_LOAD to
>>> KVM_REQ_AUX_LOAD and rename fpu_load_type to aux_type, which is
>>> similar to aux_inuse.
> Then why not consider this?
This can work for now. However, there are two different structures: struct 
loongarch_fpu and struct loongarch_lbt.

1. If the kernel wants to use late FPU load, a new element fpu_load_type can 
be added in struct loongarch_fpu for both user apps and KVM.

2. With further optimization, the FPU HW can be owned by a user app, the 
kernel, or KVM; another int-type last_cpu member will be added in struct loongarch_fpu.

Regards
Bibo Mao

> 
> Huacai
> 
>>>
>>> Huacai
>>>
>>>>
>>>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
>>>> ---
>>>>    arch/loongarch/include/asm/kvm_host.h |  2 ++
>>>>    arch/loongarch/kvm/exit.c             | 21 ++++++++++-----
>>>>    arch/loongarch/kvm/vcpu.c             | 37 ++++++++++++++++++---------
>>>>    3 files changed, 41 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>>>> index e4fe5b8e8149..902ff7bc0e35 100644
>>>> --- a/arch/loongarch/include/asm/kvm_host.h
>>>> +++ b/arch/loongarch/include/asm/kvm_host.h
>>>> @@ -37,6 +37,7 @@
>>>>    #define KVM_REQ_TLB_FLUSH_GPA          KVM_ARCH_REQ(0)
>>>>    #define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
>>>>    #define KVM_REQ_PMU                    KVM_ARCH_REQ(2)
>>>> +#define KVM_REQ_FPU_LOAD               KVM_ARCH_REQ(3)
>>>>
>>>>    #define KVM_GUESTDBG_SW_BP_MASK                \
>>>>           (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
>>>> @@ -234,6 +235,7 @@ struct kvm_vcpu_arch {
>>>>           u64 vpid;
>>>>           gpa_t flush_gpa;
>>>>
>>>> +       int fpu_load_type;
>>>>           /* Frequency of stable timer in Hz */
>>>>           u64 timer_mhz;
>>>>           ktime_t expire;
>>>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
>>>> index 65ec10a7245a..62403c7c6f9a 100644
>>>> --- a/arch/loongarch/kvm/exit.c
>>>> +++ b/arch/loongarch/kvm/exit.c
>>>> @@ -754,7 +754,8 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>                   return RESUME_HOST;
>>>>           }
>>>>
>>>> -       kvm_own_fpu(vcpu);
>>>> +       vcpu->arch.fpu_load_type = KVM_LARCH_FPU;
>>>> +       kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>
>>>>           return RESUME_GUEST;
>>>>    }
>>>> @@ -794,8 +795,10 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>    {
>>>>           if (!kvm_guest_has_lsx(&vcpu->arch))
>>>>                   kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>> -       else
>>>> -               kvm_own_lsx(vcpu);
>>>> +       else {
>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LSX;
>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>> +       }
>>>>
>>>>           return RESUME_GUEST;
>>>>    }
>>>> @@ -812,8 +815,10 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>    {
>>>>           if (!kvm_guest_has_lasx(&vcpu->arch))
>>>>                   kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>> -       else
>>>> -               kvm_own_lasx(vcpu);
>>>> +       else {
>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LASX;
>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>> +       }
>>>>
>>>>           return RESUME_GUEST;
>>>>    }
>>>> @@ -822,8 +827,10 @@ static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>    {
>>>>           if (!kvm_guest_has_lbt(&vcpu->arch))
>>>>                   kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>> -       else
>>>> -               kvm_own_lbt(vcpu);
>>>> +       else {
>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LBT;
>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>> +       }
>>>>
>>>>           return RESUME_GUEST;
>>>>    }
>>>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>>>> index 995461d724b5..d05fe6c8f456 100644
>>>> --- a/arch/loongarch/kvm/vcpu.c
>>>> +++ b/arch/loongarch/kvm/vcpu.c
>>>> @@ -232,6 +232,31 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
>>>>                           kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
>>>>                           vcpu->arch.flush_gpa = INVALID_GPA;
>>>>                   }
>>>> +
>>>> +       if (kvm_check_request(KVM_REQ_FPU_LOAD, vcpu)) {
>>>> +               switch (vcpu->arch.fpu_load_type) {
>>>> +               case KVM_LARCH_FPU:
>>>> +                       kvm_own_fpu(vcpu);
>>>> +                       break;
>>>> +
>>>> +               case KVM_LARCH_LSX:
>>>> +                       kvm_own_lsx(vcpu);
>>>> +                       break;
>>>> +
>>>> +               case KVM_LARCH_LASX:
>>>> +                       kvm_own_lasx(vcpu);
>>>> +                       break;
>>>> +
>>>> +               case KVM_LARCH_LBT:
>>>> +                       kvm_own_lbt(vcpu);
>>>> +                       break;
>>>> +
>>>> +               default:
>>>> +                       break;
>>>> +               }
>>>> +
>>>> +               vcpu->arch.fpu_load_type = 0;
>>>> +       }
>>>>    }
>>>>
>>>>    /*
>>>> @@ -1286,13 +1311,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>>>>    #ifdef CONFIG_CPU_HAS_LBT
>>>>    int kvm_own_lbt(struct kvm_vcpu *vcpu)
>>>>    {
>>>> -       preempt_disable();
>>>>           if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
>>>>                   set_csr_euen(CSR_EUEN_LBTEN);
>>>>                   _restore_lbt(&vcpu->arch.lbt);
>>>>                   vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
>>>>           }
>>>> -       preempt_enable();
>>>>
>>>>           return 0;
>>>>    }
>>>> @@ -1335,8 +1358,6 @@ static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
>>>>    /* Enable FPU and restore context */
>>>>    void kvm_own_fpu(struct kvm_vcpu *vcpu)
>>>>    {
>>>> -       preempt_disable();
>>>> -
>>>>           /*
>>>>            * Enable FPU for guest
>>>>            * Set FR and FRE according to guest context
>>>> @@ -1347,16 +1368,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
>>>>           kvm_restore_fpu(&vcpu->arch.fpu);
>>>>           vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
>>>>           trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
>>>> -
>>>> -       preempt_enable();
>>>>    }
>>>>
>>>>    #ifdef CONFIG_CPU_HAS_LSX
>>>>    /* Enable LSX and restore context */
>>>>    int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>    {
>>>> -       preempt_disable();
>>>> -
>>>>           /* Enable LSX for guest */
>>>>           kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>>>>           set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
>>>> @@ -1378,7 +1395,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>
>>>>           trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
>>>>           vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
>>>> -       preempt_enable();
>>>>
>>>>           return 0;
>>>>    }
>>>> @@ -1388,8 +1404,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>    /* Enable LASX and restore context */
>>>>    int kvm_own_lasx(struct kvm_vcpu *vcpu)
>>>>    {
>>>> -       preempt_disable();
>>>> -
>>>>           kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>>>>           set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
>>>>           switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
>>>> @@ -1411,7 +1425,6 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
>>>>
>>>>           trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
>>>>           vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
>>>> -       preempt_enable();
>>>>
>>>>           return 0;
>>>>    }
>>>> --
>>>> 2.39.3
>>>>
>>>>
>>
>>

Re: [PATCH v3 4/4] LoongArch: KVM: Add FPU delay load support
Posted by Huacai Chen 6 days, 12 hours ago
On Tue, Feb 3, 2026 at 3:51 PM Bibo Mao <maobibo@loongson.cn> wrote:
>
>
>
> On 2026/2/3 下午3:34, Huacai Chen wrote:
> > On Tue, Feb 3, 2026 at 2:48 PM Bibo Mao <maobibo@loongson.cn> wrote:
> >>
> >>
> >>
> >> On 2026/2/3 下午12:15, Huacai Chen wrote:
> >>> Hi, Bibo,
> >>>
> >>> On Tue, Feb 3, 2026 at 11:31 AM Bibo Mao <maobibo@loongson.cn> wrote:
> >>>>
> >>>> FPU is lazy enabled with KVM hypervisor. After FPU is enabled and
> >>>> loaded, vCPU can be preempted and FPU will be lost again, there will
> >>>> be unnecessary FPU exception, load and store process. Here FPU is
> >>>> delay load until guest enter entry.
> >>> Calling LSX/LASX as FPU is a little strange, but somewhat reasonable.
> >>> Calling LBT as FPU is very strange. So I still like the V1 logic.
> >> yeap, LBT can use another different BIT and separate with FPU. It is
> >> actually normal use one bit + fpu type variant to represent different
> >> different FPU load requirement, such as
> >> TIF_FOREIGN_FPSTATE/TIF_NEED_FPU_LOAD on other architectures.
> >>
> >> I think it is better to put int fpu_load_type in structure loongarch_fpu.
> >>
> >> And there will be another optimization to avoid load FPU again if FPU HW
> >> is owned by current thread/vCPU, that will add last_cpu int type in
> >> structure loongarch_fpu also.
> >>
> >> Regards
> >> Bibo Mao
> >>>
> >>> If you insist on this version, please rename KVM_REQ_FPU_LOAD to
> >>> KVM_REQ_AUX_LOAD and rename fpu_load_type to aux_type, which is
> >>> similar to aux_inuse.
> > Then why not consider this?
> this can work now. However there is two different structure struct
> loongarch_fpu and struct loongarch_lbt.
Yes, but two structures don't block us from using KVM_REQ_AUX_LOAD and
aux_type to abstract both FPU and LBT, which is similar to aux_inuse.
>
> 1. If kernel wants to use late FPU load, new element fpu_load_type can
> be added in struct loongarch_fpu for both user app/KVM.
>
> 2. With further optimization, FPU HW can own by user app/kernel/KVM,
> there will be another last_cpu int type added in struct loongarch_fpu.
Both loongarch_fpu and loongarch_lbt are register copies, so adding
fpu_load_type/last_cpu is not a good idea.


Huacai
>
> Regards
> Bibo Mao
>
> Regards
> Bibo Mao
>
> >
> > Huacai
> >
> >>>
> >>> Huacai
> >>>
> >>>>
> >>>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
> >>>> ---
> >>>>    arch/loongarch/include/asm/kvm_host.h |  2 ++
> >>>>    arch/loongarch/kvm/exit.c             | 21 ++++++++++-----
> >>>>    arch/loongarch/kvm/vcpu.c             | 37 ++++++++++++++++++---------
> >>>>    3 files changed, 41 insertions(+), 19 deletions(-)
> >>>>
> >>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> >>>> index e4fe5b8e8149..902ff7bc0e35 100644
> >>>> --- a/arch/loongarch/include/asm/kvm_host.h
> >>>> +++ b/arch/loongarch/include/asm/kvm_host.h
> >>>> @@ -37,6 +37,7 @@
> >>>>    #define KVM_REQ_TLB_FLUSH_GPA          KVM_ARCH_REQ(0)
> >>>>    #define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
> >>>>    #define KVM_REQ_PMU                    KVM_ARCH_REQ(2)
> >>>> +#define KVM_REQ_FPU_LOAD               KVM_ARCH_REQ(3)
> >>>>
> >>>>    #define KVM_GUESTDBG_SW_BP_MASK                \
> >>>>           (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
> >>>> @@ -234,6 +235,7 @@ struct kvm_vcpu_arch {
> >>>>           u64 vpid;
> >>>>           gpa_t flush_gpa;
> >>>>
> >>>> +       int fpu_load_type;
> >>>>           /* Frequency of stable timer in Hz */
> >>>>           u64 timer_mhz;
> >>>>           ktime_t expire;
> >>>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
> >>>> index 65ec10a7245a..62403c7c6f9a 100644
> >>>> --- a/arch/loongarch/kvm/exit.c
> >>>> +++ b/arch/loongarch/kvm/exit.c
> >>>> @@ -754,7 +754,8 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>>>                   return RESUME_HOST;
> >>>>           }
> >>>>
> >>>> -       kvm_own_fpu(vcpu);
> >>>> +       vcpu->arch.fpu_load_type = KVM_LARCH_FPU;
> >>>> +       kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >>>>
> >>>>           return RESUME_GUEST;
> >>>>    }
> >>>> @@ -794,8 +795,10 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>>>    {
> >>>>           if (!kvm_guest_has_lsx(&vcpu->arch))
> >>>>                   kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> >>>> -       else
> >>>> -               kvm_own_lsx(vcpu);
> >>>> +       else {
> >>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LSX;
> >>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >>>> +       }
> >>>>
> >>>>           return RESUME_GUEST;
> >>>>    }
> >>>> @@ -812,8 +815,10 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>>>    {
> >>>>           if (!kvm_guest_has_lasx(&vcpu->arch))
> >>>>                   kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> >>>> -       else
> >>>> -               kvm_own_lasx(vcpu);
> >>>> +       else {
> >>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LASX;
> >>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >>>> +       }
> >>>>
> >>>>           return RESUME_GUEST;
> >>>>    }
> >>>> @@ -822,8 +827,10 @@ static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>>>    {
> >>>>           if (!kvm_guest_has_lbt(&vcpu->arch))
> >>>>                   kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> >>>> -       else
> >>>> -               kvm_own_lbt(vcpu);
> >>>> +       else {
> >>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LBT;
> >>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >>>> +       }
> >>>>
> >>>>           return RESUME_GUEST;
> >>>>    }
> >>>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> >>>> index 995461d724b5..d05fe6c8f456 100644
> >>>> --- a/arch/loongarch/kvm/vcpu.c
> >>>> +++ b/arch/loongarch/kvm/vcpu.c
> >>>> @@ -232,6 +232,31 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
> >>>>                           kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
> >>>>                           vcpu->arch.flush_gpa = INVALID_GPA;
> >>>>                   }
> >>>> +
> >>>> +       if (kvm_check_request(KVM_REQ_FPU_LOAD, vcpu)) {
> >>>> +               switch (vcpu->arch.fpu_load_type) {
> >>>> +               case KVM_LARCH_FPU:
> >>>> +                       kvm_own_fpu(vcpu);
> >>>> +                       break;
> >>>> +
> >>>> +               case KVM_LARCH_LSX:
> >>>> +                       kvm_own_lsx(vcpu);
> >>>> +                       break;
> >>>> +
> >>>> +               case KVM_LARCH_LASX:
> >>>> +                       kvm_own_lasx(vcpu);
> >>>> +                       break;
> >>>> +
> >>>> +               case KVM_LARCH_LBT:
> >>>> +                       kvm_own_lbt(vcpu);
> >>>> +                       break;
> >>>> +
> >>>> +               default:
> >>>> +                       break;
> >>>> +               }
> >>>> +
> >>>> +               vcpu->arch.fpu_load_type = 0;
> >>>> +       }
> >>>>    }
> >>>>
> >>>>    /*
> >>>> @@ -1286,13 +1311,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> >>>>    #ifdef CONFIG_CPU_HAS_LBT
> >>>>    int kvm_own_lbt(struct kvm_vcpu *vcpu)
> >>>>    {
> >>>> -       preempt_disable();
> >>>>           if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
> >>>>                   set_csr_euen(CSR_EUEN_LBTEN);
> >>>>                   _restore_lbt(&vcpu->arch.lbt);
> >>>>                   vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
> >>>>           }
> >>>> -       preempt_enable();
> >>>>
> >>>>           return 0;
> >>>>    }
> >>>> @@ -1335,8 +1358,6 @@ static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
> >>>>    /* Enable FPU and restore context */
> >>>>    void kvm_own_fpu(struct kvm_vcpu *vcpu)
> >>>>    {
> >>>> -       preempt_disable();
> >>>> -
> >>>>           /*
> >>>>            * Enable FPU for guest
> >>>>            * Set FR and FRE according to guest context
> >>>> @@ -1347,16 +1368,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
> >>>>           kvm_restore_fpu(&vcpu->arch.fpu);
> >>>>           vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
> >>>>           trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
> >>>> -
> >>>> -       preempt_enable();
> >>>>    }
> >>>>
> >>>>    #ifdef CONFIG_CPU_HAS_LSX
> >>>>    /* Enable LSX and restore context */
> >>>>    int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>>>    {
> >>>> -       preempt_disable();
> >>>> -
> >>>>           /* Enable LSX for guest */
> >>>>           kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
> >>>>           set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
> >>>> @@ -1378,7 +1395,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>>>
> >>>>           trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
> >>>>           vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
> >>>> -       preempt_enable();
> >>>>
> >>>>           return 0;
> >>>>    }
> >>>> @@ -1388,8 +1404,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>>>    /* Enable LASX and restore context */
> >>>>    int kvm_own_lasx(struct kvm_vcpu *vcpu)
> >>>>    {
> >>>> -       preempt_disable();
> >>>> -
> >>>>           kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
> >>>>           set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
> >>>>           switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
> >>>> @@ -1411,7 +1425,6 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
> >>>>
> >>>>           trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
> >>>>           vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
> >>>> -       preempt_enable();
> >>>>
> >>>>           return 0;
> >>>>    }
> >>>> --
> >>>> 2.39.3
> >>>>
> >>>>
> >>
> >>
>
>
Re: [PATCH v3 4/4] LoongArch: KVM: Add FPU delay load support
Posted by Bibo Mao 6 days, 11 hours ago

On 2026/2/3 下午4:50, Huacai Chen wrote:
> On Tue, Feb 3, 2026 at 3:51 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>
>>
>>
>> On 2026/2/3 下午3:34, Huacai Chen wrote:
>>> On Tue, Feb 3, 2026 at 2:48 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>>>
>>>>
>>>>
>>>> On 2026/2/3 下午12:15, Huacai Chen wrote:
>>>>> Hi, Bibo,
>>>>>
>>>>> On Tue, Feb 3, 2026 at 11:31 AM Bibo Mao <maobibo@loongson.cn> wrote:
>>>>>>
>>>>>> FPU is lazy enabled with KVM hypervisor. After FPU is enabled and
>>>>>> loaded, vCPU can be preempted and FPU will be lost again, there will
>>>>>> be unnecessary FPU exception, load and store process. Here FPU is
>>>>>> delay load until guest enter entry.
>>>>> Calling LSX/LASX as FPU is a little strange, but somewhat reasonable.
>>>>> Calling LBT as FPU is very strange. So I still like the V1 logic.
>>>> yeap, LBT can use another different BIT and separate with FPU. It is
>>>> actually normal use one bit + fpu type variant to represent different
>>>> different FPU load requirement, such as
>>>> TIF_FOREIGN_FPSTATE/TIF_NEED_FPU_LOAD on other architectures.
>>>>
>>>> I think it is better to put int fpu_load_type in structure loongarch_fpu.
>>>>
>>>> And there will be another optimization to avoid load FPU again if FPU HW
>>>> is owned by current thread/vCPU, that will add last_cpu int type in
>>>> structure loongarch_fpu also.
>>>>
>>>> Regards
>>>> Bibo Mao
>>>>>
>>>>> If you insist on this version, please rename KVM_REQ_FPU_LOAD to
>>>>> KVM_REQ_AUX_LOAD and rename fpu_load_type to aux_type, which is
>>>>> similar to aux_inuse.
>>> Then why not consider this?
>> this can work now. However there is two different structure struct
>> loongarch_fpu and struct loongarch_lbt.
> Yes, but two structures don't block us from using KVM_REQ_AUX_LOAD and
> aux_type to abstract both FPU and LBT, which is similar to aux_inuse.
>>
>> 1. If kernel wants to use late FPU load, new element fpu_load_type can
>> be added in struct loongarch_fpu for both user app/KVM.
Where should aux_type be put for kernel/KVM? In the thread structure for 
kernel late FPU load, and in vcpu.arch for KVM late FPU load?
>>
>> 2. With further optimization, FPU HW can own by user app/kernel/KVM,
>> there will be another last_cpu int type added in struct loongarch_fpu.
> Both loongarch_fpu and loongarch_lbt are register copies, so adding
> fpu_load_type/last_cpu is not a good idea.
If a vCPU using the FPU is preempted by a kernel thread, and that kernel 
thread does not use the FPU, the HW FPU state is the same as the SW FPU 
state, so the HW FPU load can be skipped.

BTW, did you ever investigate the FPU load/save process on other general 
architectures besides MIPS?

Regards
Bibo Mao
> 
> 
> Huacai
>>
>> Regards
>> Bibo Mao
>>
>> Regards
>> Bibo Mao
>>
>>>
>>> Huacai
>>>
>>>>>
>>>>> Huacai
>>>>>
>>>>>>
>>>>>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
>>>>>> ---
>>>>>>     arch/loongarch/include/asm/kvm_host.h |  2 ++
>>>>>>     arch/loongarch/kvm/exit.c             | 21 ++++++++++-----
>>>>>>     arch/loongarch/kvm/vcpu.c             | 37 ++++++++++++++++++---------
>>>>>>     3 files changed, 41 insertions(+), 19 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>>>>>> index e4fe5b8e8149..902ff7bc0e35 100644
>>>>>> --- a/arch/loongarch/include/asm/kvm_host.h
>>>>>> +++ b/arch/loongarch/include/asm/kvm_host.h
>>>>>> @@ -37,6 +37,7 @@
>>>>>>     #define KVM_REQ_TLB_FLUSH_GPA          KVM_ARCH_REQ(0)
>>>>>>     #define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
>>>>>>     #define KVM_REQ_PMU                    KVM_ARCH_REQ(2)
>>>>>> +#define KVM_REQ_FPU_LOAD               KVM_ARCH_REQ(3)
>>>>>>
>>>>>>     #define KVM_GUESTDBG_SW_BP_MASK                \
>>>>>>            (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
>>>>>> @@ -234,6 +235,7 @@ struct kvm_vcpu_arch {
>>>>>>            u64 vpid;
>>>>>>            gpa_t flush_gpa;
>>>>>>
>>>>>> +       int fpu_load_type;
>>>>>>            /* Frequency of stable timer in Hz */
>>>>>>            u64 timer_mhz;
>>>>>>            ktime_t expire;
>>>>>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
>>>>>> index 65ec10a7245a..62403c7c6f9a 100644
>>>>>> --- a/arch/loongarch/kvm/exit.c
>>>>>> +++ b/arch/loongarch/kvm/exit.c
>>>>>> @@ -754,7 +754,8 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>                    return RESUME_HOST;
>>>>>>            }
>>>>>>
>>>>>> -       kvm_own_fpu(vcpu);
>>>>>> +       vcpu->arch.fpu_load_type = KVM_LARCH_FPU;
>>>>>> +       kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>>
>>>>>>            return RESUME_GUEST;
>>>>>>     }
>>>>>> @@ -794,8 +795,10 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>     {
>>>>>>            if (!kvm_guest_has_lsx(&vcpu->arch))
>>>>>>                    kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>>>> -       else
>>>>>> -               kvm_own_lsx(vcpu);
>>>>>> +       else {
>>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LSX;
>>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>> +       }
>>>>>>
>>>>>>            return RESUME_GUEST;
>>>>>>     }
>>>>>> @@ -812,8 +815,10 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>     {
>>>>>>            if (!kvm_guest_has_lasx(&vcpu->arch))
>>>>>>                    kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>>>> -       else
>>>>>> -               kvm_own_lasx(vcpu);
>>>>>> +       else {
>>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LASX;
>>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>> +       }
>>>>>>
>>>>>>            return RESUME_GUEST;
>>>>>>     }
>>>>>> @@ -822,8 +827,10 @@ static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>     {
>>>>>>            if (!kvm_guest_has_lbt(&vcpu->arch))
>>>>>>                    kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>>>> -       else
>>>>>> -               kvm_own_lbt(vcpu);
>>>>>> +       else {
>>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LBT;
>>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>> +       }
>>>>>>
>>>>>>            return RESUME_GUEST;
>>>>>>     }
>>>>>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>>>>>> index 995461d724b5..d05fe6c8f456 100644
>>>>>> --- a/arch/loongarch/kvm/vcpu.c
>>>>>> +++ b/arch/loongarch/kvm/vcpu.c
>>>>>> @@ -232,6 +232,31 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
>>>>>>                            kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
>>>>>>                            vcpu->arch.flush_gpa = INVALID_GPA;
>>>>>>                    }
>>>>>> +
>>>>>> +       if (kvm_check_request(KVM_REQ_FPU_LOAD, vcpu)) {
>>>>>> +               switch (vcpu->arch.fpu_load_type) {
>>>>>> +               case KVM_LARCH_FPU:
>>>>>> +                       kvm_own_fpu(vcpu);
>>>>>> +                       break;
>>>>>> +
>>>>>> +               case KVM_LARCH_LSX:
>>>>>> +                       kvm_own_lsx(vcpu);
>>>>>> +                       break;
>>>>>> +
>>>>>> +               case KVM_LARCH_LASX:
>>>>>> +                       kvm_own_lasx(vcpu);
>>>>>> +                       break;
>>>>>> +
>>>>>> +               case KVM_LARCH_LBT:
>>>>>> +                       kvm_own_lbt(vcpu);
>>>>>> +                       break;
>>>>>> +
>>>>>> +               default:
>>>>>> +                       break;
>>>>>> +               }
>>>>>> +
>>>>>> +               vcpu->arch.fpu_load_type = 0;
>>>>>> +       }
>>>>>>     }
>>>>>>
>>>>>>     /*
>>>>>> @@ -1286,13 +1311,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>>>>>>     #ifdef CONFIG_CPU_HAS_LBT
>>>>>>     int kvm_own_lbt(struct kvm_vcpu *vcpu)
>>>>>>     {
>>>>>> -       preempt_disable();
>>>>>>            if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
>>>>>>                    set_csr_euen(CSR_EUEN_LBTEN);
>>>>>>                    _restore_lbt(&vcpu->arch.lbt);
>>>>>>                    vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
>>>>>>            }
>>>>>> -       preempt_enable();
>>>>>>
>>>>>>            return 0;
>>>>>>     }
>>>>>> @@ -1335,8 +1358,6 @@ static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
>>>>>>     /* Enable FPU and restore context */
>>>>>>     void kvm_own_fpu(struct kvm_vcpu *vcpu)
>>>>>>     {
>>>>>> -       preempt_disable();
>>>>>> -
>>>>>>            /*
>>>>>>             * Enable FPU for guest
>>>>>>             * Set FR and FRE according to guest context
>>>>>> @@ -1347,16 +1368,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
>>>>>>            kvm_restore_fpu(&vcpu->arch.fpu);
>>>>>>            vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
>>>>>>            trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
>>>>>> -
>>>>>> -       preempt_enable();
>>>>>>     }
>>>>>>
>>>>>>     #ifdef CONFIG_CPU_HAS_LSX
>>>>>>     /* Enable LSX and restore context */
>>>>>>     int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>>>     {
>>>>>> -       preempt_disable();
>>>>>> -
>>>>>>            /* Enable LSX for guest */
>>>>>>            kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>>>>>>            set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
>>>>>> @@ -1378,7 +1395,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>>>
>>>>>>            trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
>>>>>>            vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
>>>>>> -       preempt_enable();
>>>>>>
>>>>>>            return 0;
>>>>>>     }
>>>>>> @@ -1388,8 +1404,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>>>     /* Enable LASX and restore context */
>>>>>>     int kvm_own_lasx(struct kvm_vcpu *vcpu)
>>>>>>     {
>>>>>> -       preempt_disable();
>>>>>> -
>>>>>>            kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>>>>>>            set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
>>>>>>            switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
>>>>>> @@ -1411,7 +1425,6 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
>>>>>>
>>>>>>            trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
>>>>>>            vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
>>>>>> -       preempt_enable();
>>>>>>
>>>>>>            return 0;
>>>>>>     }
>>>>>> --
>>>>>> 2.39.3
>>>>>>
>>>>>>
>>>>
>>>>
>>
>>

Re: [PATCH v3 4/4] LoongArch: KVM: Add FPU delay load support
Posted by Huacai Chen 6 days, 11 hours ago
On Tue, Feb 3, 2026 at 4:59 PM Bibo Mao <maobibo@loongson.cn> wrote:
>
>
>
> On 2026/2/3 下午4:50, Huacai Chen wrote:
> > On Tue, Feb 3, 2026 at 3:51 PM Bibo Mao <maobibo@loongson.cn> wrote:
> >>
> >>
> >>
> >> On 2026/2/3 下午3:34, Huacai Chen wrote:
> >>> On Tue, Feb 3, 2026 at 2:48 PM Bibo Mao <maobibo@loongson.cn> wrote:
> >>>>
> >>>>
> >>>>
> >>>> On 2026/2/3 下午12:15, Huacai Chen wrote:
> >>>>> Hi, Bibo,
> >>>>>
> >>>>> On Tue, Feb 3, 2026 at 11:31 AM Bibo Mao <maobibo@loongson.cn> wrote:
> >>>>>>
> >>>>>> FPU is lazy enabled with KVM hypervisor. After FPU is enabled and
> >>>>>> loaded, vCPU can be preempted and FPU will be lost again, there will
> >>>>>> be unnecessary FPU exception, load and store process. Here FPU is
> >>>>>> delay load until guest enter entry.
> >>>>> Calling LSX/LASX as FPU is a little strange, but somewhat reasonable.
> >>>>> Calling LBT as FPU is very strange. So I still like the V1 logic.
> >>>> yeap, LBT can use another different BIT and separate with FPU. It is
> >>>> actually normal use one bit + fpu type variant to represent different
> >>>> different FPU load requirement, such as
> >>>> TIF_FOREIGN_FPSTATE/TIF_NEED_FPU_LOAD on other architectures.
> >>>>
> >>>> I think it is better to put int fpu_load_type in structure loongarch_fpu.
> >>>>
> >>>> And there will be another optimization to avoid load FPU again if FPU HW
> >>>> is owned by current thread/vCPU, that will add last_cpu int type in
> >>>> structure loongarch_fpu also.
> >>>>
> >>>> Regards
> >>>> Bibo Mao
> >>>>>
> >>>>> If you insist on this version, please rename KVM_REQ_FPU_LOAD to
> >>>>> KVM_REQ_AUX_LOAD and rename fpu_load_type to aux_type, which is
> >>>>> similar to aux_inuse.
> >>> Then why not consider this?
> >> this can work now. However there is two different structure struct
> >> loongarch_fpu and struct loongarch_lbt.
> > Yes, but two structures don't block us from using KVM_REQ_AUX_LOAD and
> > aux_type to abstract both FPU and LBT, which is similar to aux_inuse.
> >>
> >> 1. If kernel wants to use late FPU load, new element fpu_load_type can
> >> be added in struct loongarch_fpu for both user app/KVM.
> where aux_type is put for kernel/kvm? Put it in thread structure with
> kernel late FPU load and vcpu.arch with KVM late FPU load?
aux_type is just fpu_load_type renamed, so aux_type goes wherever
fpu_load_type currently is.

> >>
> >> 2. With further optimization, FPU HW can own by user app/kernel/KVM,
> >> there will be another last_cpu int type added in struct loongarch_fpu.
> > Both loongarch_fpu and loongarch_lbt are register copies, so adding
> > fpu_load_type/last_cpu is not a good idea.
> If vCPU using FPU is preempted by kernel thread and kernel thread does
> not use FPU, HW FPU is the same with SW FPU state, HW FPU load can be
> skipped.
>
> BTW do you ever investigate FPU load/save process on other general
> architectures except MIPS?
I investigated nothing, including MIPS. Other architectures may give us
some inspiration, but that doesn't mean we should copy them, whether
X86 or MIPS.

X86 introduced lazy FPU, then others also used lazy FPU; but now that X86
has switched to eager FPU, should others also do the same?

On the other hand, when you used separate FPU/LSX/LASX, I only mentioned
the trace functions. Then you changed to centralized FPU/LSX/LASX/LBT.
Then, when I suggested you improve the centralized FPU/LSX/LASX/LBT, you
changed to separate FPU/LBT again — where is the end?



Huacai
>
> Regards
> Bibo Mao
> >
> >
> > Huacai
> >>
> >> Regards
> >> Bibo Mao
> >>
> >> Regards
> >> Bibo Mao
> >>
> >>>
> >>> Huacai
> >>>
> >>>>>
> >>>>> Huacai
> >>>>>
> >>>>>>
> >>>>>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
> >>>>>> ---
> >>>>>>     arch/loongarch/include/asm/kvm_host.h |  2 ++
> >>>>>>     arch/loongarch/kvm/exit.c             | 21 ++++++++++-----
> >>>>>>     arch/loongarch/kvm/vcpu.c             | 37 ++++++++++++++++++---------
> >>>>>>     3 files changed, 41 insertions(+), 19 deletions(-)
> >>>>>>
> >>>>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> >>>>>> index e4fe5b8e8149..902ff7bc0e35 100644
> >>>>>> --- a/arch/loongarch/include/asm/kvm_host.h
> >>>>>> +++ b/arch/loongarch/include/asm/kvm_host.h
> >>>>>> @@ -37,6 +37,7 @@
> >>>>>>     #define KVM_REQ_TLB_FLUSH_GPA          KVM_ARCH_REQ(0)
> >>>>>>     #define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
> >>>>>>     #define KVM_REQ_PMU                    KVM_ARCH_REQ(2)
> >>>>>> +#define KVM_REQ_FPU_LOAD               KVM_ARCH_REQ(3)
> >>>>>>
> >>>>>>     #define KVM_GUESTDBG_SW_BP_MASK                \
> >>>>>>            (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
> >>>>>> @@ -234,6 +235,7 @@ struct kvm_vcpu_arch {
> >>>>>>            u64 vpid;
> >>>>>>            gpa_t flush_gpa;
> >>>>>>
> >>>>>> +       int fpu_load_type;
> >>>>>>            /* Frequency of stable timer in Hz */
> >>>>>>            u64 timer_mhz;
> >>>>>>            ktime_t expire;
> >>>>>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
> >>>>>> index 65ec10a7245a..62403c7c6f9a 100644
> >>>>>> --- a/arch/loongarch/kvm/exit.c
> >>>>>> +++ b/arch/loongarch/kvm/exit.c
> >>>>>> @@ -754,7 +754,8 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>>>>>                    return RESUME_HOST;
> >>>>>>            }
> >>>>>>
> >>>>>> -       kvm_own_fpu(vcpu);
> >>>>>> +       vcpu->arch.fpu_load_type = KVM_LARCH_FPU;
> >>>>>> +       kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >>>>>>
> >>>>>>            return RESUME_GUEST;
> >>>>>>     }
> >>>>>> @@ -794,8 +795,10 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>>>>>     {
> >>>>>>            if (!kvm_guest_has_lsx(&vcpu->arch))
> >>>>>>                    kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> >>>>>> -       else
> >>>>>> -               kvm_own_lsx(vcpu);
> >>>>>> +       else {
> >>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LSX;
> >>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >>>>>> +       }
> >>>>>>
> >>>>>>            return RESUME_GUEST;
> >>>>>>     }
> >>>>>> @@ -812,8 +815,10 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>>>>>     {
> >>>>>>            if (!kvm_guest_has_lasx(&vcpu->arch))
> >>>>>>                    kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> >>>>>> -       else
> >>>>>> -               kvm_own_lasx(vcpu);
> >>>>>> +       else {
> >>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LASX;
> >>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >>>>>> +       }
> >>>>>>
> >>>>>>            return RESUME_GUEST;
> >>>>>>     }
> >>>>>> @@ -822,8 +827,10 @@ static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu, int ecode)
> >>>>>>     {
> >>>>>>            if (!kvm_guest_has_lbt(&vcpu->arch))
> >>>>>>                    kvm_queue_exception(vcpu, EXCCODE_INE, 0);
> >>>>>> -       else
> >>>>>> -               kvm_own_lbt(vcpu);
> >>>>>> +       else {
> >>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LBT;
> >>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
> >>>>>> +       }
> >>>>>>
> >>>>>>            return RESUME_GUEST;
> >>>>>>     }
> >>>>>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> >>>>>> index 995461d724b5..d05fe6c8f456 100644
> >>>>>> --- a/arch/loongarch/kvm/vcpu.c
> >>>>>> +++ b/arch/loongarch/kvm/vcpu.c
> >>>>>> @@ -232,6 +232,31 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
> >>>>>>                            kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
> >>>>>>                            vcpu->arch.flush_gpa = INVALID_GPA;
> >>>>>>                    }
> >>>>>> +
> >>>>>> +       if (kvm_check_request(KVM_REQ_FPU_LOAD, vcpu)) {
> >>>>>> +               switch (vcpu->arch.fpu_load_type) {
> >>>>>> +               case KVM_LARCH_FPU:
> >>>>>> +                       kvm_own_fpu(vcpu);
> >>>>>> +                       break;
> >>>>>> +
> >>>>>> +               case KVM_LARCH_LSX:
> >>>>>> +                       kvm_own_lsx(vcpu);
> >>>>>> +                       break;
> >>>>>> +
> >>>>>> +               case KVM_LARCH_LASX:
> >>>>>> +                       kvm_own_lasx(vcpu);
> >>>>>> +                       break;
> >>>>>> +
> >>>>>> +               case KVM_LARCH_LBT:
> >>>>>> +                       kvm_own_lbt(vcpu);
> >>>>>> +                       break;
> >>>>>> +
> >>>>>> +               default:
> >>>>>> +                       break;
> >>>>>> +               }
> >>>>>> +
> >>>>>> +               vcpu->arch.fpu_load_type = 0;
> >>>>>> +       }
> >>>>>>     }
> >>>>>>
> >>>>>>     /*
> >>>>>> @@ -1286,13 +1311,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> >>>>>>     #ifdef CONFIG_CPU_HAS_LBT
> >>>>>>     int kvm_own_lbt(struct kvm_vcpu *vcpu)
> >>>>>>     {
> >>>>>> -       preempt_disable();
> >>>>>>            if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
> >>>>>>                    set_csr_euen(CSR_EUEN_LBTEN);
> >>>>>>                    _restore_lbt(&vcpu->arch.lbt);
> >>>>>>                    vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
> >>>>>>            }
> >>>>>> -       preempt_enable();
> >>>>>>
> >>>>>>            return 0;
> >>>>>>     }
> >>>>>> @@ -1335,8 +1358,6 @@ static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
> >>>>>>     /* Enable FPU and restore context */
> >>>>>>     void kvm_own_fpu(struct kvm_vcpu *vcpu)
> >>>>>>     {
> >>>>>> -       preempt_disable();
> >>>>>> -
> >>>>>>            /*
> >>>>>>             * Enable FPU for guest
> >>>>>>             * Set FR and FRE according to guest context
> >>>>>> @@ -1347,16 +1368,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
> >>>>>>            kvm_restore_fpu(&vcpu->arch.fpu);
> >>>>>>            vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
> >>>>>>            trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
> >>>>>> -
> >>>>>> -       preempt_enable();
> >>>>>>     }
> >>>>>>
> >>>>>>     #ifdef CONFIG_CPU_HAS_LSX
> >>>>>>     /* Enable LSX and restore context */
> >>>>>>     int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>>>>>     {
> >>>>>> -       preempt_disable();
> >>>>>> -
> >>>>>>            /* Enable LSX for guest */
> >>>>>>            kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
> >>>>>>            set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
> >>>>>> @@ -1378,7 +1395,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>>>>>
> >>>>>>            trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
> >>>>>>            vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
> >>>>>> -       preempt_enable();
> >>>>>>
> >>>>>>            return 0;
> >>>>>>     }
> >>>>>> @@ -1388,8 +1404,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
> >>>>>>     /* Enable LASX and restore context */
> >>>>>>     int kvm_own_lasx(struct kvm_vcpu *vcpu)
> >>>>>>     {
> >>>>>> -       preempt_disable();
> >>>>>> -
> >>>>>>            kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
> >>>>>>            set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
> >>>>>>            switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
> >>>>>> @@ -1411,7 +1425,6 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
> >>>>>>
> >>>>>>            trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
> >>>>>>            vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
> >>>>>> -       preempt_enable();
> >>>>>>
> >>>>>>            return 0;
> >>>>>>     }
> >>>>>> --
> >>>>>> 2.39.3
> >>>>>>
> >>>>>>
> >>>>
> >>>>
> >>
> >>
>
>
Re: [PATCH v3 4/4] LoongArch: KVM: Add FPU delay load support
Posted by Bibo Mao 6 days, 11 hours ago

On 2026/2/3 下午5:17, Huacai Chen wrote:
> On Tue, Feb 3, 2026 at 4:59 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>
>>
>>
>> On 2026/2/3 下午4:50, Huacai Chen wrote:
>>> On Tue, Feb 3, 2026 at 3:51 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>>>
>>>>
>>>>
>>>> On 2026/2/3 下午3:34, Huacai Chen wrote:
>>>>> On Tue, Feb 3, 2026 at 2:48 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 2026/2/3 下午12:15, Huacai Chen wrote:
>>>>>>> Hi, Bibo,
>>>>>>>
>>>>>>> On Tue, Feb 3, 2026 at 11:31 AM Bibo Mao <maobibo@loongson.cn> wrote:
>>>>>>>>
>>>>>>>> FPU is lazy enabled with KVM hypervisor. After FPU is enabled and
>>>>>>>> loaded, vCPU can be preempted and FPU will be lost again, there will
>>>>>>>> be unnecessary FPU exception, load and store process. Here FPU is
>>>>>>>> delay load until guest enter entry.
>>>>>>> Calling LSX/LASX as FPU is a little strange, but somewhat reasonable.
>>>>>>> Calling LBT as FPU is very strange. So I still like the V1 logic.
>>>>>> yeap, LBT can use another different BIT and separate with FPU. It is
>>>>>> actually normal use one bit + fpu type variant to represent different
>>>>>> different FPU load requirement, such as
>>>>>> TIF_FOREIGN_FPSTATE/TIF_NEED_FPU_LOAD on other architectures.
>>>>>>
>>>>>> I think it is better to put int fpu_load_type in structure loongarch_fpu.
>>>>>>
>>>>>> And there will be another optimization to avoid load FPU again if FPU HW
>>>>>> is owned by current thread/vCPU, that will add last_cpu int type in
>>>>>> structure loongarch_fpu also.
>>>>>>
>>>>>> Regards
>>>>>> Bibo Mao
>>>>>>>
>>>>>>> If you insist on this version, please rename KVM_REQ_FPU_LOAD to
>>>>>>> KVM_REQ_AUX_LOAD and rename fpu_load_type to aux_type, which is
>>>>>>> similar to aux_inuse.
>>>>> Then why not consider this?
>>>> this can work now. However there is two different structure struct
>>>> loongarch_fpu and struct loongarch_lbt.
>>> Yes, but two structures don't block us from using KVM_REQ_AUX_LOAD and
>>> aux_type to abstract both FPU and LBT, which is similar to aux_inuse.
>>>>
>>>> 1. If kernel wants to use late FPU load, new element fpu_load_type can
>>>> be added in struct loongarch_fpu for both user app/KVM.
>> where aux_type is put for kernel/kvm? Put it in thread structure with
>> kernel late FPU load and vcpu.arch with KVM late FPU load?
> aux_type is renamed from fpu_load_type, so where fpu_load_type is,
> then where aux_type is.
> 
>>>>
>>>> 2. With further optimization, FPU HW can own by user app/kernel/KVM,
>>>> there will be another last_cpu int type added in struct loongarch_fpu.
>>> Both loongarch_fpu and loongarch_lbt are register copies, so adding
>>> fpu_load_type/last_cpu is not a good idea.
>> If vCPU using FPU is preempted by kernel thread and kernel thread does
>> not use FPU, HW FPU is the same with SW FPU state, HW FPU load can be
>> skipped.
>>
>> BTW do you ever investigate FPU load/save process on other general
>> architectures except MIPS?
> I investigate nothing, including MIPS. Other architectures may give us
> some inspiration, but that doesn't mean we should copy them, no matter
> X86 or MIPS.
> 
> X86 introduced lazy fpu, then others also use lazy fpu; but now X86
> have switched to eager fpu, others should also do the same?
> 
> On the other hand, when you use separate FPU/LSX/LASX, I only mention
> the trace functions. Then you changed to centralized FPU/LSX/LASX/LBT.
> Then I suggest you improve centralized FPU/LSX/LASX/LBT, you changed
> to separate FPU/LBT again, where is the end?
OK, I can use an aux bit and an aux type for KVM only.

In the future, if there is a patch optimizing away redundant FPU register
loads, we can discuss it then.

Regards
Bibo Mao
> 
> 
> 
> Huacai
>>
>> Regards
>> Bibo Mao
>>>
>>>
>>> Huacai
>>>>
>>>> Regards
>>>> Bibo Mao
>>>>
>>>> Regards
>>>> Bibo Mao
>>>>
>>>>>
>>>>> Huacai
>>>>>
>>>>>>>
>>>>>>> Huacai
>>>>>>>
>>>>>>>>
>>>>>>>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
>>>>>>>> ---
>>>>>>>>      arch/loongarch/include/asm/kvm_host.h |  2 ++
>>>>>>>>      arch/loongarch/kvm/exit.c             | 21 ++++++++++-----
>>>>>>>>      arch/loongarch/kvm/vcpu.c             | 37 ++++++++++++++++++---------
>>>>>>>>      3 files changed, 41 insertions(+), 19 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>>>>>>>> index e4fe5b8e8149..902ff7bc0e35 100644
>>>>>>>> --- a/arch/loongarch/include/asm/kvm_host.h
>>>>>>>> +++ b/arch/loongarch/include/asm/kvm_host.h
>>>>>>>> @@ -37,6 +37,7 @@
>>>>>>>>      #define KVM_REQ_TLB_FLUSH_GPA          KVM_ARCH_REQ(0)
>>>>>>>>      #define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
>>>>>>>>      #define KVM_REQ_PMU                    KVM_ARCH_REQ(2)
>>>>>>>> +#define KVM_REQ_FPU_LOAD               KVM_ARCH_REQ(3)
>>>>>>>>
>>>>>>>>      #define KVM_GUESTDBG_SW_BP_MASK                \
>>>>>>>>             (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
>>>>>>>> @@ -234,6 +235,7 @@ struct kvm_vcpu_arch {
>>>>>>>>             u64 vpid;
>>>>>>>>             gpa_t flush_gpa;
>>>>>>>>
>>>>>>>> +       int fpu_load_type;
>>>>>>>>             /* Frequency of stable timer in Hz */
>>>>>>>>             u64 timer_mhz;
>>>>>>>>             ktime_t expire;
>>>>>>>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
>>>>>>>> index 65ec10a7245a..62403c7c6f9a 100644
>>>>>>>> --- a/arch/loongarch/kvm/exit.c
>>>>>>>> +++ b/arch/loongarch/kvm/exit.c
>>>>>>>> @@ -754,7 +754,8 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>>>                     return RESUME_HOST;
>>>>>>>>             }
>>>>>>>>
>>>>>>>> -       kvm_own_fpu(vcpu);
>>>>>>>> +       vcpu->arch.fpu_load_type = KVM_LARCH_FPU;
>>>>>>>> +       kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>>>>
>>>>>>>>             return RESUME_GUEST;
>>>>>>>>      }
>>>>>>>> @@ -794,8 +795,10 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>>>      {
>>>>>>>>             if (!kvm_guest_has_lsx(&vcpu->arch))
>>>>>>>>                     kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>>>>>> -       else
>>>>>>>> -               kvm_own_lsx(vcpu);
>>>>>>>> +       else {
>>>>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LSX;
>>>>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>>>> +       }
>>>>>>>>
>>>>>>>>             return RESUME_GUEST;
>>>>>>>>      }
>>>>>>>> @@ -812,8 +815,10 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>>>      {
>>>>>>>>             if (!kvm_guest_has_lasx(&vcpu->arch))
>>>>>>>>                     kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>>>>>> -       else
>>>>>>>> -               kvm_own_lasx(vcpu);
>>>>>>>> +       else {
>>>>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LASX;
>>>>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>>>> +       }
>>>>>>>>
>>>>>>>>             return RESUME_GUEST;
>>>>>>>>      }
>>>>>>>> @@ -822,8 +827,10 @@ static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>>>      {
>>>>>>>>             if (!kvm_guest_has_lbt(&vcpu->arch))
>>>>>>>>                     kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>>>>>> -       else
>>>>>>>> -               kvm_own_lbt(vcpu);
>>>>>>>> +       else {
>>>>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LBT;
>>>>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>>>> +       }
>>>>>>>>
>>>>>>>>             return RESUME_GUEST;
>>>>>>>>      }
>>>>>>>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>>>>>>>> index 995461d724b5..d05fe6c8f456 100644
>>>>>>>> --- a/arch/loongarch/kvm/vcpu.c
>>>>>>>> +++ b/arch/loongarch/kvm/vcpu.c
>>>>>>>> @@ -232,6 +232,31 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
>>>>>>>>                             kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
>>>>>>>>                             vcpu->arch.flush_gpa = INVALID_GPA;
>>>>>>>>                     }
>>>>>>>> +
>>>>>>>> +       if (kvm_check_request(KVM_REQ_FPU_LOAD, vcpu)) {
>>>>>>>> +               switch (vcpu->arch.fpu_load_type) {
>>>>>>>> +               case KVM_LARCH_FPU:
>>>>>>>> +                       kvm_own_fpu(vcpu);
>>>>>>>> +                       break;
>>>>>>>> +
>>>>>>>> +               case KVM_LARCH_LSX:
>>>>>>>> +                       kvm_own_lsx(vcpu);
>>>>>>>> +                       break;
>>>>>>>> +
>>>>>>>> +               case KVM_LARCH_LASX:
>>>>>>>> +                       kvm_own_lasx(vcpu);
>>>>>>>> +                       break;
>>>>>>>> +
>>>>>>>> +               case KVM_LARCH_LBT:
>>>>>>>> +                       kvm_own_lbt(vcpu);
>>>>>>>> +                       break;
>>>>>>>> +
>>>>>>>> +               default:
>>>>>>>> +                       break;
>>>>>>>> +               }
>>>>>>>> +
>>>>>>>> +               vcpu->arch.fpu_load_type = 0;
>>>>>>>> +       }
>>>>>>>>      }
>>>>>>>>
>>>>>>>>      /*
>>>>>>>> @@ -1286,13 +1311,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>>>>>>>>      #ifdef CONFIG_CPU_HAS_LBT
>>>>>>>>      int kvm_own_lbt(struct kvm_vcpu *vcpu)
>>>>>>>>      {
>>>>>>>> -       preempt_disable();
>>>>>>>>             if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
>>>>>>>>                     set_csr_euen(CSR_EUEN_LBTEN);
>>>>>>>>                     _restore_lbt(&vcpu->arch.lbt);
>>>>>>>>                     vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
>>>>>>>>             }
>>>>>>>> -       preempt_enable();
>>>>>>>>
>>>>>>>>             return 0;
>>>>>>>>      }
>>>>>>>> @@ -1335,8 +1358,6 @@ static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
>>>>>>>>      /* Enable FPU and restore context */
>>>>>>>>      void kvm_own_fpu(struct kvm_vcpu *vcpu)
>>>>>>>>      {
>>>>>>>> -       preempt_disable();
>>>>>>>> -
>>>>>>>>             /*
>>>>>>>>              * Enable FPU for guest
>>>>>>>>              * Set FR and FRE according to guest context
>>>>>>>> @@ -1347,16 +1368,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
>>>>>>>>             kvm_restore_fpu(&vcpu->arch.fpu);
>>>>>>>>             vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
>>>>>>>>             trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
>>>>>>>> -
>>>>>>>> -       preempt_enable();
>>>>>>>>      }
>>>>>>>>
>>>>>>>>      #ifdef CONFIG_CPU_HAS_LSX
>>>>>>>>      /* Enable LSX and restore context */
>>>>>>>>      int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>>>>>      {
>>>>>>>> -       preempt_disable();
>>>>>>>> -
>>>>>>>>             /* Enable LSX for guest */
>>>>>>>>             kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>>>>>>>>             set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
>>>>>>>> @@ -1378,7 +1395,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>>>>>
>>>>>>>>             trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
>>>>>>>>             vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
>>>>>>>> -       preempt_enable();
>>>>>>>>
>>>>>>>>             return 0;
>>>>>>>>      }
>>>>>>>> @@ -1388,8 +1404,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>>>>>      /* Enable LASX and restore context */
>>>>>>>>      int kvm_own_lasx(struct kvm_vcpu *vcpu)
>>>>>>>>      {
>>>>>>>> -       preempt_disable();
>>>>>>>> -
>>>>>>>>             kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>>>>>>>>             set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
>>>>>>>>             switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
>>>>>>>> @@ -1411,7 +1425,6 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
>>>>>>>>
>>>>>>>>             trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
>>>>>>>>             vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
>>>>>>>> -       preempt_enable();
>>>>>>>>
>>>>>>>>             return 0;
>>>>>>>>      }
>>>>>>>> --
>>>>>>>> 2.39.3
>>>>>>>>
>>>>>>>>
>>>>>>
>>>>>>
>>>>
>>>>
>>
>>

Re: [PATCH v3 4/4] LoongArch: KVM: Add FPU delay load support
Posted by Bibo Mao 6 days, 11 hours ago

On 2026/2/3 下午5:17, Huacai Chen wrote:
> On Tue, Feb 3, 2026 at 4:59 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>
>>
>>
>> On 2026/2/3 下午4:50, Huacai Chen wrote:
>>> On Tue, Feb 3, 2026 at 3:51 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>>>
>>>>
>>>>
>>>> On 2026/2/3 下午3:34, Huacai Chen wrote:
>>>>> On Tue, Feb 3, 2026 at 2:48 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 2026/2/3 下午12:15, Huacai Chen wrote:
>>>>>>> Hi, Bibo,
>>>>>>>
>>>>>>> On Tue, Feb 3, 2026 at 11:31 AM Bibo Mao <maobibo@loongson.cn> wrote:
>>>>>>>>
>>>>>>>> FPU is lazy enabled with KVM hypervisor. After FPU is enabled and
>>>>>>>> loaded, vCPU can be preempted and FPU will be lost again, there will
>>>>>>>> be unnecessary FPU exception, load and store process. Here FPU is
>>>>>>>> delay load until guest enter entry.
>>>>>>> Calling LSX/LASX as FPU is a little strange, but somewhat reasonable.
>>>>>>> Calling LBT as FPU is very strange. So I still like the V1 logic.
>>>>>> yeap, LBT can use another different BIT and separate with FPU. It is
>>>>>> actually normal use one bit + fpu type variant to represent different
>>>>>> different FPU load requirement, such as
>>>>>> TIF_FOREIGN_FPSTATE/TIF_NEED_FPU_LOAD on other architectures.
>>>>>>
>>>>>> I think it is better to put int fpu_load_type in structure loongarch_fpu.
>>>>>>
>>>>>> And there will be another optimization to avoid load FPU again if FPU HW
>>>>>> is owned by current thread/vCPU, that will add last_cpu int type in
>>>>>> structure loongarch_fpu also.
>>>>>>
>>>>>> Regards
>>>>>> Bibo Mao
>>>>>>>
>>>>>>> If you insist on this version, please rename KVM_REQ_FPU_LOAD to
>>>>>>> KVM_REQ_AUX_LOAD and rename fpu_load_type to aux_type, which is
>>>>>>> similar to aux_inuse.
>>>>> Then why not consider this?
>>>> this can work now. However there is two different structure struct
>>>> loongarch_fpu and struct loongarch_lbt.
>>> Yes, but two structures don't block us from using KVM_REQ_AUX_LOAD and
>>> aux_type to abstract both FPU and LBT, which is similar to aux_inuse.
>>>>
>>>> 1. If kernel wants to use late FPU load, new element fpu_load_type can
>>>> be added in struct loongarch_fpu for both user app/KVM.
>> where aux_type is put for kernel/kvm? Put it in thread structure with
>> kernel late FPU load and vcpu.arch with KVM late FPU load?
> aux_type is renamed from fpu_load_type, so where fpu_load_type is,
> then where aux_type is.
> 
>>>>
>>>> 2. With further optimization, FPU HW can own by user app/kernel/KVM,
>>>> there will be another last_cpu int type added in struct loongarch_fpu.
>>> Both loongarch_fpu and loongarch_lbt are register copies, so adding
>>> fpu_load_type/last_cpu is not a good idea.
>> If vCPU using FPU is preempted by kernel thread and kernel thread does
>> not use FPU, HW FPU is the same with SW FPU state, HW FPU load can be
>> skipped.
>>
>> BTW do you ever investigate FPU load/save process on other general
>> architectures except MIPS?
> I investigate nothing, including MIPS. Other architectures may give us
> some inspiration, but that doesn't mean we should copy them, no matter
> X86 or MIPS.
> 
> X86 introduced lazy fpu, then others also use lazy fpu; but now X86
> have switched to eager fpu, others should also do the same?
I do not say eager FPU should be used. The question is: for a lazy FPU
exception, if the HW FPU registers still match the SW FPU state even though
the FPU is disabled, can the FPU load be skipped, enabling the FPU only
through the EUEN register?

> 
> On the other hand, when you use separate FPU/LSX/LASX, I only mention
> the trace functions. Then you changed to centralized FPU/LSX/LASX/LBT.
The trace function was a good suggestion, so I accepted it and posted the
second version. The centralized FPU/LSX/LASX handling is because
kvm_late_check_requests() is on a critical hot path while FPU load is not;
it is unnecessary to add three cold kvm_check_request() calls there.

> Then I suggest you improve centralized FPU/LSX/LASX/LBT, you changed
> to separate FPU/LBT again, where is the end?
Now LBT and FPU use different structures for save and restore, so FPU
and LBT are already separated; however, FPU/LSX/LASX share the same
structure, loongarch_fpu.

Regards
Bibo Mao
> 
> 
> 
> Huacai
>>
>> Regards
>> Bibo Mao
>>>
>>>
>>> Huacai
>>>>
>>>> Regards
>>>> Bibo Mao
>>>>
>>>> Regards
>>>> Bibo Mao
>>>>
>>>>>
>>>>> Huacai
>>>>>
>>>>>>>
>>>>>>> Huacai
>>>>>>>
>>>>>>>>
>>>>>>>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
>>>>>>>> ---
>>>>>>>>      arch/loongarch/include/asm/kvm_host.h |  2 ++
>>>>>>>>      arch/loongarch/kvm/exit.c             | 21 ++++++++++-----
>>>>>>>>      arch/loongarch/kvm/vcpu.c             | 37 ++++++++++++++++++---------
>>>>>>>>      3 files changed, 41 insertions(+), 19 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>>>>>>>> index e4fe5b8e8149..902ff7bc0e35 100644
>>>>>>>> --- a/arch/loongarch/include/asm/kvm_host.h
>>>>>>>> +++ b/arch/loongarch/include/asm/kvm_host.h
>>>>>>>> @@ -37,6 +37,7 @@
>>>>>>>>      #define KVM_REQ_TLB_FLUSH_GPA          KVM_ARCH_REQ(0)
>>>>>>>>      #define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
>>>>>>>>      #define KVM_REQ_PMU                    KVM_ARCH_REQ(2)
>>>>>>>> +#define KVM_REQ_FPU_LOAD               KVM_ARCH_REQ(3)
>>>>>>>>
>>>>>>>>      #define KVM_GUESTDBG_SW_BP_MASK                \
>>>>>>>>             (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
>>>>>>>> @@ -234,6 +235,7 @@ struct kvm_vcpu_arch {
>>>>>>>>             u64 vpid;
>>>>>>>>             gpa_t flush_gpa;
>>>>>>>>
>>>>>>>> +       int fpu_load_type;
>>>>>>>>             /* Frequency of stable timer in Hz */
>>>>>>>>             u64 timer_mhz;
>>>>>>>>             ktime_t expire;
>>>>>>>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
>>>>>>>> index 65ec10a7245a..62403c7c6f9a 100644
>>>>>>>> --- a/arch/loongarch/kvm/exit.c
>>>>>>>> +++ b/arch/loongarch/kvm/exit.c
>>>>>>>> @@ -754,7 +754,8 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>>>                     return RESUME_HOST;
>>>>>>>>             }
>>>>>>>>
>>>>>>>> -       kvm_own_fpu(vcpu);
>>>>>>>> +       vcpu->arch.fpu_load_type = KVM_LARCH_FPU;
>>>>>>>> +       kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>>>>
>>>>>>>>             return RESUME_GUEST;
>>>>>>>>      }
>>>>>>>> @@ -794,8 +795,10 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>>>      {
>>>>>>>>             if (!kvm_guest_has_lsx(&vcpu->arch))
>>>>>>>>                     kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>>>>>> -       else
>>>>>>>> -               kvm_own_lsx(vcpu);
>>>>>>>> +       else {
>>>>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LSX;
>>>>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>>>> +       }
>>>>>>>>
>>>>>>>>             return RESUME_GUEST;
>>>>>>>>      }
>>>>>>>> @@ -812,8 +815,10 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>>>      {
>>>>>>>>             if (!kvm_guest_has_lasx(&vcpu->arch))
>>>>>>>>                     kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>>>>>> -       else
>>>>>>>> -               kvm_own_lasx(vcpu);
>>>>>>>> +       else {
>>>>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LASX;
>>>>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>>>> +       }
>>>>>>>>
>>>>>>>>             return RESUME_GUEST;
>>>>>>>>      }
>>>>>>>> @@ -822,8 +827,10 @@ static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu, int ecode)
>>>>>>>>      {
>>>>>>>>             if (!kvm_guest_has_lbt(&vcpu->arch))
>>>>>>>>                     kvm_queue_exception(vcpu, EXCCODE_INE, 0);
>>>>>>>> -       else
>>>>>>>> -               kvm_own_lbt(vcpu);
>>>>>>>> +       else {
>>>>>>>> +               vcpu->arch.fpu_load_type = KVM_LARCH_LBT;
>>>>>>>> +               kvm_make_request(KVM_REQ_FPU_LOAD, vcpu);
>>>>>>>> +       }
>>>>>>>>
>>>>>>>>             return RESUME_GUEST;
>>>>>>>>      }
>>>>>>>> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
>>>>>>>> index 995461d724b5..d05fe6c8f456 100644
>>>>>>>> --- a/arch/loongarch/kvm/vcpu.c
>>>>>>>> +++ b/arch/loongarch/kvm/vcpu.c
>>>>>>>> @@ -232,6 +232,31 @@ static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
>>>>>>>>                             kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
>>>>>>>>                             vcpu->arch.flush_gpa = INVALID_GPA;
>>>>>>>>                     }
>>>>>>>> +
>>>>>>>> +       if (kvm_check_request(KVM_REQ_FPU_LOAD, vcpu)) {
>>>>>>>> +               switch (vcpu->arch.fpu_load_type) {
>>>>>>>> +               case KVM_LARCH_FPU:
>>>>>>>> +                       kvm_own_fpu(vcpu);
>>>>>>>> +                       break;
>>>>>>>> +
>>>>>>>> +               case KVM_LARCH_LSX:
>>>>>>>> +                       kvm_own_lsx(vcpu);
>>>>>>>> +                       break;
>>>>>>>> +
>>>>>>>> +               case KVM_LARCH_LASX:
>>>>>>>> +                       kvm_own_lasx(vcpu);
>>>>>>>> +                       break;
>>>>>>>> +
>>>>>>>> +               case KVM_LARCH_LBT:
>>>>>>>> +                       kvm_own_lbt(vcpu);
>>>>>>>> +                       break;
>>>>>>>> +
>>>>>>>> +               default:
>>>>>>>> +                       break;
>>>>>>>> +               }
>>>>>>>> +
>>>>>>>> +               vcpu->arch.fpu_load_type = 0;
>>>>>>>> +       }
>>>>>>>>      }
>>>>>>>>
>>>>>>>>      /*
>>>>>>>> @@ -1286,13 +1311,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>>>>>>>>      #ifdef CONFIG_CPU_HAS_LBT
>>>>>>>>      int kvm_own_lbt(struct kvm_vcpu *vcpu)
>>>>>>>>      {
>>>>>>>> -       preempt_disable();
>>>>>>>>             if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
>>>>>>>>                     set_csr_euen(CSR_EUEN_LBTEN);
>>>>>>>>                     _restore_lbt(&vcpu->arch.lbt);
>>>>>>>>                     vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
>>>>>>>>             }
>>>>>>>> -       preempt_enable();
>>>>>>>>
>>>>>>>>             return 0;
>>>>>>>>      }
>>>>>>>> @@ -1335,8 +1358,6 @@ static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
>>>>>>>>      /* Enable FPU and restore context */
>>>>>>>>      void kvm_own_fpu(struct kvm_vcpu *vcpu)
>>>>>>>>      {
>>>>>>>> -       preempt_disable();
>>>>>>>> -
>>>>>>>>             /*
>>>>>>>>              * Enable FPU for guest
>>>>>>>>              * Set FR and FRE according to guest context
>>>>>>>> @@ -1347,16 +1368,12 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu)
>>>>>>>>             kvm_restore_fpu(&vcpu->arch.fpu);
>>>>>>>>             vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
>>>>>>>>             trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
>>>>>>>> -
>>>>>>>> -       preempt_enable();
>>>>>>>>      }
>>>>>>>>
>>>>>>>>      #ifdef CONFIG_CPU_HAS_LSX
>>>>>>>>      /* Enable LSX and restore context */
>>>>>>>>      int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>>>>>      {
>>>>>>>> -       preempt_disable();
>>>>>>>> -
>>>>>>>>             /* Enable LSX for guest */
>>>>>>>>             kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>>>>>>>>             set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
>>>>>>>> @@ -1378,7 +1395,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>>>>>
>>>>>>>>             trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
>>>>>>>>             vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
>>>>>>>> -       preempt_enable();
>>>>>>>>
>>>>>>>>             return 0;
>>>>>>>>      }
>>>>>>>> @@ -1388,8 +1404,6 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu)
>>>>>>>>      /* Enable LASX and restore context */
>>>>>>>>      int kvm_own_lasx(struct kvm_vcpu *vcpu)
>>>>>>>>      {
>>>>>>>> -       preempt_disable();
>>>>>>>> -
>>>>>>>>             kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
>>>>>>>>             set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
>>>>>>>>             switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
>>>>>>>> @@ -1411,7 +1425,6 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu)
>>>>>>>>
>>>>>>>>             trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
>>>>>>>>             vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
>>>>>>>> -       preempt_enable();
>>>>>>>>
>>>>>>>>             return 0;
>>>>>>>>      }
>>>>>>>> --
>>>>>>>> 2.39.3
>>>>>>>>
>>>>>>>>
>>>>>>
>>>>>>
>>>>
>>>>
>>
>>