From nobody Fri Apr 3 22:15:14 2026 Received: from mail.loongson.cn (mail.loongson.cn [114.242.206.163]) by smtp.subspace.kernel.org (Postfix) with ESMTP id F0CE87404E; Mon, 23 Mar 2026 03:40:37 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=114.242.206.163 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774237241; cv=none; b=SR93jc5dvXokMqEUCcDdS+4A+4XKalDLKSOFaSx+0fKvdNyrRlQoYSrkj6mk7rV/F62iFqJ0pJT260D3REAaDRVnFB7CVMJUnAyaCmoERrPDm/hbow1GEVwHYIcx+77+ConNObjGvfiGWDnK3MApgv8vI0Z232yzf4Jcctij3mw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774237241; c=relaxed/simple; bh=2GilZkAoeozT1RpSJYGejiwFN6C6fjf+USga5J6Xx/U=; h=From:To:Cc:Subject:Date:Message-Id:MIME-Version; b=pfUUhPz/a/xf56x4xl/iohTcDSk9HODYIdNE8tdF+DgEIAnjrBOwPHKWJQN4svAHmjqjAsjLnhCTZtSH4zZsZpdBvAbZPqQiAeRpf9Z/tPTZ/kbeXS0earm2x7u8G98z0hZmcx6ZtHYJNOlk2KyeuqHestlgG/KNlqBmYO7xq/w= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=loongson.cn; spf=pass smtp.mailfrom=loongson.cn; arc=none smtp.client-ip=114.242.206.163 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=loongson.cn Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=loongson.cn Received: from loongson.cn (unknown [10.2.5.213]) by gateway (Coremail) with SMTP id _____8DxvsMztsBpKqkdAA--.19681S3; Mon, 23 Mar 2026 11:40:35 +0800 (CST) Received: from localhost.localdomain (unknown [10.2.5.213]) by front1 (Coremail) with SMTP id qMiowJDxaeAytsBpOBRbAA--.43922S2; Mon, 23 Mar 2026 11:40:35 +0800 (CST) From: Bibo Mao To: Tianrui Zhao , Huacai Chen Cc: kernel@xen0n.name, kvm@vger.kernel.org, loongarch@lists.linux.dev, linux-kernel@vger.kernel.org Subject: [PATCH] LoongArch: KVM: Set max used FPU type with FPU exception Date: Mon, 23 Mar 2026 11:40:34 +0800 Message-Id: <20260323034034.3465072-1-maobibo@loongson.cn> X-Mailer: git-send-email 2.39.3 
Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-CM-TRANSID: qMiowJDxaeAytsBpOBRbAA--.43922S2 X-CM-SenderInfo: xpdruxter6z05rqj20fqof0/ X-Coremail-Antispam: 1Uk129KBjDUn29KB7ZKAUJUUUUU529EdanIXcx71UUUUU7KY7 ZEXasCq-sGcSsGvfJ3UbIjqfuFe4nvWSU5nxnvy29KBjDU0xBIdaVrnUUvcSsGvfC2Kfnx nUUI43ZEXa7xR_UUUUUUUUU== Content-Type: text/plain; charset="utf-8" With the FPU save and restore flow, the cost is the same for the different FPU widths of 8/16/32 bytes, in terms of both CPU cycles and cache line impact. So enable the FPU with the max used type: for example, if the application has ever used a LASX instruction, enable the FPU with LASX type even on an FPU exception. This avoids possible LSX/LASX exceptions in the future. With a context switch microbench which may touch FPU and LASX, there is a 9% improvement when halt_poll_ns is disabled. The command is "./context --test=3Dpipe" and the source code is located at: https://github.com/bibo-mao/context_switch/blob/main/context.c Original With patch improvement 75232 82440 9% Signed-off-by: Bibo Mao --- arch/loongarch/include/asm/kvm_host.h | 1 + arch/loongarch/kvm/exit.c | 6 +++--- arch/loongarch/kvm/vcpu.c | 25 ++++++++++--------------- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include= /asm/kvm_host.h index 19eb5e5c3984..7d739fd5cda6 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -203,6 +203,7 @@ struct kvm_vcpu_arch { /* Which auxiliary state is loaded (KVM_LARCH_*) */ unsigned int aux_inuse; unsigned int aux_ldtype; + unsigned int aux_used; =20 /* FPU state */ struct loongarch_fpu fpu FPU_ALIGN; diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index da0ad89f2eb7..da8e330c8b20 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -754,7 +754,7 @@ static int 
kvm_handle_fpu_disabled(struct kvm_vcpu *vcp= u, int ecode) return RESUME_HOST; } =20 - vcpu->arch.aux_ldtype =3D KVM_LARCH_FPU; + vcpu->arch.aux_ldtype =3D vcpu->arch.aux_used | KVM_LARCH_FPU; kvm_make_request(KVM_REQ_AUX_LOAD, vcpu); =20 return RESUME_GUEST; @@ -796,7 +796,7 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcp= u, int ecode) if (!kvm_guest_has_lsx(&vcpu->arch)) kvm_queue_exception(vcpu, EXCCODE_INE, 0); else { - vcpu->arch.aux_ldtype =3D KVM_LARCH_LSX; + vcpu->arch.aux_ldtype =3D vcpu->arch.aux_used | KVM_LARCH_LSX; kvm_make_request(KVM_REQ_AUX_LOAD, vcpu); } =20 @@ -816,7 +816,7 @@ static int kvm_handle_lasx_disabled(struct kvm_vcpu *vc= pu, int ecode) if (!kvm_guest_has_lasx(&vcpu->arch)) kvm_queue_exception(vcpu, EXCCODE_INE, 0); else { - vcpu->arch.aux_ldtype =3D KVM_LARCH_LASX; + vcpu->arch.aux_ldtype =3D vcpu->arch.aux_used | KVM_LARCH_LASX; kvm_make_request(KVM_REQ_AUX_LOAD, vcpu); } =20 diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 8810fcd7e26e..0a111c74a330 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -239,23 +239,14 @@ static void kvm_late_check_requests(struct kvm_vcpu *= vcpu) } =20 if (kvm_check_request(KVM_REQ_AUX_LOAD, vcpu)) { - switch (vcpu->arch.aux_ldtype) { - case KVM_LARCH_FPU: - kvm_own_fpu(vcpu); - break; - case KVM_LARCH_LSX: - kvm_own_lsx(vcpu); - break; - case KVM_LARCH_LASX: + if (vcpu->arch.aux_ldtype & KVM_LARCH_LASX) kvm_own_lasx(vcpu); - break; - case KVM_LARCH_LBT: + else if (vcpu->arch.aux_ldtype & KVM_LARCH_LSX) + kvm_own_lsx(vcpu); + else if (vcpu->arch.aux_ldtype & KVM_LARCH_FPU) + kvm_own_fpu(vcpu); + else if (vcpu->arch.aux_ldtype =3D=3D KVM_LARCH_LBT) kvm_own_lbt(vcpu); - break; - default: - break; - } - vcpu->arch.aux_ldtype =3D 0; } } @@ -956,6 +947,7 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, break; case KVM_REG_LOONGARCH_VCPU_RESET: vcpu->arch.st.guest_addr =3D 0; + vcpu->arch.aux_used =3D 0; memset(&vcpu->arch.irq_pending, 0, 
sizeof(vcpu->arch.irq_pending)); memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear)); =20 @@ -1384,6 +1376,7 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu) =20 kvm_restore_fpu(&vcpu->arch.fpu); vcpu->arch.aux_inuse |=3D KVM_LARCH_FPU; + vcpu->arch.aux_used |=3D KVM_LARCH_FPU; trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU); } =20 @@ -1412,6 +1405,7 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu) =20 trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX); vcpu->arch.aux_inuse |=3D KVM_LARCH_LSX | KVM_LARCH_FPU; + vcpu->arch.aux_used |=3D KVM_LARCH_LSX | KVM_LARCH_FPU; =20 return 0; } @@ -1442,6 +1436,7 @@ int kvm_own_lasx(struct kvm_vcpu *vcpu) =20 trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX); vcpu->arch.aux_inuse |=3D KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU; + vcpu->arch.aux_used |=3D KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU; =20 return 0; } base-commit: c369299895a591d96745d6492d4888259b004a9e --=20 2.39.3