[PATCH v8 3/3] RISC-V: KVM: Reuse KVM_CAP_VM_GPA_BITS to select HGATP.MODE

fangyu.yu@linux.alibaba.com posted 3 patches 8 hours ago
[PATCH v8 3/3] RISC-V: KVM: Reuse KVM_CAP_VM_GPA_BITS to select HGATP.MODE
Posted by fangyu.yu@linux.alibaba.com 8 hours ago
From: Fangyu Yu <fangyu.yu@linux.alibaba.com>

Reuse KVM_CAP_VM_GPA_BITS to advertise and select the effective
G-stage GPA width for a VM.

KVM_CHECK_EXTENSION(KVM_CAP_VM_GPA_BITS) returns the effective GPA
bits for a VM. KVM_ENABLE_CAP(KVM_CAP_VM_GPA_BITS) allows userspace
to downsize the effective GPA width by selecting a smaller G-stage
page table format, based on the gpa_bits value passed in args[0]:
  - gpa_bits <= 41 selects Sv39x4 (pgd_levels=3)
  - gpa_bits <= 50 selects Sv48x4 (pgd_levels=4)
  - gpa_bits <= 59 selects Sv57x4 (pgd_levels=5)

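Reject the request with -EINVAL if flags is non-zero, if gpa_bits is
out of range, or if the selected format needs more page table levels
than kvm_riscv_gstage_max_pgd_levels, and with -EBUSY if vCPUs have
been created or any memslot is populated.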

Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
Reviewed-by: Andrew Jones <andrew.jones@oss.qualcomm.com>
Reviewed-by: Guo Ren <guoren@kernel.org>
---
 arch/riscv/kvm/vm.c | 44 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index fb7c4e07961f..a9f083feeb76 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -214,12 +214,52 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 
 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 {
+	if (cap->flags)
+		return -EINVAL;
+
 	switch (cap->cap) {
 	case KVM_CAP_RISCV_MP_STATE_RESET:
-		if (cap->flags)
-			return -EINVAL;
 		kvm->arch.mp_state_reset = true;
 		return 0;
+	case KVM_CAP_VM_GPA_BITS: {
+		unsigned long gpa_bits = cap->args[0];
+		unsigned long new_levels;
+		int r = 0;
+
+		/* Decide target pgd levels from requested gpa_bits */
+#ifdef CONFIG_64BIT
+		if (gpa_bits <= 41)
+			new_levels = 3;        /* Sv39x4 */
+		else if (gpa_bits <= 50)
+			new_levels = 4;        /* Sv48x4 */
+		else if (gpa_bits <= 59)
+			new_levels = 5;        /* Sv57x4 */
+		else
+			return -EINVAL;
+#else
+		/* 32-bit: only Sv32x4 */
+		if (gpa_bits <= 34)
+			new_levels = 2;
+		else
+			return -EINVAL;
+#endif
+		if (new_levels > kvm_riscv_gstage_max_pgd_levels)
+			return -EINVAL;
+
+		/* Follow KVM's lock ordering: kvm->lock -> kvm->slots_lock. */
+		mutex_lock(&kvm->lock);
+		mutex_lock(&kvm->slots_lock);
+
+		if (kvm->created_vcpus || !kvm_are_all_memslots_empty(kvm))
+			r = -EBUSY;
+		else
+			kvm->arch.pgd_levels = new_levels;
+
+		mutex_unlock(&kvm->slots_lock);
+		mutex_unlock(&kvm->lock);
+
+		return r;
+	}
 	default:
 		return -EINVAL;
 	}
-- 
2.50.1
Re: [PATCH v8 3/3] RISC-V: KVM: Reuse KVM_CAP_VM_GPA_BITS to select HGATP.MODE
Posted by Anup Patel 7 hours ago
On Fri, Apr 3, 2026 at 9:00 PM <fangyu.yu@linux.alibaba.com> wrote:
>
> From: Fangyu Yu <fangyu.yu@linux.alibaba.com>
>
> Reuse KVM_CAP_VM_GPA_BITS to advertise and select the effective
> G-stage GPA width for a VM.
>
> KVM_CHECK_EXTENSION(KVM_CAP_VM_GPA_BITS) returns the effective GPA
> bits for a VM, KVM_ENABLE_CAP(KVM_CAP_VM_GPA_BITS) allows userspace
> to downsize the effective GPA width by selecting a smaller G-stage
> page table format:
>   - gpa_bits <= 41 selects Sv39x4 (pgd_levels=3)
>   - gpa_bits <= 50 selects Sv48x4 (pgd_levels=4)
>   - gpa_bits <= 59 selects Sv57x4 (pgd_levels=5)
>
> Reject the request with -EINVAL for unsupported values and with -EBUSY
> if vCPUs have been created or any memslot is populated.
>
> Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
> Reviewed-by: Andrew Jones <andrew.jones@oss.qualcomm.com>
> Reviewed-by: Guo Ren <guoren@kernel.org>

LGTM.

Reviewed-by: Anup Patel <anup@brainfault.org>

Thanks,
Anup

> ---
>  arch/riscv/kvm/vm.c | 44 ++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 42 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
> index fb7c4e07961f..a9f083feeb76 100644
> --- a/arch/riscv/kvm/vm.c
> +++ b/arch/riscv/kvm/vm.c
> @@ -214,12 +214,52 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>
>  int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>  {
> +       if (cap->flags)
> +               return -EINVAL;
> +
>         switch (cap->cap) {
>         case KVM_CAP_RISCV_MP_STATE_RESET:
> -               if (cap->flags)
> -                       return -EINVAL;
>                 kvm->arch.mp_state_reset = true;
>                 return 0;
> +       case KVM_CAP_VM_GPA_BITS: {
> +               unsigned long gpa_bits = cap->args[0];
> +               unsigned long new_levels;
> +               int r = 0;
> +
> +               /* Decide target pgd levels from requested gpa_bits */
> +#ifdef CONFIG_64BIT
> +               if (gpa_bits <= 41)
> +                       new_levels = 3;        /* Sv39x4 */
> +               else if (gpa_bits <= 50)
> +                       new_levels = 4;        /* Sv48x4 */
> +               else if (gpa_bits <= 59)
> +                       new_levels = 5;        /* Sv57x4 */
> +               else
> +                       return -EINVAL;
> +#else
> +               /* 32-bit: only Sv32x4*/
> +               if (gpa_bits <= 34)
> +                       new_levels = 2;
> +               else
> +                       return -EINVAL;
> +#endif
> +               if (new_levels > kvm_riscv_gstage_max_pgd_levels)
> +                       return -EINVAL;
> +
> +               /* Follow KVM's lock ordering: kvm->lock -> kvm->slots_lock. */
> +               mutex_lock(&kvm->lock);
> +               mutex_lock(&kvm->slots_lock);
> +
> +               if (kvm->created_vcpus || !kvm_are_all_memslots_empty(kvm))
> +                       r = -EBUSY;
> +               else
> +                       kvm->arch.pgd_levels = new_levels;
> +
> +               mutex_unlock(&kvm->slots_lock);
> +               mutex_unlock(&kvm->lock);
> +
> +               return r;
> +       }
>         default:
>                 return -EINVAL;
>         }
> --
> 2.50.1
>