From: Fangyu Yu <fangyu.yu@linux.alibaba.com>
Add a VM capability that allows userspace to select the G-stage page table
format by setting HGATP.MODE on a per-VM basis.
Userspace enables the capability via KVM_ENABLE_CAP, passing the requested
HGATP.MODE in args[0]. The request is rejected with -EINVAL if the mode is
not supported by the host, and with -EBUSY if the VM has already been
committed (e.g. vCPUs have been created or any memslot is populated).
KVM_CHECK_EXTENSION(KVM_CAP_RISCV_SET_HGATP_MODE) returns a bitmask of the
HGATP.MODE formats supported by the host.
Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
---
Documentation/virt/kvm/api.rst | 27 +++++++++++++++++++++++++++
arch/riscv/kvm/vm.c | 20 ++++++++++++++++++--
include/uapi/linux/kvm.h | 1 +
3 files changed, 46 insertions(+), 2 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 01a3abef8abb..1a0c5ddacae8 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8765,6 +8765,33 @@ helpful if user space wants to emulate instructions which are not
This capability can be enabled dynamically even if VCPUs were already
created and are running.
+7.47 KVM_CAP_RISCV_SET_HGATP_MODE
+---------------------------------
+
+:Architectures: riscv
+:Type: VM
+:Parameters: args[0] contains the requested HGATP mode
+:Returns:
+ - 0 on success.
+ - -EINVAL if args[0] is outside the range of HGATP modes supported by the
+ hardware.
+ - -EBUSY if vCPUs have already been created for the VM, or if the VM has any
+ non-empty memslots.
+
+This capability allows userspace to explicitly select the HGATP mode for
+the VM. The selected mode must be supported by both KVM and hardware. This
+capability must be enabled before creating any vCPUs or memslots.
+
+``KVM_CHECK_EXTENSION(KVM_CAP_RISCV_SET_HGATP_MODE)`` returns a bitmask of
+HGATP.MODE values supported by the host. A return value of 0 indicates that
+the capability is not supported.
+
+The returned bitmask uses the following bit positions::
+
+ bit 0: HGATP.MODE = SV39X4
+ bit 1: HGATP.MODE = SV48X4
+ bit 2: HGATP.MODE = SV57X4
+
8. Other capabilities.
======================
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 4b2156df40fc..3bbbcb6a17a6 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -202,6 +202,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VM_GPA_BITS:
r = kvm_riscv_gstage_gpa_bits(&kvm->arch);
break;
+ case KVM_CAP_RISCV_SET_HGATP_MODE:
+ r = kvm_riscv_get_hgatp_mode_mask();
+ break;
default:
r = 0;
break;
@@ -212,12 +215,25 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
+ if (cap->flags)
+ return -EINVAL;
+
switch (cap->cap) {
case KVM_CAP_RISCV_MP_STATE_RESET:
- if (cap->flags)
- return -EINVAL;
kvm->arch.mp_state_reset = true;
return 0;
+ case KVM_CAP_RISCV_SET_HGATP_MODE:
+#ifdef CONFIG_64BIT
+ if (!kvm_riscv_hgatp_mode_is_valid(cap->args[0]))
+ return -EINVAL;
+
+ if (kvm->created_vcpus || !kvm_are_all_memslots_empty(kvm))
+ return -EBUSY;
+
+ kvm->arch.kvm_riscv_gstage_pgd_levels =
+ 3 + cap->args[0] - HGATP_MODE_SV39X4;
+#endif
+ return 0;
default:
return -EINVAL;
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index dddb781b0507..00c02a880518 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -974,6 +974,7 @@ struct kvm_enable_cap {
#define KVM_CAP_GUEST_MEMFD_FLAGS 244
#define KVM_CAP_ARM_SEA_TO_USER 245
#define KVM_CAP_S390_USER_OPEREXEC 246
+#define KVM_CAP_RISCV_SET_HGATP_MODE 247
struct kvm_irq_routing_irqchip {
__u32 irqchip;
--
2.50.1
On Mon, Feb 02, 2026 at 10:07:15PM +0800, fangyu.yu@linux.alibaba.com wrote:
> From: Fangyu Yu <fangyu.yu@linux.alibaba.com>
>
> Add a VM capability that allows userspace to select the G-stage page table
> format by setting HGATP.MODE on a per-VM basis.
>
> Userspace enables the capability via KVM_ENABLE_CAP, passing the requested
> HGATP.MODE in args[0]. The request is rejected with -EINVAL if the mode is
> not supported by the host, and with -EBUSY if the VM has already been
> committed (e.g. vCPUs have been created or any memslot is populated).
>
> KVM_CHECK_EXTENSION(KVM_CAP_RISCV_SET_HGATP_MODE) returns a bitmask of the
> HGATP.MODE formats supported by the host.
>
> Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
> ---
> Documentation/virt/kvm/api.rst | 27 +++++++++++++++++++++++++++
> arch/riscv/kvm/vm.c | 20 ++++++++++++++++++--
> include/uapi/linux/kvm.h | 1 +
> 3 files changed, 46 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 01a3abef8abb..1a0c5ddacae8 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -8765,6 +8765,33 @@ helpful if user space wants to emulate instructions which are not
> This capability can be enabled dynamically even if VCPUs were already
> created and are running.
>
> +7.47 KVM_CAP_RISCV_SET_HGATP_MODE
> +---------------------------------
> +
> +:Architectures: riscv
If we only want this to work for rv64, then we should write riscv64 here,
but, as I said in the last patch, I think we can just support rv32 too
by supporting its one and only mode.
> +:Type: VM
> +:Parameters: args[0] contains the requested HGATP mode
> +:Returns:
> + - 0 on success.
> + - -EINVAL if args[0] is outside the range of HGATP modes supported by the
> + hardware.
> + - -EBUSY if vCPUs have already been created for the VM, if the VM has any
> + non-empty memslots.
> +
> +This capability allows userspace to explicitly select the HGATP mode for
> +the VM. The selected mode must be supported by both KVM and hardware. This
> +capability must be enabled before creating any vCPUs or memslots.
We should document what happens if the capability is never enabled (i.e. the
mode is never set explicitly), in other words, what the default mode is.
> +
> +``KVM_CHECK_EXTENSION(KVM_CAP_RISCV_SET_HGATP_MODE)`` returns a bitmask of
> +HGATP.MODE values supported by the host. A return value of 0 indicates that
> +the capability is not supported.
> +
> +The returned bitmask uses the following bit positions::
> +
> + bit 0: HGATP.MODE = SV39X4
> + bit 1: HGATP.MODE = SV48X4
> + bit 2: HGATP.MODE = SV57X4
Could write something along the lines of the UAPI having the bit
definitions rather than duplicating that information here.
> +
> 8. Other capabilities.
> ======================
>
> diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
> index 4b2156df40fc..3bbbcb6a17a6 100644
> --- a/arch/riscv/kvm/vm.c
> +++ b/arch/riscv/kvm/vm.c
> @@ -202,6 +202,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> case KVM_CAP_VM_GPA_BITS:
> r = kvm_riscv_gstage_gpa_bits(&kvm->arch);
> break;
> + case KVM_CAP_RISCV_SET_HGATP_MODE:
> + r = kvm_riscv_get_hgatp_mode_mask();
> + break;
> default:
> r = 0;
> break;
> @@ -212,12 +215,25 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>
> int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> {
> + if (cap->flags)
> + return -EINVAL;
> +
> switch (cap->cap) {
> case KVM_CAP_RISCV_MP_STATE_RESET:
> - if (cap->flags)
> - return -EINVAL;
> kvm->arch.mp_state_reset = true;
> return 0;
> + case KVM_CAP_RISCV_SET_HGATP_MODE:
> +#ifdef CONFIG_64BIT
> + if (!kvm_riscv_hgatp_mode_is_valid(cap->args[0]))
> + return -EINVAL;
> +
> + if (kvm->created_vcpus || !kvm_are_all_memslots_empty(kvm))
> + return -EBUSY;
> +
> + kvm->arch.kvm_riscv_gstage_pgd_levels =
> + 3 + cap->args[0] - HGATP_MODE_SV39X4;
> +#endif
> + return 0;
> default:
> return -EINVAL;
> }
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index dddb781b0507..00c02a880518 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -974,6 +974,7 @@ struct kvm_enable_cap {
> #define KVM_CAP_GUEST_MEMFD_FLAGS 244
> #define KVM_CAP_ARM_SEA_TO_USER 245
> #define KVM_CAP_S390_USER_OPEREXEC 246
> +#define KVM_CAP_RISCV_SET_HGATP_MODE 247
>
> struct kvm_irq_routing_irqchip {
> __u32 irqchip;
> --
> 2.50.1
>
Thanks,
drew
>> From: Fangyu Yu <fangyu.yu@linux.alibaba.com>
>>
>> Add a VM capability that allows userspace to select the G-stage page table
>> format by setting HGATP.MODE on a per-VM basis.
>>
>> Userspace enables the capability via KVM_ENABLE_CAP, passing the requested
>> HGATP.MODE in args[0]. The request is rejected with -EINVAL if the mode is
>> not supported by the host, and with -EBUSY if the VM has already been
>> committed (e.g. vCPUs have been created or any memslot is populated).
>>
>> KVM_CHECK_EXTENSION(KVM_CAP_RISCV_SET_HGATP_MODE) returns a bitmask of the
>> HGATP.MODE formats supported by the host.
>>
>> Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
>> ---
>> Documentation/virt/kvm/api.rst | 27 +++++++++++++++++++++++++++
>> arch/riscv/kvm/vm.c | 20 ++++++++++++++++++--
>> include/uapi/linux/kvm.h | 1 +
>> 3 files changed, 46 insertions(+), 2 deletions(-)
>>
>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
>> index 01a3abef8abb..1a0c5ddacae8 100644
>> --- a/Documentation/virt/kvm/api.rst
>> +++ b/Documentation/virt/kvm/api.rst
>> @@ -8765,6 +8765,33 @@ helpful if user space wants to emulate instructions which are not
>> This capability can be enabled dynamically even if VCPUs were already
>> created and are running.
>>
>> +7.47 KVM_CAP_RISCV_SET_HGATP_MODE
>> +---------------------------------
>> +
>> +:Architectures: riscv
>
>If we only want this to work for rv64, then we should write riscv64 here,
>but, as I said in the last patch, I think we can just support rv32 too
>by supporting its one and only mode.
Agreed.
I'll keep ":Architectures: riscv" in the documentation and make the
capability available on both RV32 and RV64. For RV32, userspace will be
able to select the single supported G-stage mode (Sv32x4).
>> +:Type: VM
>> +:Parameters: args[0] contains the requested HGATP mode
>> +:Returns:
>> + - 0 on success.
>> + - -EINVAL if args[0] is outside the range of HGATP modes supported by the
>> + hardware.
>> + - -EBUSY if vCPUs have already been created for the VM, if the VM has any
>> + non-empty memslots.
>> +
>> +This capability allows userspace to explicitly select the HGATP mode for
>> +the VM. The selected mode must be supported by both KVM and hardware. This
>> +capability must be enabled before creating any vCPUs or memslots.
>
>We should write what happens if the capability (setting the mode) is not
>done, i.e. what's the default mode.
Good point.
I'll update the documentation to explicitly state the default behavior when
KVM_CAP_RISCV_SET_HGATP_MODE is not used: KVM will auto-select the HGATP
G-stage mode during VM initialization. In other words, userspace only needs
to set the capability when it wants to override the default auto-selection.
>> +
>> +``KVM_CHECK_EXTENSION(KVM_CAP_RISCV_SET_HGATP_MODE)`` returns a bitmask of
>> +HGATP.MODE values supported by the host. A return value of 0 indicates that
>> +the capability is not supported.
>> +
>> +The returned bitmask uses the following bit positions::
>> +
>> + bit 0: HGATP.MODE = SV39X4
>> + bit 1: HGATP.MODE = SV48X4
>> + bit 2: HGATP.MODE = SV57X4
>
>Could write something along the lines of the UAPI having the bit
>definitions rather than duplicating that information here.
>
Sure. I'll rework this section.
>> +
>> 8. Other capabilities.
>> ======================
>>
>> diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
>> index 4b2156df40fc..3bbbcb6a17a6 100644
>> --- a/arch/riscv/kvm/vm.c
>> +++ b/arch/riscv/kvm/vm.c
>> @@ -202,6 +202,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>> case KVM_CAP_VM_GPA_BITS:
>> r = kvm_riscv_gstage_gpa_bits(&kvm->arch);
>> break;
>> + case KVM_CAP_RISCV_SET_HGATP_MODE:
>> + r = kvm_riscv_get_hgatp_mode_mask();
>> + break;
>> default:
>> r = 0;
>> break;
>> @@ -212,12 +215,25 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>>
>> int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>> {
>> + if (cap->flags)
>> + return -EINVAL;
>> +
>> switch (cap->cap) {
>> case KVM_CAP_RISCV_MP_STATE_RESET:
>> - if (cap->flags)
>> - return -EINVAL;
>> kvm->arch.mp_state_reset = true;
>> return 0;
>> + case KVM_CAP_RISCV_SET_HGATP_MODE:
>> +#ifdef CONFIG_64BIT
>> + if (!kvm_riscv_hgatp_mode_is_valid(cap->args[0]))
>> + return -EINVAL;
>> +
>> + if (kvm->created_vcpus || !kvm_are_all_memslots_empty(kvm))
>> + return -EBUSY;
>> +
>> + kvm->arch.kvm_riscv_gstage_pgd_levels =
>> + 3 + cap->args[0] - HGATP_MODE_SV39X4;
>> +#endif
>> + return 0;
>> default:
>> return -EINVAL;
>> }
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index dddb781b0507..00c02a880518 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -974,6 +974,7 @@ struct kvm_enable_cap {
>> #define KVM_CAP_GUEST_MEMFD_FLAGS 244
>> #define KVM_CAP_ARM_SEA_TO_USER 245
>> #define KVM_CAP_S390_USER_OPEREXEC 246
>> +#define KVM_CAP_RISCV_SET_HGATP_MODE 247
>>
>> struct kvm_irq_routing_irqchip {
>> __u32 irqchip;
>> --
>> 2.50.1
>>
>
>Thanks,
>drew
Thanks,
Fangyu
© 2016 - 2026 Red Hat, Inc.