arch/riscv/include/asm/kvm_gstage.h | 12 ++--- arch/riscv/include/asm/kvm_host.h | 4 ++ arch/riscv/kvm/gstage.c | 82 +++++++++++++++++------------ arch/riscv/kvm/main.c | 4 +- arch/riscv/kvm/mmu.c | 18 +++++-- arch/riscv/kvm/vm.c | 2 +- arch/riscv/kvm/vmid.c | 2 +- 7 files changed, 74 insertions(+), 50 deletions(-)
From: Fangyu Yu <fangyu.yu@linux.alibaba.com>
Introduce two per-VM architecture-specific fields to support runtime
configuration of the G-stage page table format:
- kvm->arch.kvm_riscv_gstage_mode: specifies the HGATP mode used by the
current VM;
- kvm->arch.kvm_riscv_gstage_pgd_levels: the corresponding number of page
table levels for the selected mode.
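These fields take over the role of the previous global variables
kvm_riscv_gstage_mode and kvm_riscv_gstage_pgd_levels, which are renamed to
kvm_riscv_gstage_max_mode and kvm_riscv_gstage_max_pgd_levels and now only
record the maximum mode detected by the kernel at boot time. This enables
different virtual machines to independently select their G-stage page table
format instead of being forced to share that maximum mode; a VM for which
no mode has been selected defaults to the detected maximum.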
Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
---
arch/riscv/include/asm/kvm_gstage.h | 12 ++---
arch/riscv/include/asm/kvm_host.h | 4 ++
arch/riscv/kvm/gstage.c | 82 +++++++++++++++++------------
arch/riscv/kvm/main.c | 4 +-
arch/riscv/kvm/mmu.c | 18 +++++--
arch/riscv/kvm/vm.c | 2 +-
arch/riscv/kvm/vmid.c | 2 +-
7 files changed, 74 insertions(+), 50 deletions(-)
diff --git a/arch/riscv/include/asm/kvm_gstage.h b/arch/riscv/include/asm/kvm_gstage.h
index 595e2183173e..fdcada123b3f 100644
--- a/arch/riscv/include/asm/kvm_gstage.h
+++ b/arch/riscv/include/asm/kvm_gstage.h
@@ -29,16 +29,11 @@ struct kvm_gstage_mapping {
#define kvm_riscv_gstage_index_bits 10
#endif
-extern unsigned long kvm_riscv_gstage_mode;
-extern unsigned long kvm_riscv_gstage_pgd_levels;
+extern unsigned long kvm_riscv_gstage_max_mode;
+extern unsigned long kvm_riscv_gstage_max_pgd_levels;
#define kvm_riscv_gstage_pgd_xbits 2
#define kvm_riscv_gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + kvm_riscv_gstage_pgd_xbits))
-#define kvm_riscv_gstage_gpa_bits (HGATP_PAGE_SHIFT + \
- (kvm_riscv_gstage_pgd_levels * \
- kvm_riscv_gstage_index_bits) + \
- kvm_riscv_gstage_pgd_xbits)
-#define kvm_riscv_gstage_gpa_size ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits))
bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
pte_t **ptepp, u32 *ptep_level);
@@ -69,4 +64,7 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
void kvm_riscv_gstage_mode_detect(void);
+gpa_t kvm_riscv_gstage_gpa_size(struct kvm_arch *k);
+unsigned long kvm_riscv_gstage_gpa_bits(struct kvm_arch *k);
+
#endif
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 24585304c02b..27ea8e8fd5b0 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -103,6 +103,10 @@ struct kvm_arch {
/* KVM_CAP_RISCV_MP_STATE_RESET */
bool mp_state_reset;
+
+ unsigned long kvm_riscv_gstage_mode;
+ unsigned long kvm_riscv_gstage_pgd_levels;
+ bool gstage_mode_initialized;
};
struct kvm_cpu_trap {
diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c
index b67d60d722c2..06452e4c2ab2 100644
--- a/arch/riscv/kvm/gstage.c
+++ b/arch/riscv/kvm/gstage.c
@@ -12,22 +12,23 @@
#include <asm/kvm_gstage.h>
#ifdef CONFIG_64BIT
-unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV39X4;
-unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 3;
+unsigned long kvm_riscv_gstage_max_mode __ro_after_init = HGATP_MODE_SV39X4;
+unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 3;
#else
-unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV32X4;
-unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 2;
+unsigned long kvm_riscv_gstage_max_mode __ro_after_init = HGATP_MODE_SV32X4;
+unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 2;
#endif
#define gstage_pte_leaf(__ptep) \
(pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
-static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
+static inline unsigned long gstage_pte_index(struct kvm_gstage *gstage,
+ gpa_t addr, u32 level)
{
unsigned long mask;
unsigned long shift = HGATP_PAGE_SHIFT + (kvm_riscv_gstage_index_bits * level);
- if (level == (kvm_riscv_gstage_pgd_levels - 1))
+ if (level == (gstage->kvm->arch.kvm_riscv_gstage_pgd_levels - 1))
mask = (PTRS_PER_PTE * (1UL << kvm_riscv_gstage_pgd_xbits)) - 1;
else
mask = PTRS_PER_PTE - 1;
@@ -40,12 +41,13 @@ static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
}
-static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
+static int gstage_page_size_to_level(struct kvm_gstage *gstage, unsigned long page_size,
+ u32 *out_level)
{
u32 i;
unsigned long psz = 1UL << 12;
- for (i = 0; i < kvm_riscv_gstage_pgd_levels; i++) {
+ for (i = 0; i < gstage->kvm->arch.kvm_riscv_gstage_pgd_levels; i++) {
if (page_size == (psz << (i * kvm_riscv_gstage_index_bits))) {
*out_level = i;
return 0;
@@ -55,21 +57,23 @@ static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
return -EINVAL;
}
-static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder)
+static int gstage_level_to_page_order(struct kvm_gstage *gstage, u32 level,
+ unsigned long *out_pgorder)
{
- if (kvm_riscv_gstage_pgd_levels < level)
+ if (gstage->kvm->arch.kvm_riscv_gstage_pgd_levels < level)
return -EINVAL;
*out_pgorder = 12 + (level * kvm_riscv_gstage_index_bits);
return 0;
}
-static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
+static int gstage_level_to_page_size(struct kvm_gstage *gstage, u32 level,
+ unsigned long *out_pgsize)
{
int rc;
unsigned long page_order = PAGE_SHIFT;
- rc = gstage_level_to_page_order(level, &page_order);
+ rc = gstage_level_to_page_order(gstage, level, &page_order);
if (rc)
return rc;
@@ -81,11 +85,11 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
pte_t **ptepp, u32 *ptep_level)
{
pte_t *ptep;
- u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
+ u32 current_level = gstage->kvm->arch.kvm_riscv_gstage_pgd_levels - 1;
*ptep_level = current_level;
ptep = (pte_t *)gstage->pgd;
- ptep = &ptep[gstage_pte_index(addr, current_level)];
+ ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
while (ptep && pte_val(ptep_get(ptep))) {
if (gstage_pte_leaf(ptep)) {
*ptep_level = current_level;
@@ -97,7 +101,7 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
current_level--;
*ptep_level = current_level;
ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
- ptep = &ptep[gstage_pte_index(addr, current_level)];
+ ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
} else {
ptep = NULL;
}
@@ -110,7 +114,7 @@ static void gstage_tlb_flush(struct kvm_gstage *gstage, u32 level, gpa_t addr)
{
unsigned long order = PAGE_SHIFT;
- if (gstage_level_to_page_order(level, &order))
+ if (gstage_level_to_page_order(gstage, level, &order))
return;
addr &= ~(BIT(order) - 1);
@@ -125,9 +129,9 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
struct kvm_mmu_memory_cache *pcache,
const struct kvm_gstage_mapping *map)
{
- u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
+ u32 current_level = gstage->kvm->arch.kvm_riscv_gstage_pgd_levels - 1;
pte_t *next_ptep = (pte_t *)gstage->pgd;
- pte_t *ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
+ pte_t *ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];
if (current_level < map->level)
return -EINVAL;
@@ -151,7 +155,7 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
}
current_level--;
- ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
+ ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];
}
if (pte_val(*ptep) != pte_val(map->pte)) {
@@ -175,7 +179,7 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
out_map->addr = gpa;
out_map->level = 0;
- ret = gstage_page_size_to_level(page_size, &out_map->level);
+ ret = gstage_page_size_to_level(gstage, page_size, &out_map->level);
if (ret)
return ret;
@@ -217,7 +221,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
u32 next_ptep_level;
unsigned long next_page_size, page_size;
- ret = gstage_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
return;
@@ -229,7 +233,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
if (ptep_level && !gstage_pte_leaf(ptep)) {
next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
next_ptep_level = ptep_level - 1;
- ret = gstage_level_to_page_size(next_ptep_level, &next_page_size);
+ ret = gstage_level_to_page_size(gstage, next_ptep_level, &next_page_size);
if (ret)
return;
@@ -263,7 +267,7 @@ void kvm_riscv_gstage_unmap_range(struct kvm_gstage *gstage,
while (addr < end) {
found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
- ret = gstage_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
break;
@@ -297,7 +301,7 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
while (addr < end) {
found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
- ret = gstage_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
break;
@@ -319,41 +323,51 @@ void __init kvm_riscv_gstage_mode_detect(void)
/* Try Sv57x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
- kvm_riscv_gstage_mode = HGATP_MODE_SV57X4;
- kvm_riscv_gstage_pgd_levels = 5;
+ kvm_riscv_gstage_max_mode = HGATP_MODE_SV57X4;
+ kvm_riscv_gstage_max_pgd_levels = 5;
goto done;
}
/* Try Sv48x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
- kvm_riscv_gstage_mode = HGATP_MODE_SV48X4;
- kvm_riscv_gstage_pgd_levels = 4;
+ kvm_riscv_gstage_max_mode = HGATP_MODE_SV48X4;
+ kvm_riscv_gstage_max_pgd_levels = 4;
goto done;
}
/* Try Sv39x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) {
- kvm_riscv_gstage_mode = HGATP_MODE_SV39X4;
- kvm_riscv_gstage_pgd_levels = 3;
+ kvm_riscv_gstage_max_mode = HGATP_MODE_SV39X4;
+ kvm_riscv_gstage_max_pgd_levels = 3;
goto done;
}
#else /* CONFIG_32BIT */
/* Try Sv32x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) {
- kvm_riscv_gstage_mode = HGATP_MODE_SV32X4;
- kvm_riscv_gstage_pgd_levels = 2;
+ kvm_riscv_gstage_max_mode = HGATP_MODE_SV32X4;
+ kvm_riscv_gstage_max_pgd_levels = 2;
goto done;
}
#endif
/* KVM depends on !HGATP_MODE_OFF */
- kvm_riscv_gstage_mode = HGATP_MODE_OFF;
- kvm_riscv_gstage_pgd_levels = 0;
+ kvm_riscv_gstage_max_mode = HGATP_MODE_OFF;
+ kvm_riscv_gstage_max_pgd_levels = 0;
done:
csr_write(CSR_HGATP, 0);
kvm_riscv_local_hfence_gvma_all();
}
+
+unsigned long kvm_riscv_gstage_gpa_bits(struct kvm_arch *k) {
+ return (HGATP_PAGE_SHIFT + (k->kvm_riscv_gstage_pgd_levels *
+ kvm_riscv_gstage_index_bits) +
+ kvm_riscv_gstage_pgd_xbits);
+}
+
+gpa_t kvm_riscv_gstage_gpa_size(struct kvm_arch *k) {
+ return ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits(k)));
+}
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 45536af521f0..56a246e0e791 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -105,7 +105,7 @@ static int __init riscv_kvm_init(void)
return rc;
kvm_riscv_gstage_mode_detect();
- switch (kvm_riscv_gstage_mode) {
+ switch (kvm_riscv_gstage_max_mode) {
case HGATP_MODE_SV32X4:
str = "Sv32x4";
break;
@@ -164,7 +164,7 @@ static int __init riscv_kvm_init(void)
(rc) ? slist : "no features");
}
- kvm_info("using %s G-stage page table format\n", str);
+ kvm_info("Max G-stage page table format %s \n", str);
kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 4ab06697bfc0..574783907162 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -67,7 +67,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
if (!writable)
map.pte = pte_wrprotect(map.pte);
- ret = kvm_mmu_topup_memory_cache(&pcache, kvm_riscv_gstage_pgd_levels);
+ ret = kvm_mmu_topup_memory_cache(&pcache,kvm->arch.kvm_riscv_gstage_pgd_levels);
if (ret)
goto out;
@@ -186,8 +186,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* space addressable by the KVM guest GPA space.
*/
if ((new->base_gfn + new->npages) >=
- (kvm_riscv_gstage_gpa_size >> PAGE_SHIFT))
+ (kvm_riscv_gstage_gpa_size(&kvm->arch) >> PAGE_SHIFT)) {
return -EFAULT;
+ }
hva = new->userspace_addr;
size = new->npages << PAGE_SHIFT;
@@ -332,7 +333,7 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
memset(out_map, 0, sizeof(*out_map));
/* We need minimum second+third level pages */
- ret = kvm_mmu_topup_memory_cache(pcache, kvm_riscv_gstage_pgd_levels);
+ ret = kvm_mmu_topup_memory_cache(pcache, kvm->arch.kvm_riscv_gstage_pgd_levels);
if (ret) {
kvm_err("Failed to topup G-stage cache\n");
return ret;
@@ -431,6 +432,11 @@ int kvm_riscv_mmu_alloc_pgd(struct kvm *kvm)
return -ENOMEM;
kvm->arch.pgd = page_to_virt(pgd_page);
kvm->arch.pgd_phys = page_to_phys(pgd_page);
+ if (!kvm->arch.gstage_mode_initialized) {
+ /*user-space didn't set KVM_CAP_RISC_HGATP_MODE cap*/
+ kvm->arch.kvm_riscv_gstage_mode = kvm_riscv_gstage_max_mode;
+ kvm->arch.kvm_riscv_gstage_pgd_levels = kvm_riscv_gstage_max_pgd_levels;
+ }
return 0;
}
@@ -446,10 +452,12 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm)
gstage.flags = 0;
gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage.pgd = kvm->arch.pgd;
- kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size, false);
+ kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size(&kvm->arch), false);
pgd = READ_ONCE(kvm->arch.pgd);
kvm->arch.pgd = NULL;
kvm->arch.pgd_phys = 0;
+ kvm->arch.kvm_riscv_gstage_mode = HGATP_MODE_OFF;
+ kvm->arch.kvm_riscv_gstage_pgd_levels = 0;
}
spin_unlock(&kvm->mmu_lock);
@@ -459,8 +467,8 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm)
void kvm_riscv_mmu_update_hgatp(struct kvm_vcpu *vcpu)
{
- unsigned long hgatp = kvm_riscv_gstage_mode << HGATP_MODE_SHIFT;
struct kvm_arch *k = &vcpu->kvm->arch;
+ unsigned long hgatp = k->kvm_riscv_gstage_mode << HGATP_MODE_SHIFT;
hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID;
hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 66d91ae6e9b2..4b2156df40fc 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -200,7 +200,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_USER_MEM_SLOTS;
break;
case KVM_CAP_VM_GPA_BITS:
- r = kvm_riscv_gstage_gpa_bits;
+ r = kvm_riscv_gstage_gpa_bits(&kvm->arch);
break;
default:
r = 0;
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index cf34d448289d..db27430f111e 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -26,7 +26,7 @@ static DEFINE_SPINLOCK(vmid_lock);
void __init kvm_riscv_gstage_vmid_detect(void)
{
/* Figure-out number of VMID bits in HW */
- csr_write(CSR_HGATP, (kvm_riscv_gstage_mode << HGATP_MODE_SHIFT) | HGATP_VMID);
+ csr_write(CSR_HGATP, (kvm_riscv_gstage_max_mode << HGATP_MODE_SHIFT) | HGATP_VMID);
vmid_bits = csr_read(CSR_HGATP);
vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT;
vmid_bits = fls_long(vmid_bits);
--
2.50.1
On Mon, Jan 05, 2026 at 10:32:31PM +0800, fangyu.yu@linux.alibaba.com wrote:
> From: Fangyu Yu <fangyu.yu@linux.alibaba.com>
>
> Introduces two per-VM architecture-specific fields to support runtime
> configuration of the G-stage page table format:
>
> - kvm->arch.kvm_riscv_gstage_mode: specifies the HGATP mode used by the
> current VM;
> - kvm->arch.kvm_riscv_gstage_pgd_levels: the corresponding number of page
> table levels for the selected mode.
>
> These fields replace the previous global variables
> kvm_riscv_gstage_mode and kvm_riscv_gstage_pgd_levels, enabling different
> virtual machines to independently select their G-stage page table format
> instead of being forced to share the maximum mode detected by the kernel
> at boot time.
>
> Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
> ---
> arch/riscv/include/asm/kvm_gstage.h | 12 ++---
> arch/riscv/include/asm/kvm_host.h | 4 ++
> arch/riscv/kvm/gstage.c | 82 +++++++++++++++++------------
> arch/riscv/kvm/main.c | 4 +-
> arch/riscv/kvm/mmu.c | 18 +++++--
> arch/riscv/kvm/vm.c | 2 +-
> arch/riscv/kvm/vmid.c | 2 +-
> 7 files changed, 74 insertions(+), 50 deletions(-)
>
> diff --git a/arch/riscv/include/asm/kvm_gstage.h b/arch/riscv/include/asm/kvm_gstage.h
> index 595e2183173e..fdcada123b3f 100644
> --- a/arch/riscv/include/asm/kvm_gstage.h
> +++ b/arch/riscv/include/asm/kvm_gstage.h
> @@ -29,16 +29,11 @@ struct kvm_gstage_mapping {
> #define kvm_riscv_gstage_index_bits 10
> #endif
>
> -extern unsigned long kvm_riscv_gstage_mode;
> -extern unsigned long kvm_riscv_gstage_pgd_levels;
> +extern unsigned long kvm_riscv_gstage_max_mode;
> +extern unsigned long kvm_riscv_gstage_max_pgd_levels;
>
> #define kvm_riscv_gstage_pgd_xbits 2
> #define kvm_riscv_gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + kvm_riscv_gstage_pgd_xbits))
> -#define kvm_riscv_gstage_gpa_bits (HGATP_PAGE_SHIFT + \
> - (kvm_riscv_gstage_pgd_levels * \
> - kvm_riscv_gstage_index_bits) + \
> - kvm_riscv_gstage_pgd_xbits)
> -#define kvm_riscv_gstage_gpa_size ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits))
>
> bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
> pte_t **ptepp, u32 *ptep_level);
> @@ -69,4 +64,7 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
>
> void kvm_riscv_gstage_mode_detect(void);
>
> +gpa_t kvm_riscv_gstage_gpa_size(struct kvm_arch *k);
> +unsigned long kvm_riscv_gstage_gpa_bits(struct kvm_arch *k);
> +
> #endif
> diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
> index 24585304c02b..27ea8e8fd5b0 100644
> --- a/arch/riscv/include/asm/kvm_host.h
> +++ b/arch/riscv/include/asm/kvm_host.h
> @@ -103,6 +103,10 @@ struct kvm_arch {
>
> /* KVM_CAP_RISCV_MP_STATE_RESET */
> bool mp_state_reset;
> +
> + unsigned long kvm_riscv_gstage_mode;
There's a 1:1 mapping between mode and levels, so we don't need to track
both. Since the mode is rarely used, I think something like this would still
provide enough convenience without requiring the extra storage:
/*
 * Derive the HGATP mode from the VM's number of G-stage page table levels.
 *
 * Each supported level count maps to exactly one HGATP mode, so the mode can
 * be looked up on demand instead of being stored next to the level count.
 * Table slots that are not listed below are zero-initialized.
 *
 * NOTE(review): the index is not range-checked; this assumes
 * kvm_riscv_gstage_pgd_levels never exceeds 5 -- confirm against the code
 * that sets it.
 */
static inline unsigned long kvm_riscv_gstage_mode(struct kvm_gstage *gstage)
{
	/* static const: one read-only table rather than a per-call stack copy */
	static const unsigned long modes[] = {
		[2] = HGATP_MODE_SV32X4,
		[3] = HGATP_MODE_SV39X4,
		[4] = HGATP_MODE_SV48X4,
		[5] = HGATP_MODE_SV57X4,
	};

	return modes[gstage->kvm->arch.kvm_riscv_gstage_pgd_levels];
}
> + unsigned long kvm_riscv_gstage_pgd_levels;
> + bool gstage_mode_initialized;
> };
>
> struct kvm_cpu_trap {
> diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c
> index b67d60d722c2..06452e4c2ab2 100644
> --- a/arch/riscv/kvm/gstage.c
> +++ b/arch/riscv/kvm/gstage.c
> @@ -12,22 +12,23 @@
> #include <asm/kvm_gstage.h>
>
> #ifdef CONFIG_64BIT
> -unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV39X4;
> -unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 3;
> +unsigned long kvm_riscv_gstage_max_mode __ro_after_init = HGATP_MODE_SV39X4;
With a kvm_riscv_gstage_mode() function we don't need
kvm_riscv_gstage_max_mode either.
> +unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 3;
> #else
> -unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV32X4;
> -unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 2;
> +unsigned long kvm_riscv_gstage_max_mode __ro_after_init = HGATP_MODE_SV32X4;
> +unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 2;
> #endif
>
> #define gstage_pte_leaf(__ptep) \
> (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
>
> -static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
> +static inline unsigned long gstage_pte_index(struct kvm_gstage *gstage,
> + gpa_t addr, u32 level)
> {
> unsigned long mask;
> unsigned long shift = HGATP_PAGE_SHIFT + (kvm_riscv_gstage_index_bits * level);
>
> - if (level == (kvm_riscv_gstage_pgd_levels - 1))
> + if (level == (gstage->kvm->arch.kvm_riscv_gstage_pgd_levels - 1))
nit: we can drop the unnecessary () while touching this line.
> mask = (PTRS_PER_PTE * (1UL << kvm_riscv_gstage_pgd_xbits)) - 1;
> else
> mask = PTRS_PER_PTE - 1;
> @@ -40,12 +41,13 @@ static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
> return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
> }
>
> -static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
> +static int gstage_page_size_to_level(struct kvm_gstage *gstage, unsigned long page_size,
> + u32 *out_level)
> {
> u32 i;
> unsigned long psz = 1UL << 12;
>
> - for (i = 0; i < kvm_riscv_gstage_pgd_levels; i++) {
> + for (i = 0; i < gstage->kvm->arch.kvm_riscv_gstage_pgd_levels; i++) {
> if (page_size == (psz << (i * kvm_riscv_gstage_index_bits))) {
> *out_level = i;
> return 0;
> @@ -55,21 +57,23 @@ static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
> return -EINVAL;
> }
>
> -static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder)
> +static int gstage_level_to_page_order(struct kvm_gstage *gstage, u32 level,
> + unsigned long *out_pgorder)
> {
> - if (kvm_riscv_gstage_pgd_levels < level)
> + if (gstage->kvm->arch.kvm_riscv_gstage_pgd_levels < level)
> return -EINVAL;
>
> *out_pgorder = 12 + (level * kvm_riscv_gstage_index_bits);
> return 0;
> }
>
> -static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
> +static int gstage_level_to_page_size(struct kvm_gstage *gstage, u32 level,
> + unsigned long *out_pgsize)
> {
> int rc;
> unsigned long page_order = PAGE_SHIFT;
>
> - rc = gstage_level_to_page_order(level, &page_order);
> + rc = gstage_level_to_page_order(gstage, level, &page_order);
> if (rc)
> return rc;
>
> @@ -81,11 +85,11 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
> pte_t **ptepp, u32 *ptep_level)
> {
> pte_t *ptep;
> - u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
> + u32 current_level = gstage->kvm->arch.kvm_riscv_gstage_pgd_levels - 1;
>
> *ptep_level = current_level;
> ptep = (pte_t *)gstage->pgd;
> - ptep = &ptep[gstage_pte_index(addr, current_level)];
> + ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
> while (ptep && pte_val(ptep_get(ptep))) {
> if (gstage_pte_leaf(ptep)) {
> *ptep_level = current_level;
> @@ -97,7 +101,7 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
> current_level--;
> *ptep_level = current_level;
> ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
> - ptep = &ptep[gstage_pte_index(addr, current_level)];
> + ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
> } else {
> ptep = NULL;
> }
> @@ -110,7 +114,7 @@ static void gstage_tlb_flush(struct kvm_gstage *gstage, u32 level, gpa_t addr)
> {
> unsigned long order = PAGE_SHIFT;
>
> - if (gstage_level_to_page_order(level, &order))
> + if (gstage_level_to_page_order(gstage, level, &order))
> return;
> addr &= ~(BIT(order) - 1);
>
> @@ -125,9 +129,9 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
> struct kvm_mmu_memory_cache *pcache,
> const struct kvm_gstage_mapping *map)
> {
> - u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
> + u32 current_level = gstage->kvm->arch.kvm_riscv_gstage_pgd_levels - 1;
> pte_t *next_ptep = (pte_t *)gstage->pgd;
> - pte_t *ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
> + pte_t *ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];
>
> if (current_level < map->level)
> return -EINVAL;
> @@ -151,7 +155,7 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
> }
>
> current_level--;
> - ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
> + ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];
> }
>
> if (pte_val(*ptep) != pte_val(map->pte)) {
> @@ -175,7 +179,7 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
> out_map->addr = gpa;
> out_map->level = 0;
>
> - ret = gstage_page_size_to_level(page_size, &out_map->level);
> + ret = gstage_page_size_to_level(gstage, page_size, &out_map->level);
> if (ret)
> return ret;
>
> @@ -217,7 +221,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
> u32 next_ptep_level;
> unsigned long next_page_size, page_size;
>
> - ret = gstage_level_to_page_size(ptep_level, &page_size);
> + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
> if (ret)
> return;
>
> @@ -229,7 +233,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
> if (ptep_level && !gstage_pte_leaf(ptep)) {
> next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
> next_ptep_level = ptep_level - 1;
> - ret = gstage_level_to_page_size(next_ptep_level, &next_page_size);
> + ret = gstage_level_to_page_size(gstage, next_ptep_level, &next_page_size);
> if (ret)
> return;
>
> @@ -263,7 +267,7 @@ void kvm_riscv_gstage_unmap_range(struct kvm_gstage *gstage,
>
> while (addr < end) {
> found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
> - ret = gstage_level_to_page_size(ptep_level, &page_size);
> + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
> if (ret)
> break;
>
> @@ -297,7 +301,7 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
>
> while (addr < end) {
> found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
> - ret = gstage_level_to_page_size(ptep_level, &page_size);
> + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
> if (ret)
> break;
>
> @@ -319,41 +323,51 @@ void __init kvm_riscv_gstage_mode_detect(void)
> /* Try Sv57x4 G-stage mode */
> csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
> if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
> - kvm_riscv_gstage_mode = HGATP_MODE_SV57X4;
> - kvm_riscv_gstage_pgd_levels = 5;
> + kvm_riscv_gstage_max_mode = HGATP_MODE_SV57X4;
> + kvm_riscv_gstage_max_pgd_levels = 5;
> goto done;
> }
>
> /* Try Sv48x4 G-stage mode */
> csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
> if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
> - kvm_riscv_gstage_mode = HGATP_MODE_SV48X4;
> - kvm_riscv_gstage_pgd_levels = 4;
> + kvm_riscv_gstage_max_mode = HGATP_MODE_SV48X4;
> + kvm_riscv_gstage_max_pgd_levels = 4;
> goto done;
> }
>
> /* Try Sv39x4 G-stage mode */
> csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
> if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) {
> - kvm_riscv_gstage_mode = HGATP_MODE_SV39X4;
> - kvm_riscv_gstage_pgd_levels = 3;
> + kvm_riscv_gstage_max_mode = HGATP_MODE_SV39X4;
> + kvm_riscv_gstage_max_pgd_levels = 3;
> goto done;
> }
> #else /* CONFIG_32BIT */
> /* Try Sv32x4 G-stage mode */
> csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
> if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) {
> - kvm_riscv_gstage_mode = HGATP_MODE_SV32X4;
> - kvm_riscv_gstage_pgd_levels = 2;
> + kvm_riscv_gstage_max_mode = HGATP_MODE_SV32X4;
> + kvm_riscv_gstage_max_pgd_levels = 2;
> goto done;
> }
> #endif
>
> /* KVM depends on !HGATP_MODE_OFF */
> - kvm_riscv_gstage_mode = HGATP_MODE_OFF;
> - kvm_riscv_gstage_pgd_levels = 0;
> + kvm_riscv_gstage_max_mode = HGATP_MODE_OFF;
> + kvm_riscv_gstage_max_pgd_levels = 0;
>
> done:
> csr_write(CSR_HGATP, 0);
> kvm_riscv_local_hfence_gvma_all();
> }
> +
> +unsigned long kvm_riscv_gstage_gpa_bits(struct kvm_arch *k) {
Did you run checkpatch? I think it requires '{' to be on its own line.
nit: s/k/ka/ would be consistent with other archs. I see that k is already
used in riscv's kvm_riscv_mmu_update_hgatp(), but that can be fixed up in
this patch as well, since the patch changes that function too.
> + return (HGATP_PAGE_SHIFT + (k->kvm_riscv_gstage_pgd_levels *
> + kvm_riscv_gstage_index_bits) +
> + kvm_riscv_gstage_pgd_xbits);
> +}
> +
> +gpa_t kvm_riscv_gstage_gpa_size(struct kvm_arch *k) {
same comments as above
> + return ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits(k)));
return BIT_ULL(kvm_riscv_gstage_gpa_bits(ka))
(the cast is implicit from return type)
> +}
> diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
> index 45536af521f0..56a246e0e791 100644
> --- a/arch/riscv/kvm/main.c
> +++ b/arch/riscv/kvm/main.c
> @@ -105,7 +105,7 @@ static int __init riscv_kvm_init(void)
> return rc;
>
> kvm_riscv_gstage_mode_detect();
> - switch (kvm_riscv_gstage_mode) {
> + switch (kvm_riscv_gstage_max_mode) {
> case HGATP_MODE_SV32X4:
> str = "Sv32x4";
> break;
> @@ -164,7 +164,7 @@ static int __init riscv_kvm_init(void)
> (rc) ? slist : "no features");
> }
>
> - kvm_info("using %s G-stage page table format\n", str);
> + kvm_info("Max G-stage page table format %s \n", str);
>
> kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());
>
> diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
> index 4ab06697bfc0..574783907162 100644
> --- a/arch/riscv/kvm/mmu.c
> +++ b/arch/riscv/kvm/mmu.c
> @@ -67,7 +67,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
> if (!writable)
> map.pte = pte_wrprotect(map.pte);
>
> - ret = kvm_mmu_topup_memory_cache(&pcache, kvm_riscv_gstage_pgd_levels);
> + ret = kvm_mmu_topup_memory_cache(&pcache,kvm->arch.kvm_riscv_gstage_pgd_levels);
^ missing space after the comma (between '&pcache,' and 'kvm->arch...')
> if (ret)
> goto out;
>
> @@ -186,8 +186,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
> * space addressable by the KVM guest GPA space.
> */
> if ((new->base_gfn + new->npages) >=
> - (kvm_riscv_gstage_gpa_size >> PAGE_SHIFT))
> + (kvm_riscv_gstage_gpa_size(&kvm->arch) >> PAGE_SHIFT)) {
> return -EFAULT;
> + }
nit: Remove the unnecessary () and the '{' and the condition will fit on
one 100 char line.
>
> hva = new->userspace_addr;
> size = new->npages << PAGE_SHIFT;
> @@ -332,7 +333,7 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
> memset(out_map, 0, sizeof(*out_map));
>
> /* We need minimum second+third level pages */
> - ret = kvm_mmu_topup_memory_cache(pcache, kvm_riscv_gstage_pgd_levels);
> + ret = kvm_mmu_topup_memory_cache(pcache, kvm->arch.kvm_riscv_gstage_pgd_levels);
> if (ret) {
> kvm_err("Failed to topup G-stage cache\n");
> return ret;
> @@ -431,6 +432,11 @@ int kvm_riscv_mmu_alloc_pgd(struct kvm *kvm)
> return -ENOMEM;
> kvm->arch.pgd = page_to_virt(pgd_page);
> kvm->arch.pgd_phys = page_to_phys(pgd_page);
> + if (!kvm->arch.gstage_mode_initialized) {
> + /*user-space didn't set KVM_CAP_RISC_HGATP_MODE cap*/
^ missing space after the opening '/*' and before the closing '*/'
> + kvm->arch.kvm_riscv_gstage_mode = kvm_riscv_gstage_max_mode;
> + kvm->arch.kvm_riscv_gstage_pgd_levels = kvm_riscv_gstage_max_pgd_levels;
Missing 'kvm->arch.gstage_mode_initialized = true' statement.
> + }
>
> return 0;
> }
> @@ -446,10 +452,12 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm)
> gstage.flags = 0;
> gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
> gstage.pgd = kvm->arch.pgd;
> - kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size, false);
> + kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size(&kvm->arch), false);
> pgd = READ_ONCE(kvm->arch.pgd);
> kvm->arch.pgd = NULL;
> kvm->arch.pgd_phys = 0;
> + kvm->arch.kvm_riscv_gstage_mode = HGATP_MODE_OFF;
> + kvm->arch.kvm_riscv_gstage_pgd_levels = 0;
> }
> spin_unlock(&kvm->mmu_lock);
>
> @@ -459,8 +467,8 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm)
>
> void kvm_riscv_mmu_update_hgatp(struct kvm_vcpu *vcpu)
> {
> - unsigned long hgatp = kvm_riscv_gstage_mode << HGATP_MODE_SHIFT;
> struct kvm_arch *k = &vcpu->kvm->arch;
> + unsigned long hgatp = k->kvm_riscv_gstage_mode << HGATP_MODE_SHIFT;
>
> hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID;
> hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
> diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
> index 66d91ae6e9b2..4b2156df40fc 100644
> --- a/arch/riscv/kvm/vm.c
> +++ b/arch/riscv/kvm/vm.c
> @@ -200,7 +200,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> r = KVM_USER_MEM_SLOTS;
> break;
> case KVM_CAP_VM_GPA_BITS:
> - r = kvm_riscv_gstage_gpa_bits;
> + r = kvm_riscv_gstage_gpa_bits(&kvm->arch);
> break;
> default:
> r = 0;
> diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
> index cf34d448289d..db27430f111e 100644
> --- a/arch/riscv/kvm/vmid.c
> +++ b/arch/riscv/kvm/vmid.c
> @@ -26,7 +26,7 @@ static DEFINE_SPINLOCK(vmid_lock);
> void __init kvm_riscv_gstage_vmid_detect(void)
> {
> /* Figure-out number of VMID bits in HW */
> - csr_write(CSR_HGATP, (kvm_riscv_gstage_mode << HGATP_MODE_SHIFT) | HGATP_VMID);
> + csr_write(CSR_HGATP, (kvm_riscv_gstage_max_mode << HGATP_MODE_SHIFT) | HGATP_VMID);
> vmid_bits = csr_read(CSR_HGATP);
> vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT;
> vmid_bits = fls_long(vmid_bits);
> --
> 2.50.1
>
Thanks,
drew
>> From: Fangyu Yu <fangyu.yu@linux.alibaba.com>
>>
>> Introduces two per-VM architecture-specific fields to support runtime
>> configuration of the G-stage page table format:
>>
>> - kvm->arch.kvm_riscv_gstage_mode: specifies the HGATP mode used by the
>> current VM;
>> - kvm->arch.kvm_riscv_gstage_pgd_levels: the corresponding number of page
>> table levels for the selected mode.
>>
>> These fields replace the previous global variables
>> kvm_riscv_gstage_mode and kvm_riscv_gstage_pgd_levels, enabling different
>> virtual machines to independently select their G-stage page table format
>> instead of being forced to share the maximum mode detected by the kernel
>> at boot time.
>>
>> Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
>> ---
>> arch/riscv/include/asm/kvm_gstage.h | 12 ++---
>> arch/riscv/include/asm/kvm_host.h | 4 ++
>> arch/riscv/kvm/gstage.c | 82 +++++++++++++++++------------
>> arch/riscv/kvm/main.c | 4 +-
>> arch/riscv/kvm/mmu.c | 18 +++++--
>> arch/riscv/kvm/vm.c | 2 +-
>> arch/riscv/kvm/vmid.c | 2 +-
>> 7 files changed, 74 insertions(+), 50 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/kvm_gstage.h b/arch/riscv/include/asm/kvm_gstage.h
>> index 595e2183173e..fdcada123b3f 100644
>> --- a/arch/riscv/include/asm/kvm_gstage.h
>> +++ b/arch/riscv/include/asm/kvm_gstage.h
>> @@ -29,16 +29,11 @@ struct kvm_gstage_mapping {
>> #define kvm_riscv_gstage_index_bits 10
>> #endif
>>
>> -extern unsigned long kvm_riscv_gstage_mode;
>> -extern unsigned long kvm_riscv_gstage_pgd_levels;
>> +extern unsigned long kvm_riscv_gstage_max_mode;
>> +extern unsigned long kvm_riscv_gstage_max_pgd_levels;
>>
>> #define kvm_riscv_gstage_pgd_xbits 2
>> #define kvm_riscv_gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + kvm_riscv_gstage_pgd_xbits))
>> -#define kvm_riscv_gstage_gpa_bits (HGATP_PAGE_SHIFT + \
>> - (kvm_riscv_gstage_pgd_levels * \
>> - kvm_riscv_gstage_index_bits) + \
>> - kvm_riscv_gstage_pgd_xbits)
>> -#define kvm_riscv_gstage_gpa_size ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits))
>>
>> bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
>> pte_t **ptepp, u32 *ptep_level);
>> @@ -69,4 +64,7 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
>>
>> void kvm_riscv_gstage_mode_detect(void);
>>
>> +gpa_t kvm_riscv_gstage_gpa_size(struct kvm_arch *k);
>> +unsigned long kvm_riscv_gstage_gpa_bits(struct kvm_arch *k);
>> +
>> #endif
>> diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
>> index 24585304c02b..27ea8e8fd5b0 100644
>> --- a/arch/riscv/include/asm/kvm_host.h
>> +++ b/arch/riscv/include/asm/kvm_host.h
>> @@ -103,6 +103,10 @@ struct kvm_arch {
>>
>> /* KVM_CAP_RISCV_MP_STATE_RESET */
>> bool mp_state_reset;
>> +
>> + unsigned long kvm_riscv_gstage_mode;
>
>There's a 1:1 mapping for mode/levels, so we don't need to track both.
>Since mode is rarely used, I think something like this would still
>provide enough convenience without requiring the storage allocation.
>
> static inline unsigned long kvm_riscv_gstage_mode(struct kvm_gstage *gstage)
> {
> unsigned long modes[] = {
> [2] = HGATP_MODE_SV32X4,
> [3] = HGATP_MODE_SV39X4,
> [4] = HGATP_MODE_SV48X4,
> [5] = HGATP_MODE_SV57X4,
> };
>
> return modes[gstage->kvm->arch.kvm_riscv_gstage_pgd_levels];
> }
Thanks for the suggestion.
You're right that gstage mode has a 1:1 mapping with pgd_levels, so keeping
both is redundant. In the next revision I'll drop kvm_riscv_gstage_mode and
derive HGATP.MODE from kvm_riscv_gstage_pgd_levels via a small helper.
>> + unsigned long kvm_riscv_gstage_pgd_levels;
>> + bool gstage_mode_initialized;
>> };
>>
>> struct kvm_cpu_trap {
>> diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c
>> index b67d60d722c2..06452e4c2ab2 100644
>> --- a/arch/riscv/kvm/gstage.c
>> +++ b/arch/riscv/kvm/gstage.c
>> @@ -12,22 +12,23 @@
>> #include <asm/kvm_gstage.h>
>>
>> #ifdef CONFIG_64BIT
>> -unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV39X4;
>> -unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 3;
>> +unsigned long kvm_riscv_gstage_max_mode __ro_after_init = HGATP_MODE_SV39X4;
>
>With a kvm_riscv_gstage_mode() function we don't need
>kvm_riscv_gstage_max_mode either.
Thanks, agreed.
>> +unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 3;
>> #else
>> -unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV32X4;
>> -unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 2;
>> +unsigned long kvm_riscv_gstage_max_mode __ro_after_init = HGATP_MODE_SV32X4;
>> +unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 2;
>> #endif
>>
>> #define gstage_pte_leaf(__ptep) \
>> (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
>>
>> -static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
>> +static inline unsigned long gstage_pte_index(struct kvm_gstage *gstage,
>> + gpa_t addr, u32 level)
>> {
>> unsigned long mask;
>> unsigned long shift = HGATP_PAGE_SHIFT + (kvm_riscv_gstage_index_bits * level);
>>
>> - if (level == (kvm_riscv_gstage_pgd_levels - 1))
>> + if (level == (gstage->kvm->arch.kvm_riscv_gstage_pgd_levels - 1))
>
>nit: we can drop the unnecessary () while touching this line.
Ack, will fix.
>> mask = (PTRS_PER_PTE * (1UL << kvm_riscv_gstage_pgd_xbits)) - 1;
>> else
>> mask = PTRS_PER_PTE - 1;
>> @@ -40,12 +41,13 @@ static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
>> return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
>> }
>>
>> -static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
>> +static int gstage_page_size_to_level(struct kvm_gstage *gstage, unsigned long page_size,
>> + u32 *out_level)
>> {
>> u32 i;
>> unsigned long psz = 1UL << 12;
>>
>> - for (i = 0; i < kvm_riscv_gstage_pgd_levels; i++) {
>> + for (i = 0; i < gstage->kvm->arch.kvm_riscv_gstage_pgd_levels; i++) {
>> if (page_size == (psz << (i * kvm_riscv_gstage_index_bits))) {
>> *out_level = i;
>> return 0;
>> @@ -55,21 +57,23 @@ static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
>> return -EINVAL;
>> }
>>
>> -static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder)
>> +static int gstage_level_to_page_order(struct kvm_gstage *gstage, u32 level,
>> + unsigned long *out_pgorder)
>> {
>> - if (kvm_riscv_gstage_pgd_levels < level)
>> + if (gstage->kvm->arch.kvm_riscv_gstage_pgd_levels < level)
>> return -EINVAL;
>>
>> *out_pgorder = 12 + (level * kvm_riscv_gstage_index_bits);
>> return 0;
>> }
>>
>> -static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
>> +static int gstage_level_to_page_size(struct kvm_gstage *gstage, u32 level,
>> + unsigned long *out_pgsize)
>> {
>> int rc;
>> unsigned long page_order = PAGE_SHIFT;
>>
>> - rc = gstage_level_to_page_order(level, &page_order);
>> + rc = gstage_level_to_page_order(gstage, level, &page_order);
>> if (rc)
>> return rc;
>>
>> @@ -81,11 +85,11 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
>> pte_t **ptepp, u32 *ptep_level)
>> {
>> pte_t *ptep;
>> - u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
>> + u32 current_level = gstage->kvm->arch.kvm_riscv_gstage_pgd_levels - 1;
>>
>> *ptep_level = current_level;
>> ptep = (pte_t *)gstage->pgd;
>> - ptep = &ptep[gstage_pte_index(addr, current_level)];
>> + ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
>> while (ptep && pte_val(ptep_get(ptep))) {
>> if (gstage_pte_leaf(ptep)) {
>> *ptep_level = current_level;
>> @@ -97,7 +101,7 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
>> current_level--;
>> *ptep_level = current_level;
>> ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
>> - ptep = &ptep[gstage_pte_index(addr, current_level)];
>> + ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
>> } else {
>> ptep = NULL;
>> }
>> @@ -110,7 +114,7 @@ static void gstage_tlb_flush(struct kvm_gstage *gstage, u32 level, gpa_t addr)
>> {
>> unsigned long order = PAGE_SHIFT;
>>
>> - if (gstage_level_to_page_order(level, &order))
>> + if (gstage_level_to_page_order(gstage, level, &order))
>> return;
>> addr &= ~(BIT(order) - 1);
>>
>> @@ -125,9 +129,9 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
>> struct kvm_mmu_memory_cache *pcache,
>> const struct kvm_gstage_mapping *map)
>> {
>> - u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
>> + u32 current_level = gstage->kvm->arch.kvm_riscv_gstage_pgd_levels - 1;
>> pte_t *next_ptep = (pte_t *)gstage->pgd;
>> - pte_t *ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
>> + pte_t *ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];
>>
>> if (current_level < map->level)
>> return -EINVAL;
>> @@ -151,7 +155,7 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
>> }
>>
>> current_level--;
>> - ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
>> + ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];
>> }
>>
>> if (pte_val(*ptep) != pte_val(map->pte)) {
>> @@ -175,7 +179,7 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
>> out_map->addr = gpa;
>> out_map->level = 0;
>>
>> - ret = gstage_page_size_to_level(page_size, &out_map->level);
>> + ret = gstage_page_size_to_level(gstage, page_size, &out_map->level);
>> if (ret)
>> return ret;
>>
>> @@ -217,7 +221,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
>> u32 next_ptep_level;
>> unsigned long next_page_size, page_size;
>>
>> - ret = gstage_level_to_page_size(ptep_level, &page_size);
>> + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
>> if (ret)
>> return;
>>
>> @@ -229,7 +233,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
>> if (ptep_level && !gstage_pte_leaf(ptep)) {
>> next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
>> next_ptep_level = ptep_level - 1;
>> - ret = gstage_level_to_page_size(next_ptep_level, &next_page_size);
>> + ret = gstage_level_to_page_size(gstage, next_ptep_level, &next_page_size);
>> if (ret)
>> return;
>>
>> @@ -263,7 +267,7 @@ void kvm_riscv_gstage_unmap_range(struct kvm_gstage *gstage,
>>
>> while (addr < end) {
>> found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
>> - ret = gstage_level_to_page_size(ptep_level, &page_size);
>> + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
>> if (ret)
>> break;
>>
>> @@ -297,7 +301,7 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
>>
>> while (addr < end) {
>> found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
>> - ret = gstage_level_to_page_size(ptep_level, &page_size);
>> + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
>> if (ret)
>> break;
>>
>> @@ -319,41 +323,51 @@ void __init kvm_riscv_gstage_mode_detect(void)
>> /* Try Sv57x4 G-stage mode */
>> csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
>> if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
>> - kvm_riscv_gstage_mode = HGATP_MODE_SV57X4;
>> - kvm_riscv_gstage_pgd_levels = 5;
>> + kvm_riscv_gstage_max_mode = HGATP_MODE_SV57X4;
>> + kvm_riscv_gstage_max_pgd_levels = 5;
>> goto done;
>> }
>>
>> /* Try Sv48x4 G-stage mode */
>> csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
>> if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
>> - kvm_riscv_gstage_mode = HGATP_MODE_SV48X4;
>> - kvm_riscv_gstage_pgd_levels = 4;
>> + kvm_riscv_gstage_max_mode = HGATP_MODE_SV48X4;
>> + kvm_riscv_gstage_max_pgd_levels = 4;
>> goto done;
>> }
>>
>> /* Try Sv39x4 G-stage mode */
>> csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
>> if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) {
>> - kvm_riscv_gstage_mode = HGATP_MODE_SV39X4;
>> - kvm_riscv_gstage_pgd_levels = 3;
>> + kvm_riscv_gstage_max_mode = HGATP_MODE_SV39X4;
>> + kvm_riscv_gstage_max_pgd_levels = 3;
>> goto done;
>> }
>> #else /* CONFIG_32BIT */
>> /* Try Sv32x4 G-stage mode */
>> csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
>> if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) {
>> - kvm_riscv_gstage_mode = HGATP_MODE_SV32X4;
>> - kvm_riscv_gstage_pgd_levels = 2;
>> + kvm_riscv_gstage_max_mode = HGATP_MODE_SV32X4;
>> + kvm_riscv_gstage_max_pgd_levels = 2;
>> goto done;
>> }
>> #endif
>>
>> /* KVM depends on !HGATP_MODE_OFF */
>> - kvm_riscv_gstage_mode = HGATP_MODE_OFF;
>> - kvm_riscv_gstage_pgd_levels = 0;
>> + kvm_riscv_gstage_max_mode = HGATP_MODE_OFF;
>> + kvm_riscv_gstage_max_pgd_levels = 0;
>>
>> done:
>> csr_write(CSR_HGATP, 0);
>> kvm_riscv_local_hfence_gvma_all();
>> }
>> +
>> +unsigned long kvm_riscv_gstage_gpa_bits(struct kvm_arch *k) {
>
>Did you run checkpatch? I think it requires '{' to be on its own line.
>
>nit: s/k/ka/ would be consistent with other archs, although I see k is
>used in riscv's kvm_riscv_mmu_update_hgatp() but that can be fixed up
>in this patch since there's a change in the same place too.
Thanks for catching that.
Yes, checkpatch complains about the opening brace placement here. I'll fix
the style by moving '{' onto its own line. I'll also rename the argument
from 'k' to 'ka' for consistency (and update the existing usage in
kvm_riscv_mmu_update_hgatp() in the same patch since we're touching it
anyway).
I'll apply the same fixes to kvm_riscv_gstage_gpa_size() as well.
>
>> + return (HGATP_PAGE_SHIFT + (k->kvm_riscv_gstage_pgd_levels *
>> + kvm_riscv_gstage_index_bits) +
>> + kvm_riscv_gstage_pgd_xbits);
>> +}
>> +
>> +gpa_t kvm_riscv_gstage_gpa_size(struct kvm_arch *k) {
>
>same comments as above
>
>> + return ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits(k)));
>
> return BIT_ULL(kvm_riscv_gstage_gpa_bits(ka));
>
>(the cast is implicit from return type)
>
>> +}
>> diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
>> index 45536af521f0..56a246e0e791 100644
>> --- a/arch/riscv/kvm/main.c
>> +++ b/arch/riscv/kvm/main.c
>> @@ -105,7 +105,7 @@ static int __init riscv_kvm_init(void)
>> return rc;
>>
>> kvm_riscv_gstage_mode_detect();
>> - switch (kvm_riscv_gstage_mode) {
>> + switch (kvm_riscv_gstage_max_mode) {
>> case HGATP_MODE_SV32X4:
>> str = "Sv32x4";
>> break;
>> @@ -164,7 +164,7 @@ static int __init riscv_kvm_init(void)
>> (rc) ? slist : "no features");
>> }
>>
>> - kvm_info("using %s G-stage page table format\n", str);
>> + kvm_info("Max G-stage page table format %s \n", str);
>>
>> kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());
>>
>> diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
>> index 4ab06697bfc0..574783907162 100644
>> --- a/arch/riscv/kvm/mmu.c
>> +++ b/arch/riscv/kvm/mmu.c
>> @@ -67,7 +67,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
>> if (!writable)
>> map.pte = pte_wrprotect(map.pte);
>>
>> - ret = kvm_mmu_topup_memory_cache(&pcache, kvm_riscv_gstage_pgd_levels);
>> + ret = kvm_mmu_topup_memory_cache(&pcache,kvm->arch.kvm_riscv_gstage_pgd_levels);
>nit: missing space after the comma in '&pcache,kvm->arch.kvm_riscv_gstage_pgd_levels'.
>
>> if (ret)
>> goto out;
>>
>> @@ -186,8 +186,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>> * space addressable by the KVM guest GPA space.
>> */
>> if ((new->base_gfn + new->npages) >=
>> - (kvm_riscv_gstage_gpa_size >> PAGE_SHIFT))
>> + (kvm_riscv_gstage_gpa_size(&kvm->arch) >> PAGE_SHIFT)) {
>> return -EFAULT;
>> + }
>
>nit: Remove the unnecessary () and the '{' and the condition will fit on
>one 100 char line.
Ack.
>>
>> hva = new->userspace_addr;
>> size = new->npages << PAGE_SHIFT;
>> @@ -332,7 +333,7 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
>> memset(out_map, 0, sizeof(*out_map));
>>
>> /* We need minimum second+third level pages */
>> - ret = kvm_mmu_topup_memory_cache(pcache, kvm_riscv_gstage_pgd_levels);
>> + ret = kvm_mmu_topup_memory_cache(pcache, kvm->arch.kvm_riscv_gstage_pgd_levels);
>> if (ret) {
>> kvm_err("Failed to topup G-stage cache\n");
>> return ret;
>> @@ -431,6 +432,11 @@ int kvm_riscv_mmu_alloc_pgd(struct kvm *kvm)
>> return -ENOMEM;
>> kvm->arch.pgd = page_to_virt(pgd_page);
>> kvm->arch.pgd_phys = page_to_phys(pgd_page);
>> + if (!kvm->arch.gstage_mode_initialized) {
>> + /*user-space didn't set KVM_CAP_RISC_HGATP_MODE cap*/
>nit: missing space after '/*' and before '*/' in the comment above.
>> + kvm->arch.kvm_riscv_gstage_mode = kvm_riscv_gstage_max_mode;
>> + kvm->arch.kvm_riscv_gstage_pgd_levels = kvm_riscv_gstage_max_pgd_levels;
>
>Missing 'kvm->arch.gstage_mode_initialized = true' statement.
The flag is set to true in the next patch of this series (patch 2/2).
>> + }
>>
>> return 0;
>> }
>> @@ -446,10 +452,12 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm)
>> gstage.flags = 0;
>> gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
>> gstage.pgd = kvm->arch.pgd;
>> - kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size, false);
>> + kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size(&kvm->arch), false);
>> pgd = READ_ONCE(kvm->arch.pgd);
>> kvm->arch.pgd = NULL;
>> kvm->arch.pgd_phys = 0;
>> + kvm->arch.kvm_riscv_gstage_mode = HGATP_MODE_OFF;
>> + kvm->arch.kvm_riscv_gstage_pgd_levels = 0;
>> }
>> spin_unlock(&kvm->mmu_lock);
>>
>> @@ -459,8 +467,8 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm)
>>
>> void kvm_riscv_mmu_update_hgatp(struct kvm_vcpu *vcpu)
>> {
>> - unsigned long hgatp = kvm_riscv_gstage_mode << HGATP_MODE_SHIFT;
>> struct kvm_arch *k = &vcpu->kvm->arch;
>> + unsigned long hgatp = k->kvm_riscv_gstage_mode << HGATP_MODE_SHIFT;
>>
>> hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID;
>> hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
>> diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
>> index 66d91ae6e9b2..4b2156df40fc 100644
>> --- a/arch/riscv/kvm/vm.c
>> +++ b/arch/riscv/kvm/vm.c
>> @@ -200,7 +200,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>> r = KVM_USER_MEM_SLOTS;
>> break;
>> case KVM_CAP_VM_GPA_BITS:
>> - r = kvm_riscv_gstage_gpa_bits;
>> + r = kvm_riscv_gstage_gpa_bits(&kvm->arch);
>> break;
>> default:
>> r = 0;
>> diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
>> index cf34d448289d..db27430f111e 100644
>> --- a/arch/riscv/kvm/vmid.c
>> +++ b/arch/riscv/kvm/vmid.c
>> @@ -26,7 +26,7 @@ static DEFINE_SPINLOCK(vmid_lock);
>> void __init kvm_riscv_gstage_vmid_detect(void)
>> {
>> /* Figure-out number of VMID bits in HW */
>> - csr_write(CSR_HGATP, (kvm_riscv_gstage_mode << HGATP_MODE_SHIFT) | HGATP_VMID);
>> + csr_write(CSR_HGATP, (kvm_riscv_gstage_max_mode << HGATP_MODE_SHIFT) | HGATP_VMID);
>> vmid_bits = csr_read(CSR_HGATP);
>> vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT;
>> vmid_bits = fls_long(vmid_bits);
>> --
>> 2.50.1
>>
>
>Thanks,
>drew
>
Thanks,
Fangyu
© 2016 - 2026 Red Hat, Inc.