From: Jessica Liu <liu.xuemei1@zte.com.cn>
Use block mappings when guest memory is backed by transparent huge
pages (THP), as is already done on other architectures such as arm64
and x86_64.
Signed-off-by: Jessica Liu <liu.xuemei1@zte.com.cn>
---
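Note for reviewers (not intended for the commit message): when the fault is
backed by a huge host mapping and the memslot alignment allows it, the new
kvm_riscv_gstage_thp_adjust() rounds the GPA down with PMD_MASK and the host
frame number down to a PTRS_PER_PMD boundary, then maps a single 2 MiB block.
A rough, standalone illustration of that rounding (plain userspace C, constants
hard-coded for 4 KiB base pages and 2 MiB PMD blocks; the EX_* names are
illustrative only and not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT	12				/* 4 KiB base pages */
#define EX_PMD_SHIFT	21				/* 2 MiB PMD blocks */
#define EX_PMD_SIZE	(1ULL << EX_PMD_SHIFT)
#define EX_PMD_MASK	(~(EX_PMD_SIZE - 1))
#define EX_PTRS_PER_PMD	(1ULL << (EX_PMD_SHIFT - EX_PAGE_SHIFT))

int main(void)
{
	uint64_t gpa = 0x80234000ULL;	/* faulting guest-physical address */
	uint64_t hfn = 0x12345ULL;	/* host page frame backing that address */

	/* Round both down to the start of their 2 MiB block, as the patch does. */
	uint64_t gpa_blk = gpa & EX_PMD_MASK;
	uint64_t hfn_blk = hfn & ~(EX_PTRS_PER_PMD - 1);

	printf("gpa 0x%llx -> 0x%llx, hfn 0x%llx -> 0x%llx\n",
	       (unsigned long long)gpa, (unsigned long long)gpa_blk,
	       (unsigned long long)hfn, (unsigned long long)hfn_blk);
	return 0;
}

gstage_supports_huge_mapping() exists to guarantee that this rounding keeps
the HVA and GPA views of the memslot in sync and never reaches outside the
slot.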
arch/riscv/include/asm/kvm_gstage.h | 3 +
arch/riscv/kvm/gstage.c | 100 ++++++++++++++++++++++++++++
arch/riscv/kvm/mmu.c | 12 +++-
3 files changed, 114 insertions(+), 1 deletion(-)
diff --git a/arch/riscv/include/asm/kvm_gstage.h b/arch/riscv/include/asm/kvm_gstage.h
index 595e2183173e..cc67fb2d2d42 100644
--- a/arch/riscv/include/asm/kvm_gstage.h
+++ b/arch/riscv/include/asm/kvm_gstage.h
@@ -69,4 +69,7 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
void kvm_riscv_gstage_mode_detect(void);
+long kvm_riscv_gstage_thp_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long hva, kvm_pfn_t *pfnp, gpa_t *gpa);
+
#endif
diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c
index 24c270d6d0e2..98494b4b4652 100644
--- a/arch/riscv/kvm/gstage.c
+++ b/arch/riscv/kvm/gstage.c
@@ -77,6 +77,106 @@ static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
return 0;
}
+static int gstage_get_user_mapping_size(struct kvm *kvm, u64 addr)
+{
+ pte_t *ptepp;
+ u32 ptep_level;
+ unsigned long out_pgsize;
+ struct kvm_gstage gstage = {
+ .pgd = kvm->mm->pgd
+ };
+
+ if (!kvm_riscv_gstage_get_leaf(&gstage, addr, &ptepp, &ptep_level))
+ return -EFAULT;
+
+ if (gstage_level_to_page_size(ptep_level, &out_pgsize))
+ return -EFAULT;
+
+ return out_pgsize;
+}
+
+static bool gstage_supports_huge_mapping(struct kvm_memory_slot *memslot, unsigned long hva)
+{
+ gpa_t gpa_start;
+ hva_t uaddr_start, uaddr_end;
+ size_t size;
+
+ size = memslot->npages * PAGE_SIZE;
+ uaddr_start = memslot->userspace_addr;
+ uaddr_end = uaddr_start + size;
+
+ gpa_start = memslot->base_gfn << PAGE_SIZE;
+
+ /*
+ * Pages belonging to memslots that don't have the same alignment
+ * within a PMD for userspace and GPA cannot be mapped with g-stage
+ * PMD entries, because we'll end up mapping the wrong pages.
+ *
+ * Consider a layout like the following:
+ *
+ * memslot->userspace_addr:
+ * +-----+--------------------+--------------------+---+
+ * |abcde|fgh vs-stage block | vs-stage block tv|xyz|
+ * +-----+--------------------+--------------------+---+
+ *
+ * memslot->base_gfn << PAGE_SHIFT:
+ * +---+--------------------+--------------------+-----+
+ * |abc|def g-stage block | g-stage block |tvxyz|
+ * +---+--------------------+--------------------+-----+
+ *
+ * If we create those g-stage blocks, we'll end up with this incorrect
+ * mapping:
+ * d -> f
+ * e -> g
+ * f -> h
+ */
+ if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
+ return false;
+
+ /*
+ * Next, let's make sure we're not trying to map anything not covered
+ * by the memslot. This means we have to prohibit block size mappings
+ * for the beginning and end of a non-block aligned and non-block sized
+ * memory slot (illustrated by the head and tail parts of the
+ * userspace view above containing pages 'abcde' and 'xyz',
+ * respectively).
+ *
+ * Note that it doesn't matter if we do the check using the
+ * userspace_addr or the base_gfn, as both are equally aligned (per
+ * the check above) and equally sized.
+ */
+ return (hva >= ALIGN(uaddr_start, PMD_SIZE)) && (hva < ALIGN_DOWN(uaddr_end, PMD_SIZE));
+}
+
+long kvm_riscv_gstage_thp_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long hva, kvm_pfn_t *hfnp, gpa_t *gpa)
+{
+ kvm_pfn_t hfn = *hfnp;
+
+ /*
+ * Make sure the adjustment is done only for THP pages. Also make
+ * sure that the HVA and GPA are sufficiently aligned and that the
+ * block map is contained within the memslot.
+ */
+ if (gstage_supports_huge_mapping(memslot, hva)) {
+ int sz = gstage_get_user_mapping_size(kvm, hva);
+
+ if (sz < 0)
+ return sz;
+
+ if (sz < PMD_SIZE)
+ return PAGE_SIZE;
+
+ *gpa &= PMD_MASK;
+ hfn &= ~(PTRS_PER_PMD - 1);
+ *hfnp = hfn;
+
+ return PMD_SIZE;
+ }
+
+ return PAGE_SIZE;
+}
+
bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
pte_t **ptepp, u32 *ptep_level)
{
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 525fb5a330c0..f70cf721ebb8 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -337,7 +337,8 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
struct kvm_mmu_memory_cache *pcache = &vcpu->arch.mmu_page_cache;
bool logging = (memslot->dirty_bitmap &&
!(memslot->flags & KVM_MEM_READONLY)) ? true : false;
- unsigned long vma_pagesize, mmu_seq;
+ unsigned long mmu_seq;
+ long vma_pagesize;
struct kvm_gstage gstage;
struct page *page;
@@ -416,6 +417,15 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
if (mmu_invalidate_retry(kvm, mmu_seq))
goto out_unlock;
+ /* check if we are backed by a THP and thus use block mapping if possible */
+ if (vma_pagesize == PAGE_SIZE) {
+ vma_pagesize = kvm_riscv_gstage_thp_adjust(kvm, memslot, hva, &hfn, &gpa);
+ if (vma_pagesize < 0) {
+ ret = vma_pagesize;
+ goto out_unlock;
+ }
+ }
+
if (writable) {
mark_page_dirty_in_slot(kvm, memslot, gfn);
ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hfn << PAGE_SHIFT,
--
2.27.0
On 9/28/2025 1:14 PM, liu.xuemei1@zte.com.cn wrote:
> +static bool gstage_supports_huge_mapping(struct kvm_memory_slot *memslot, unsigned long hva)
> +{
> +	gpa_t gpa_start;
> +	hva_t uaddr_start, uaddr_end;
> +	size_t size;
> +
> +	size = memslot->npages * PAGE_SIZE;
> +	uaddr_start = memslot->userspace_addr;
> +	uaddr_end = uaddr_start + size;
> +
> +	gpa_start = memslot->base_gfn << PAGE_SIZE;

This looks wrong - why << PAGE_SIZE? Typo?

[rest of the quoted function snipped]

Thanks,
Alok
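If this is indeed just a typo, the presumed fix is simply the shift amount -
PAGE_SHIFT rather than PAGE_SIZE, matching the "memslot->base_gfn <<
PAGE_SHIFT" wording already used in the comment a few lines further down:

-	gpa_start = memslot->base_gfn << PAGE_SIZE;
+	gpa_start = memslot->base_gfn << PAGE_SHIFT;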
Hi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on kvm/queue]
[also build test WARNING on kvm/next mst-vhost/linux-next linus/master v6.17-rc7 next-20250926]
[cannot apply to kvm/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/liu-xuemei1-zte-com-cn/RISC-V-KVM-Transparent-huge-page-support/20250928-154904
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link:    https://lore.kernel.org/r/20250928154450701hRC3fm00QYFnGiM0_M1No%40zte.com.cn
patch subject: [PATCH] RISC-V: KVM: Transparent huge page support
config: riscv-randconfig-001-20250928 (https://download.01.org/0day-ci/archive/20250928/202509282326.NFfcoD5h-lkp@intel.com/config)
compiler: riscv64-linux-gcc (GCC) 11.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250928/202509282326.NFfcoD5h-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202509282326.NFfcoD5h-lkp@intel.com/

All warnings (new ones prefixed by >>):

   arch/riscv/kvm/gstage.c: In function 'gstage_supports_huge_mapping':
>> arch/riscv/kvm/gstage.c:108:39: warning: left shift count >= width of type [-Wshift-count-overflow]
     108 |         gpa_start = memslot->base_gfn << PAGE_SIZE;
         |                                       ^~

vim +108 arch/riscv/kvm/gstage.c

    97
    98	static bool gstage_supports_huge_mapping(struct kvm_memory_slot *memslot, unsigned long hva)
    99	{
   100		gpa_t gpa_start;
   101		hva_t uaddr_start, uaddr_end;
   102		size_t size;
   103
   104		size = memslot->npages * PAGE_SIZE;
   105		uaddr_start = memslot->userspace_addr;
   106		uaddr_end = uaddr_start + size;
   107
 > 108		gpa_start = memslot->base_gfn << PAGE_SIZE;
   109
   [remainder of the quoted function snipped]

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki