[PATCH] RISC-V: KVM: Transparent huge page support
Posted by liu.xuemei1@zte.com.cn 3 days, 12 hours ago
From: Jessica Liu <liu.xuemei1@zte.com.cn>

Use block mappings when the faulting page is backed by a transparent
huge page (THP) on the host, as is already done on architectures such
as arm64 and x86_64.

Signed-off-by: Jessica Liu <liu.xuemei1@zte.com.cn>
---
 arch/riscv/include/asm/kvm_gstage.h |   3 +
 arch/riscv/kvm/gstage.c             | 100 ++++++++++++++++++++++++++++
 arch/riscv/kvm/mmu.c                |  12 +++-
 3 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/kvm_gstage.h b/arch/riscv/include/asm/kvm_gstage.h
index 595e2183173e..cc67fb2d2d42 100644
--- a/arch/riscv/include/asm/kvm_gstage.h
+++ b/arch/riscv/include/asm/kvm_gstage.h
@@ -69,4 +69,7 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end

 void kvm_riscv_gstage_mode_detect(void);

+long kvm_riscv_gstage_thp_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
+				 unsigned long hva, kvm_pfn_t *pfnp, gpa_t *gpa);
+
 #endif
diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c
index 24c270d6d0e2..98494b4b4652 100644
--- a/arch/riscv/kvm/gstage.c
+++ b/arch/riscv/kvm/gstage.c
@@ -77,6 +77,106 @@ static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
 	return 0;
 }

+static int gstage_get_user_mapping_size(struct kvm *kvm, u64 addr)
+{
+	pte_t *ptepp;
+	u32 ptep_level;
+	unsigned long out_pgsize;
+	struct kvm_gstage gstage = {
+		.pgd = kvm->mm->pgd
+	};
+
+	if (!kvm_riscv_gstage_get_leaf(&gstage, addr, &ptepp, &ptep_level))
+		return -EFAULT;
+
+	if (gstage_level_to_page_size(ptep_level, &out_pgsize))
+		return -EFAULT;
+
+	return out_pgsize;
+}
+
+static bool gstage_supports_huge_mapping(struct kvm_memory_slot *memslot, unsigned long hva)
+{
+	gpa_t gpa_start;
+	hva_t uaddr_start, uaddr_end;
+	size_t size;
+
+	size = memslot->npages * PAGE_SIZE;
+	uaddr_start = memslot->userspace_addr;
+	uaddr_end = uaddr_start + size;
+
+	gpa_start = memslot->base_gfn << PAGE_SIZE;
+
+	/*
+	 * Pages belonging to memslots that don't have the same alignment
+	 * within a PMD for userspace and GPA cannot be mapped with g-stage
+	 * PMD entries, because we'll end up mapping the wrong pages.
+	 *
+	 * Consider a layout like the following:
+	 *
+	 *    memslot->userspace_addr:
+	 *    +-----+--------------------+--------------------+---+
+	 *    |abcde|fgh  vs-stage block  |    vs-stage block tv|xyz|
+	 *    +-----+--------------------+--------------------+---+
+	 *
+	 *    memslot->base_gfn << PAGE_SHIFT:
+	 *      +---+--------------------+--------------------+-----+
+	 *      |abc|def  g-stage block  |    g-stage block   |tvxyz|
+	 *      +---+--------------------+--------------------+-----+
+	 *
+	 * If we create those g-stage blocks, we'll end up with this incorrect
+	 * mapping:
+	 *   d -> f
+	 *   e -> g
+	 *   f -> h
+	 */
+	if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
+		return false;
+
+	/*
+	 * Next, let's make sure we're not trying to map anything not covered
+	 * by the memslot. This means we have to prohibit block size mappings
+	 * for the beginning and end of a non-block aligned and non-block sized
+	 * memory slot (illustrated by the head and tail parts of the
+	 * userspace view above containing pages 'abcde' and 'xyz',
+	 * respectively).
+	 *
+	 * Note that it doesn't matter if we do the check using the
+	 * userspace_addr or the base_gfn, as both are equally aligned (per
+	 * the check above) and equally sized.
+	 */
+	return (hva >= ALIGN(uaddr_start, PMD_SIZE)) && (hva < ALIGN_DOWN(uaddr_end, PMD_SIZE));
+}
+
+long kvm_riscv_gstage_thp_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
+				 unsigned long hva, kvm_pfn_t *hfnp, gpa_t *gpa)
+{
+	kvm_pfn_t hfn = *hfnp;
+
+	/*
+	 * Make sure the adjustment is done only for THP pages. Also make
+	 * sure that the HVA and GPA are sufficiently aligned and that the
+	 * block map is contained within the memslot.
+	 */
+	if (gstage_supports_huge_mapping(memslot, hva)) {
+		int sz = gstage_get_user_mapping_size(kvm, hva);
+
+		if (sz < 0)
+			return sz;
+
+		if (sz < PMD_SIZE)
+			return PAGE_SIZE;
+
+		*gpa &= PMD_MASK;
+		hfn &= ~(PTRS_PER_PMD - 1);
+		*hfnp = hfn;
+
+		return PMD_SIZE;
+	}
+
+	return PAGE_SIZE;
+}
+
 bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
 			       pte_t **ptepp, u32 *ptep_level)
 {
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index 525fb5a330c0..f70cf721ebb8 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -337,7 +337,8 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 	struct kvm_mmu_memory_cache *pcache = &vcpu->arch.mmu_page_cache;
 	bool logging = (memslot->dirty_bitmap &&
 			!(memslot->flags & KVM_MEM_READONLY)) ? true : false;
-	unsigned long vma_pagesize, mmu_seq;
+	unsigned long mmu_seq;
+	long vma_pagesize;
 	struct kvm_gstage gstage;
 	struct page *page;

@@ -416,6 +417,15 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 	if (mmu_invalidate_retry(kvm, mmu_seq))
 		goto out_unlock;

+	/* check if we are backed by a THP and thus use block mapping if possible */
+	if (vma_pagesize == PAGE_SIZE) {
+		vma_pagesize = kvm_riscv_gstage_thp_adjust(kvm, memslot, hva, &hfn, &gpa);
+		if (vma_pagesize < 0) {
+			ret = vma_pagesize;
+			goto out_unlock;
+		}
+	}
+
 	if (writable) {
 		mark_page_dirty_in_slot(kvm, memslot, gfn);
 		ret = kvm_riscv_gstage_map_page(&gstage, pcache, gpa, hfn << PAGE_SHIFT,
-- 
2.27.0
Re: [External] : [PATCH] RISC-V: KVM: Transparent huge page support
Posted by ALOK TIWARI 3 days ago

On 9/28/2025 1:14 PM, liu.xuemei1@zte.com.cn wrote:
> +static bool gstage_supports_huge_mapping(struct kvm_memory_slot *memslot, unsigned long hva)
> +{
> +	gpa_t gpa_start;
> +	hva_t uaddr_start, uaddr_end;
> +	size_t size;
> +
> +	size = memslot->npages * PAGE_SIZE;
> +	uaddr_start = memslot->userspace_addr;
> +	uaddr_end = uaddr_start + size;
> +
> +	gpa_start = memslot->base_gfn << PAGE_SIZE;

This looks wrong. Why << PAGE_SIZE? Looks like a typo.
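Presumably the intent was to shift by PAGE_SHIFT rather than by the
page size, i.e. something like:

	/* gfn -> gpa: shift by PAGE_SHIFT, not PAGE_SIZE */
	gpa_start = memslot->base_gfn << PAGE_SHIFT;

That would also match the "memslot->base_gfn << PAGE_SHIFT" label in
the diagram comment below, and the usual gfn-to-gpa conversion.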

> +
> +	/*
> +	 * Pages belonging to memslots that don't have the same alignment
> +	 * within a PMD for userspace and GPA cannot be mapped with g-stage
> +	 * PMD entries, because we'll end up mapping the wrong pages.
> +	 *
> +	 * Consider a layout like the following:
> +	 *
> +	 *    memslot->userspace_addr:
> +	 *    +-----+--------------------+--------------------+---+
> +	 *    |abcde|fgh  vs-stage block  |    vs-stage block tv|xyz|
> +	 *    +-----+--------------------+--------------------+---+
> +	 *
> +	 *    memslot->base_gfn << PAGE_SHIFT:
> +	 *      +---+--------------------+--------------------+-----+
> +	 *      |abc|def  g-stage block  |    g-stage block   |tvxyz|
> +	 *      +---+--------------------+--------------------+-----+
> +	 *
> +	 * If we create those g-stage blocks, we'll end up with this incorrect
> +	 * mapping:
> +	 *   d -> f
> +	 *   e -> g
> +	 *   f -> h
> +	 */
> +	if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
> +		return false;
> +
> +	/*
> +	 * Next, let's make sure we're not trying to map anything not covered
> +	 * by the memslot. This means we have to prohibit block size mappings
> +	 * for the beginning and end of a non-block aligned and non-block sized
> +	 * memory slot (illustrated by the head and tail parts of the
> +	 * userspace view above containing pages 'abcde' and 'xyz',
> +	 * respectively).
> +	 *
> +	 * Note that it doesn't matter if we do the check using the
> +	 * userspace_addr or the base_gfn, as both are equally aligned (per
> +	 * the check above) and equally sized.
> +	 */
> +	return (hva >= ALIGN(uaddr_start, PMD_SIZE)) && (hva < ALIGN_DOWN(uaddr_end, PMD_SIZE));
> +}
> +


Thanks,
Alok
Re: [PATCH] RISC-V: KVM: Transparent huge page support
Posted by kernel test robot 3 days, 4 hours ago
Hi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on kvm/queue]
[also build test WARNING on kvm/next mst-vhost/linux-next linus/master v6.17-rc7 next-20250926]
[cannot apply to kvm/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/liu-xuemei1-zte-com-cn/RISC-V-KVM-Transparent-huge-page-support/20250928-154904
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link:    https://lore.kernel.org/r/20250928154450701hRC3fm00QYFnGiM0_M1No%40zte.com.cn
patch subject: [PATCH] RISC-V: KVM: Transparent huge page support
config: riscv-randconfig-001-20250928 (https://download.01.org/0day-ci/archive/20250928/202509282326.NFfcoD5h-lkp@intel.com/config)
compiler: riscv64-linux-gcc (GCC) 11.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250928/202509282326.NFfcoD5h-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202509282326.NFfcoD5h-lkp@intel.com/

All warnings (new ones prefixed by >>):

   arch/riscv/kvm/gstage.c: In function 'gstage_supports_huge_mapping':
>> arch/riscv/kvm/gstage.c:108:39: warning: left shift count >= width of type [-Wshift-count-overflow]
     108 |         gpa_start = memslot->base_gfn << PAGE_SIZE;
         |                                       ^~


vim +108 arch/riscv/kvm/gstage.c

    97	
    98	static bool gstage_supports_huge_mapping(struct kvm_memory_slot *memslot, unsigned long hva)
    99	{
   100		gpa_t gpa_start;
   101		hva_t uaddr_start, uaddr_end;
   102		size_t size;
   103	
   104		size = memslot->npages * PAGE_SIZE;
   105		uaddr_start = memslot->userspace_addr;
   106		uaddr_end = uaddr_start + size;
   107	
 > 108		gpa_start = memslot->base_gfn << PAGE_SIZE;
   109	
   110		/*
   111		 * Pages belonging to memslots that don't have the same alignment
   112		 * within a PMD for userspace and GPA cannot be mapped with g-stage
   113		 * PMD entries, because we'll end up mapping the wrong pages.
   114		 *
   115		 * Consider a layout like the following:
   116		 *
   117		 *    memslot->userspace_addr:
   118		 *    +-----+--------------------+--------------------+---+
   119		 *    |abcde|fgh  vs-stage block  |    vs-stage block tv|xyz|
   120		 *    +-----+--------------------+--------------------+---+
   121		 *
   122		 *    memslot->base_gfn << PAGE_SHIFT:
   123		 *      +---+--------------------+--------------------+-----+
   124		 *      |abc|def  g-stage block  |    g-stage block   |tvxyz|
   125		 *      +---+--------------------+--------------------+-----+
   126		 *
   127		 * If we create those g-stage blocks, we'll end up with this incorrect
   128		 * mapping:
   129		 *   d -> f
   130		 *   e -> g
   131		 *   f -> h
   132		 */
   133		if ((gpa_start & (PMD_SIZE - 1)) != (uaddr_start & (PMD_SIZE - 1)))
   134			return false;
   135	
   136		/*
   137		 * Next, let's make sure we're not trying to map anything not covered
   138		 * by the memslot. This means we have to prohibit block size mappings
   139		 * for the beginning and end of a non-block aligned and non-block sized
   140		 * memory slot (illustrated by the head and tail parts of the
   141		 * userspace view above containing pages 'abcde' and 'xyz',
   142		 * respectively).
   143		 *
   144		 * Note that it doesn't matter if we do the check using the
   145		 * userspace_addr or the base_gfn, as both are equally aligned (per
   146		 * the check above) and equally sized.
   147		 */
   148		return (hva >= ALIGN(uaddr_start, PMD_SIZE)) && (hva < ALIGN_DOWN(uaddr_end, PMD_SIZE));
   149	}
   150	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki