[v1] KVM: s390: some cleanup and small fixes

[PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers

Posted by Claudio Imbrenda 9 months ago

Refactor some gmap functions; move the implementation into a separate
file with only helper functions. The new helper functions work on vm
addresses, leaving all gmap logic in the gmap functions, which mostly
become just wrappers.

The whole gmap handling is going to be moved inside KVM soon, but the
helper functions need to touch core mm functions, and thus need to
stay in the core of kernel.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 MAINTAINERS                          |   2 +
 arch/s390/include/asm/gmap_helpers.h |  18 ++
 arch/s390/kvm/diag.c                 |  11 +-
 arch/s390/kvm/kvm-s390.c             |   3 +-
 arch/s390/mm/Makefile                |   2 +
 arch/s390/mm/gmap.c                  |  46 ++---
 arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
 7 files changed, 307 insertions(+), 41 deletions(-)
 create mode 100644 arch/s390/include/asm/gmap_helpers.h
 create mode 100644 arch/s390/mm/gmap_helpers.c

diff --git a/MAINTAINERS b/MAINTAINERS
index f21f1dabb5fe..b0a8fb5a254c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13093,12 +13093,14 @@ S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
 F:	Documentation/virt/kvm/s390*
 F:	arch/s390/include/asm/gmap.h
+F:	arch/s390/include/asm/gmap_helpers.h
 F:	arch/s390/include/asm/kvm*
 F:	arch/s390/include/uapi/asm/kvm*
 F:	arch/s390/include/uapi/asm/uvdevice.h
 F:	arch/s390/kernel/uv.c
 F:	arch/s390/kvm/
 F:	arch/s390/mm/gmap.c
+F:	arch/s390/mm/gmap_helpers.c
 F:	drivers/s390/char/uvdevice.c
 F:	tools/testing/selftests/drivers/s390x/uvdevice/
 F:	tools/testing/selftests/kvm/*/s390/
diff --git a/arch/s390/include/asm/gmap_helpers.h b/arch/s390/include/asm/gmap_helpers.h
new file mode 100644
index 000000000000..1854fe6a8c33
--- /dev/null
+++ b/arch/s390/include/asm/gmap_helpers.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Helper functions for KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2007, 2025
+ *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ */
+
+#ifndef _ASM_S390_GMAP_HELPERS_H
+#define _ASM_S390_GMAP_HELPERS_H
+
+void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr);
+void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
+void __gmap_helper_set_unused(struct mm_struct *mm, unsigned long vmaddr);
+int gmap_helper_disable_cow_sharing(void);
+
+#endif /* _ASM_S390_GMAP_HELPERS_H */
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 74f73141f9b9..ce8b2f8125c4 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -11,6 +11,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
 #include <asm/virtio-ccw.h>
 #include "kvm-s390.h"
 #include "trace.h"
@@ -37,7 +38,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 	 * fast path (no prefix swap page involved)
 	 */
 	if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
-		gmap_discard(vcpu->arch.gmap, start, end);
+		gmap_helper_discard(vcpu->kvm->mm, start, end);
 	} else {
 		/*
 		 * This is slow path.  gmap_discard will check for start
@@ -45,12 +46,12 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 		 * prefix and let gmap_discard make some of these calls
 		 * NOPs.
 		 */
-		gmap_discard(vcpu->arch.gmap, start, prefix);
+		gmap_helper_discard(vcpu->kvm->mm, start, prefix);
 		if (start <= prefix)
-			gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+			gmap_helper_discard(vcpu->kvm->mm, 0, PAGE_SIZE);
 		if (end > prefix + PAGE_SIZE)
-			gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
-		gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+			gmap_helper_discard(vcpu->kvm->mm, PAGE_SIZE, 2 * PAGE_SIZE);
+		gmap_helper_discard(vcpu->kvm->mm, prefix + 2 * PAGE_SIZE, end);
 	}
 	return 0;
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3f3175193fd7..b904f1bcb363 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -40,6 +40,7 @@
 #include <asm/machine.h>
 #include <asm/stp.h>
 #include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
 #include <asm/nmi.h>
 #include <asm/isc.h>
 #include <asm/sclp.h>
@@ -2674,7 +2675,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 		if (r)
 			break;
 
-		r = s390_disable_cow_sharing();
+		r = gmap_helper_disable_cow_sharing();
 		if (r)
 			break;
 
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 9726b91fe7e4..bd0401cc7ca5 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -12,3 +12,5 @@ obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PTDUMP)		+= dump_pagetables.o
 obj-$(CONFIG_PGSTE)		+= gmap.o
 obj-$(CONFIG_PFAULT)		+= pfault.o
+
+obj-$(subst m,y,$(CONFIG_KVM))	+= gmap_helpers.o
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 4869555ff403..17a2a57de3a2 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -22,6 +22,7 @@
 #include <asm/page-states.h>
 #include <asm/pgalloc.h>
 #include <asm/machine.h>
+#include <asm/gmap_helpers.h>
 #include <asm/gmap.h>
 #include <asm/page.h>
 
@@ -619,58 +620,33 @@ EXPORT_SYMBOL(__gmap_link);
  */
 void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
 {
-	struct vm_area_struct *vma;
 	unsigned long vmaddr;
-	spinlock_t *ptl;
-	pte_t *ptep;
+
+	mmap_assert_locked(gmap->mm);
 
 	/* Find the vm address for the guest address */
 	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
 						   gaddr >> PMD_SHIFT);
 	if (vmaddr) {
 		vmaddr |= gaddr & ~PMD_MASK;
-
-		vma = vma_lookup(gmap->mm, vmaddr);
-		if (!vma || is_vm_hugetlb_page(vma))
-			return;
-
-		/* Get pointer to the page table entry */
-		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
-		if (likely(ptep)) {
-			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
-			pte_unmap_unlock(ptep, ptl);
-		}
+		__gmap_helper_zap_one(gmap->mm, vmaddr);
 	}
 }
 EXPORT_SYMBOL_GPL(__gmap_zap);
 
 void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
 {
-	unsigned long gaddr, vmaddr, size;
-	struct vm_area_struct *vma;
+	unsigned long vmaddr, next, start, end;
 
 	mmap_read_lock(gmap->mm);
-	for (gaddr = from; gaddr < to;
-	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
-		/* Find the vm address for the guest address */
-		vmaddr = (unsigned long)
-			radix_tree_lookup(&gmap->guest_to_host,
-					  gaddr >> PMD_SHIFT);
+	for ( ; from < to; from = next) {
+		next = ALIGN(from + 1, PMD_SIZE);
+		vmaddr = (unsigned long)radix_tree_lookup(&gmap->guest_to_host, from >> PMD_SHIFT);
 		if (!vmaddr)
 			continue;
-		vmaddr |= gaddr & ~PMD_MASK;
-		/* Find vma in the parent mm */
-		vma = find_vma(gmap->mm, vmaddr);
-		if (!vma)
-			continue;
-		/*
-		 * We do not discard pages that are backed by
-		 * hugetlbfs, so we don't have to refault them.
-		 */
-		if (is_vm_hugetlb_page(vma))
-			continue;
-		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
-		zap_page_range_single(vma, vmaddr, size, NULL);
+		start = vmaddr | (from & ~PMD_MASK);
+		end = (vmaddr | (min(to - 1, next - 1) & ~PMD_MASK)) + 1;
+		__gmap_helper_discard(gmap->mm, start, end);
 	}
 	mmap_read_unlock(gmap->mm);
 }
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
new file mode 100644
index 000000000000..8e66586718f6
--- /dev/null
+++ b/arch/s390/mm/gmap_helpers.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Helper functions for KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2007, 2025
+ *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
+ *               Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *               David Hildenbrand <david@redhat.com>
+ *               Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagewalk.h>
+#include <linux/ksm.h>
+#include <asm/gmap_helpers.h>
+
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+{
+	if (!non_swap_entry(entry))
+		dec_mm_counter(mm, MM_SWAPENTS);
+	else if (is_migration_entry(entry))
+		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
+	free_swap_and_cache(entry);
+}
+
+void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)
+{
+	struct vm_area_struct *vma;
+	spinlock_t *ptl;
+	pte_t *ptep;
+
+	mmap_assert_locked(mm);
+
+	/* Find the vm address for the guest address */
+	vma = vma_lookup(mm, vmaddr);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return;
+
+	/* Get pointer to the page table entry */
+	ptep = get_locked_pte(mm, vmaddr, &ptl);
+	if (!likely(ptep))
+		return;
+	if (pte_swap(*ptep))
+		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+	pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(__gmap_helper_zap_one);
+
+void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
+{
+	struct vm_area_struct *vma;
+	unsigned long next;
+
+	mmap_assert_locked(mm);
+
+	while (vmaddr < end) {
+		vma = find_vma_intersection(mm, vmaddr, end);
+		if (!vma)
+			break;
+		vmaddr = max(vmaddr, vma->vm_start);
+		next = min(end, vma->vm_end);
+		if (!is_vm_hugetlb_page(vma))
+			zap_page_range_single(vma, vmaddr, next - vmaddr, NULL);
+		vmaddr = next;
+	}
+}
+
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
+{
+	mmap_read_lock(mm);
+	__gmap_helper_discard(mm, vmaddr, end);
+	mmap_read_unlock(mm);
+}
+EXPORT_SYMBOL_GPL(gmap_helper_discard);
+
+static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
+{
+	struct vm_area_struct *vma;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	/* We need a valid VMA, otherwise this is clearly a fault. */
+	vma = vma_lookup(mm, addr);
+	if (!vma)
+		return -EFAULT;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return -ENOENT;
+
+	p4d = p4d_offset(pgd, addr);
+	if (!p4d_present(*p4d))
+		return -ENOENT;
+
+	pud = pud_offset(p4d, addr);
+	if (!pud_present(*pud))
+		return -ENOENT;
+
+	/* Large PUDs are not supported yet. */
+	if (pud_leaf(*pud))
+		return -EFAULT;
+
+	*pmdp = pmd_offset(pud, addr);
+	return 0;
+}
+
+void __gmap_helper_set_unused(struct mm_struct *mm, unsigned long vmaddr)
+{
+	spinlock_t *ptl;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	mmap_assert_locked(mm);
+
+	if (pmd_lookup(mm, vmaddr, &pmdp))
+		return;
+	ptl = pmd_lock(mm, pmdp);
+	if (!pmd_present(*pmdp) || pmd_leaf(*pmdp)) {
+		spin_unlock(ptl);
+		return;
+	}
+	spin_unlock(ptl);
+
+	ptep = pte_offset_map_lock(mm, pmdp, vmaddr, &ptl);
+	if (!ptep)
+		return;
+	/* The last byte of a pte can be changed freely without ipte */
+	__atomic64_or(_PAGE_UNUSED, (long *)ptep);
+	pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(__gmap_helper_set_unused);
+
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+				   unsigned long end, struct mm_walk *walk)
+{
+	unsigned long *found_addr = walk->private;
+
+	/* Return 1 of the page is a zeropage. */
+	if (is_zero_pfn(pte_pfn(*pte))) {
+		/*
+		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
+		 * currently only works in COW mappings, which is also where
+		 * mm_forbids_zeropage() is checked.
+		 */
+		if (!is_cow_mapping(walk->vma->vm_flags))
+			return -EFAULT;
+
+		*found_addr = addr;
+		return 1;
+	}
+	return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+	.pte_entry      = find_zeropage_pte_entry,
+	.walk_lock      = PGWALK_WRLOCK,
+};
+
+/*
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+	unsigned long addr;
+	vm_fault_t fault;
+	int rc;
+
+	for_each_vma(vmi, vma) {
+		/*
+		 * We could only look at COW mappings, but it's more future
+		 * proof to catch unexpected zeropages in other mappings and
+		 * fail.
+		 */
+		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+			continue;
+		addr = vma->vm_start;
+
+retry:
+		rc = walk_page_range_vma(vma, addr, vma->vm_end,
+					 &find_zeropage_ops, &addr);
+		if (rc < 0)
+			return rc;
+		else if (!rc)
+			continue;
+
+		/* addr was updated by find_zeropage_pte_entry() */
+		fault = handle_mm_fault(vma, addr,
+					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+					NULL);
+		if (fault & VM_FAULT_OOM)
+			return -ENOMEM;
+		/*
+		 * See break_ksm(): even after handle_mm_fault() returned 0, we
+		 * must start the lookup from the current address, because
+		 * handle_mm_fault() may back out if there's any difficulty.
+		 *
+		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+		 * maybe they could trigger in the future on concurrent
+		 * truncation. In that case, the shared zeropage would be gone
+		 * and we can simply retry and make progress.
+		 */
+		cond_resched();
+		goto retry;
+	}
+
+	return 0;
+}
+
+static int __gmap_helper_disable_cow_sharing(struct mm_struct *mm)
+{
+	int rc;
+
+	if (!mm->context.allow_cow_sharing)
+		return 0;
+
+	mm->context.allow_cow_sharing = 0;
+
+	/* Replace all shared zeropages by anonymous pages. */
+	rc = __gmap_helper_unshare_zeropages(mm);
+	/*
+	 * Make sure to disable KSM (if enabled for the whole process or
+	 * individual VMAs). Note that nothing currently hinders user space
+	 * from re-enabling it.
+	 */
+	if (!rc)
+		rc = ksm_disable(mm);
+	if (rc)
+		mm->context.allow_cow_sharing = 1;
+	return rc;
+}
+
+/*
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
+ *
+ * Not that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int gmap_helper_disable_cow_sharing(void)
+{
+	int rc;
+
+	mmap_write_lock(current->mm);
+	rc = __gmap_helper_disable_cow_sharing(current->mm);
+	mmap_write_unlock(current->mm);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
-- 
2.49.0

Re: [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers

Posted by Nina Schoetterl-Glausch 8 months, 3 weeks ago

On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> Refactor some gmap functions; move the implementation into a separate
> file with only helper functions. The new helper functions work on vm
> addresses, leaving all gmap logic in the gmap functions, which mostly
> become just wrappers.
> 
> The whole gmap handling is going to be moved inside KVM soon, but the
> helper functions need to touch core mm functions, and thus need to
> stay in the core of kernel.
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> ---
>  MAINTAINERS                          |   2 +
>  arch/s390/include/asm/gmap_helpers.h |  18 ++
>  arch/s390/kvm/diag.c                 |  11 +-
>  arch/s390/kvm/kvm-s390.c             |   3 +-
>  arch/s390/mm/Makefile                |   2 +
>  arch/s390/mm/gmap.c                  |  46 ++---
>  arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
>  7 files changed, 307 insertions(+), 41 deletions(-)
>  create mode 100644 arch/s390/include/asm/gmap_helpers.h
>  create mode 100644 arch/s390/mm/gmap_helpers.c
> 
[...]

> diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> index 4869555ff403..17a2a57de3a2 100644
> --- a/arch/s390/mm/gmap.c
> +++ b/arch/s390/mm/gmap.c
> 
[...]

>  void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
>  {
> -	unsigned long gaddr, vmaddr, size;
> -	struct vm_area_struct *vma;
> +	unsigned long vmaddr, next, start, end;
>  
>  	mmap_read_lock(gmap->mm);
> -	for (gaddr = from; gaddr < to;
> -	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
> -		/* Find the vm address for the guest address */
> -		vmaddr = (unsigned long)
> -			radix_tree_lookup(&gmap->guest_to_host,
> -					  gaddr >> PMD_SHIFT);
> +	for ( ; from < to; from = next) {
> +		next = ALIGN(from + 1, PMD_SIZE);

I think you can use pmd_addr_end(from, to) here...

> +		vmaddr = (unsigned long)radix_tree_lookup(&gmap->guest_to_host, from >> PMD_SHIFT);
>  		if (!vmaddr)
>  			continue;
> -		vmaddr |= gaddr & ~PMD_MASK;
> -		/* Find vma in the parent mm */
> -		vma = find_vma(gmap->mm, vmaddr);
> -		if (!vma)
> -			continue;
> -		/*
> -		 * We do not discard pages that are backed by
> -		 * hugetlbfs, so we don't have to refault them.
> -		 */
> -		if (is_vm_hugetlb_page(vma))
> -			continue;
> -		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
> -		zap_page_range_single(vma, vmaddr, size, NULL);
> +		start = vmaddr | (from & ~PMD_MASK);
> +		end = (vmaddr | (min(to - 1, next - 1) & ~PMD_MASK)) + 1;

... then simply do:
		end = vmaddr + (next - from);

> +		__gmap_helper_discard(gmap->mm, start, end);
>  	}
>  	mmap_read_unlock(gmap->mm);
>  }
> diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
> new file mode 100644
> index 000000000000..8e66586718f6
> --- /dev/null
> +++ b/arch/s390/mm/gmap_helpers.c
> @@ -0,0 +1,266 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + *  Helper functions for KVM guest address space mapping code
> + *
> + *    Copyright IBM Corp. 2007, 2025
> + *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
> + *               Martin Schwidefsky <schwidefsky@de.ibm.com>
> + *               David Hildenbrand <david@redhat.com>
> + *               Janosch Frank <frankja@linux.vnet.ibm.com>
> + */
> +#include <linux/mm_types.h>
> +#include <linux/mmap_lock.h>
> +#include <linux/mm.h>
> +#include <linux/hugetlb.h>
> +#include <linux/pagewalk.h>
> +#include <linux/ksm.h>
> +#include <asm/gmap_helpers.h>

Please add documentation to all these functions for those of us that
don't live and breath mm code :)

> +
> +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
> +{
> +	if (!non_swap_entry(entry))
> +		dec_mm_counter(mm, MM_SWAPENTS);
> +	else if (is_migration_entry(entry))
> +		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
> +	free_swap_and_cache(entry);
> +}
> +
> +void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)

__gmap_helper_zap_mapping_pte ?

> +{
> +	struct vm_area_struct *vma;
> +	spinlock_t *ptl;
> +	pte_t *ptep;
> +
> +	mmap_assert_locked(mm);
> +
> +	/* Find the vm address for the guest address */
> +	vma = vma_lookup(mm, vmaddr);
> +	if (!vma || is_vm_hugetlb_page(vma))
> +		return;
> +
> +	/* Get pointer to the page table entry */
> +	ptep = get_locked_pte(mm, vmaddr, &ptl);
> +	if (!likely(ptep))

if (unlikely(!ptep)) reads nicer to me.

> +		return;
> +	if (pte_swap(*ptep))
> +		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
> +	pte_unmap_unlock(ptep, ptl);
> +}
> +EXPORT_SYMBOL_GPL(__gmap_helper_zap_one);

Looks reasonable, but I'm not well versed enough in mm code to evaluate
that with confidence.

> +
> +void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)

Maybe call this gmap_helper_discard_nolock or something.

> +{
> +	struct vm_area_struct *vma;
> +	unsigned long next;
> +
> +	mmap_assert_locked(mm);
> +
> +	while (vmaddr < end) {
> +		vma = find_vma_intersection(mm, vmaddr, end);
> +		if (!vma)
> +			break;
> +		vmaddr = max(vmaddr, vma->vm_start);
> +		next = min(end, vma->vm_end);
> +		if (!is_vm_hugetlb_page(vma))
> +			zap_page_range_single(vma, vmaddr, next - vmaddr, NULL);
> +		vmaddr = next;
> +	}
> +}
> +
> +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
> +{
> +	mmap_read_lock(mm);
> +	__gmap_helper_discard(mm, vmaddr, end);
> +	mmap_read_unlock(mm);
> +}
> +EXPORT_SYMBOL_GPL(gmap_helper_discard);
> +
> +static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
> +{
> +	struct vm_area_struct *vma;
> +	pgd_t *pgd;
> +	p4d_t *p4d;
> +	pud_t *pud;
> +
> +	/* We need a valid VMA, otherwise this is clearly a fault. */
> +	vma = vma_lookup(mm, addr);
> +	if (!vma)
> +		return -EFAULT;
> +
> +	pgd = pgd_offset(mm, addr);
> +	if (!pgd_present(*pgd))
> +		return -ENOENT;

What about pgd_bad?

> +
> +	p4d = p4d_offset(pgd, addr);
> +	if (!p4d_present(*p4d))
> +		return -ENOENT;
> +
> +	pud = pud_offset(p4d, addr);
> +	if (!pud_present(*pud))
> +		return -ENOENT;
> +
> +	/* Large PUDs are not supported yet. */
> +	if (pud_leaf(*pud))
> +		return -EFAULT;
> +
> +	*pmdp = pmd_offset(pud, addr);
> +	return 0;
> +}
> +
> +void __gmap_helper_set_unused(struct mm_struct *mm, unsigned long vmaddr)

What is this function for? Why do you introduce it now?

> +{
> +	spinlock_t *ptl;
> +	pmd_t *pmdp;
> +	pte_t *ptep;
> +
> +	mmap_assert_locked(mm);
> +
> +	if (pmd_lookup(mm, vmaddr, &pmdp))
> +		return;
> +	ptl = pmd_lock(mm, pmdp);
> +	if (!pmd_present(*pmdp) || pmd_leaf(*pmdp)) {
> +		spin_unlock(ptl);
> +		return;
> +	}
> +	spin_unlock(ptl);
> +
> +	ptep = pte_offset_map_lock(mm, pmdp, vmaddr, &ptl);
> +	if (!ptep)
> +		return;
> +	/* The last byte of a pte can be changed freely without ipte */
> +	__atomic64_or(_PAGE_UNUSED, (long *)ptep);
> +	pte_unmap_unlock(ptep, ptl);
> +}
> +EXPORT_SYMBOL_GPL(__gmap_helper_set_unused);

The stuff below is from arch/s390/mm/gmap.c right?
Are you going to delete it from there?

> +static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
> +				   unsigned long end, struct mm_walk *walk)
> +{

[...]

> +}
> +
> +static const struct mm_walk_ops find_zeropage_ops = {
> +	.pte_entry      = find_zeropage_pte_entry,
> +	.walk_lock      = PGWALK_WRLOCK,
> +};
> +
> +/*
> + * Unshare all shared zeropages, replacing them by anonymous pages. Note that
> + * we cannot simply zap all shared zeropages, because this could later
> + * trigger unexpected userfaultfd missing events.
> + *
> + * This must be called after mm->context.allow_cow_sharing was
> + * set to 0, to avoid future mappings of shared zeropages.
> + *
> + * mm contracts with s390, that even if mm were to remove a page table,
> + * and racing with walk_page_range_vma() calling pte_offset_map_lock()
> + * would fail, it will never insert a page table containing empty zero
> + * pages once mm_forbids_zeropage(mm) i.e.
> + * mm->context.allow_cow_sharing is set to 0.
> + */
> +static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
> +{

[...]

> +}
> +
> +static int __gmap_helper_disable_cow_sharing(struct mm_struct *mm)
> +{

[...]

> +}
> +
> +/*
> + * Disable most COW-sharing of memory pages for the whole process:
> + * (1) Disable KSM and unmerge/unshare any KSM pages.
> + * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
> + *
> + * Not that we currently don't bother with COW-shared pages that are shared
> + * with parent/child processes due to fork().
> + */
> +int gmap_helper_disable_cow_sharing(void)
> +{

[...]

> +}
> +EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);

-- 
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294

Re: [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers

Posted by Claudio Imbrenda 8 months, 3 weeks ago

On Wed, 21 May 2025 16:55:18 +0200
Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:

> On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> > Refactor some gmap functions; move the implementation into a separate
> > file with only helper functions. The new helper functions work on vm
> > addresses, leaving all gmap logic in the gmap functions, which mostly
> > become just wrappers.
> > 
> > The whole gmap handling is going to be moved inside KVM soon, but the
> > helper functions need to touch core mm functions, and thus need to
> > stay in the core of kernel.
> > 
> > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > ---
> >  MAINTAINERS                          |   2 +
> >  arch/s390/include/asm/gmap_helpers.h |  18 ++
> >  arch/s390/kvm/diag.c                 |  11 +-
> >  arch/s390/kvm/kvm-s390.c             |   3 +-
> >  arch/s390/mm/Makefile                |   2 +
> >  arch/s390/mm/gmap.c                  |  46 ++---
> >  arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
> >  7 files changed, 307 insertions(+), 41 deletions(-)
> >  create mode 100644 arch/s390/include/asm/gmap_helpers.h
> >  create mode 100644 arch/s390/mm/gmap_helpers.c
> >   
> [...]
> 
> > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> > index 4869555ff403..17a2a57de3a2 100644
> > --- a/arch/s390/mm/gmap.c
> > +++ b/arch/s390/mm/gmap.c
> >   
> [...]
> 
> >  void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
> >  {
> > -	unsigned long gaddr, vmaddr, size;
> > -	struct vm_area_struct *vma;
> > +	unsigned long vmaddr, next, start, end;
> >  
> >  	mmap_read_lock(gmap->mm);
> > -	for (gaddr = from; gaddr < to;
> > -	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
> > -		/* Find the vm address for the guest address */
> > -		vmaddr = (unsigned long)
> > -			radix_tree_lookup(&gmap->guest_to_host,
> > -					  gaddr >> PMD_SHIFT);
> > +	for ( ; from < to; from = next) {
> > +		next = ALIGN(from + 1, PMD_SIZE);  
> 
> I think you can use pmd_addr_end(from, to) here...

I guess

> 
> > +		vmaddr = (unsigned long)radix_tree_lookup(&gmap->guest_to_host, from >> PMD_SHIFT);
> >  		if (!vmaddr)
> >  			continue;
> > -		vmaddr |= gaddr & ~PMD_MASK;
> > -		/* Find vma in the parent mm */
> > -		vma = find_vma(gmap->mm, vmaddr);
> > -		if (!vma)
> > -			continue;
> > -		/*
> > -		 * We do not discard pages that are backed by
> > -		 * hugetlbfs, so we don't have to refault them.
> > -		 */
> > -		if (is_vm_hugetlb_page(vma))
> > -			continue;
> > -		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
> > -		zap_page_range_single(vma, vmaddr, size, NULL);
> > +		start = vmaddr | (from & ~PMD_MASK);
> > +		end = (vmaddr | (min(to - 1, next - 1) & ~PMD_MASK)) + 1;  
> 
> ... then simply do:
> 		end = vmaddr + (next - from);

looks cleaner, yes

> 
> > +		__gmap_helper_discard(gmap->mm, start, end);
> >  	}
> >  	mmap_read_unlock(gmap->mm);
> >  }
> > diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
> > new file mode 100644
> > index 000000000000..8e66586718f6
> > --- /dev/null
> > +++ b/arch/s390/mm/gmap_helpers.c
> > @@ -0,0 +1,266 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + *  Helper functions for KVM guest address space mapping code
> > + *
> > + *    Copyright IBM Corp. 2007, 2025
> > + *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
> > + *               Martin Schwidefsky <schwidefsky@de.ibm.com>
> > + *               David Hildenbrand <david@redhat.com>
> > + *               Janosch Frank <frankja@linux.vnet.ibm.com>
> > + */
> > +#include <linux/mm_types.h>
> > +#include <linux/mmap_lock.h>
> > +#include <linux/mm.h>
> > +#include <linux/hugetlb.h>
> > +#include <linux/pagewalk.h>
> > +#include <linux/ksm.h>
> > +#include <asm/gmap_helpers.h>  
> 
> Please add documentation to all these functions for those of us that
> don't live and breath mm code :)

eh... yes I think it's a good idea

> 
> > +
> > +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
> > +{
> > +	if (!non_swap_entry(entry))
> > +		dec_mm_counter(mm, MM_SWAPENTS);
> > +	else if (is_migration_entry(entry))
> > +		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
> > +	free_swap_and_cache(entry);
> > +}
> > +
> > +void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)  
> 
> __gmap_helper_zap_mapping_pte ?

but I'm not taking a pte as parameter

> 
> > +{
> > +	struct vm_area_struct *vma;
> > +	spinlock_t *ptl;
> > +	pte_t *ptep;
> > +
> > +	mmap_assert_locked(mm);
> > +
> > +	/* Find the vm address for the guest address */
> > +	vma = vma_lookup(mm, vmaddr);
> > +	if (!vma || is_vm_hugetlb_page(vma))
> > +		return;
> > +
> > +	/* Get pointer to the page table entry */
> > +	ptep = get_locked_pte(mm, vmaddr, &ptl);
> > +	if (!likely(ptep))  
> 
> if (unlikely(!ptep)) reads nicer to me.

ok

> 
> > +		return;
> > +	if (pte_swap(*ptep))
> > +		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
> > +	pte_unmap_unlock(ptep, ptl);
> > +}
> > +EXPORT_SYMBOL_GPL(__gmap_helper_zap_one);  
> 
> Looks reasonable, but I'm not well versed enough in mm code to evaluate
> that with confidence.
> 
> > +
> > +void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)  
> 
> Maybe call this gmap_helper_discard_nolock or something.

maybe __gmap_helper_discard_unlocked?

the __ prefix often implies lack of locking

> 
> > +{
> > +	struct vm_area_struct *vma;
> > +	unsigned long next;
> > +
> > +	mmap_assert_locked(mm);
> > +
> > +	while (vmaddr < end) {
> > +		vma = find_vma_intersection(mm, vmaddr, end);
> > +		if (!vma)
> > +			break;
> > +		vmaddr = max(vmaddr, vma->vm_start);
> > +		next = min(end, vma->vm_end);
> > +		if (!is_vm_hugetlb_page(vma))
> > +			zap_page_range_single(vma, vmaddr, next - vmaddr, NULL);
> > +		vmaddr = next;
> > +	}
> > +}
> > +
> > +void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
> > +{
> > +	mmap_read_lock(mm);
> > +	__gmap_helper_discard(mm, vmaddr, end);
> > +	mmap_read_unlock(mm);
> > +}
> > +EXPORT_SYMBOL_GPL(gmap_helper_discard);
> > +
> > +static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
> > +{
> > +	struct vm_area_struct *vma;
> > +	pgd_t *pgd;
> > +	p4d_t *p4d;
> > +	pud_t *pud;
> > +
> > +	/* We need a valid VMA, otherwise this is clearly a fault. */
> > +	vma = vma_lookup(mm, addr);
> > +	if (!vma)
> > +		return -EFAULT;
> > +
> > +	pgd = pgd_offset(mm, addr);
> > +	if (!pgd_present(*pgd))
> > +		return -ENOENT;  
> 
> What about pgd_bad?

I don't know, this code is copied verbatim from mm/gmap.c

> 
> > +
> > +	p4d = p4d_offset(pgd, addr);
> > +	if (!p4d_present(*p4d))
> > +		return -ENOENT;
> > +
> > +	pud = pud_offset(p4d, addr);
> > +	if (!pud_present(*pud))
> > +		return -ENOENT;
> > +
> > +	/* Large PUDs are not supported yet. */
> > +	if (pud_leaf(*pud))
> > +		return -EFAULT;
> > +
> > +	*pmdp = pmd_offset(pud, addr);
> > +	return 0;
> > +}
> > +
> > +void __gmap_helper_set_unused(struct mm_struct *mm, unsigned long vmaddr)  
> 
> What is this function for? Why do you introduce it now?

this is for cmma handling, I'm introducing it now because it needs to
be in this file and I would like to put all the stuff in at once.
I will not need to touch this file anymore if I get this in now.

> 
> > +{
> > +	spinlock_t *ptl;
> > +	pmd_t *pmdp;
> > +	pte_t *ptep;
> > +
> > +	mmap_assert_locked(mm);
> > +
> > +	if (pmd_lookup(mm, vmaddr, &pmdp))
> > +		return;
> > +	ptl = pmd_lock(mm, pmdp);
> > +	if (!pmd_present(*pmdp) || pmd_leaf(*pmdp)) {
> > +		spin_unlock(ptl);
> > +		return;
> > +	}
> > +	spin_unlock(ptl);
> > +
> > +	ptep = pte_offset_map_lock(mm, pmdp, vmaddr, &ptl);
> > +	if (!ptep)
> > +		return;
> > +	/* The last byte of a pte can be changed freely without ipte */
> > +	__atomic64_or(_PAGE_UNUSED, (long *)ptep);
> > +	pte_unmap_unlock(ptep, ptl);
> > +}
> > +EXPORT_SYMBOL_GPL(__gmap_helper_set_unused);  
> 
> The stuff below is from arch/s390/mm/gmap.c right?
> Are you going to delete it from there?

not in this series, but the next series will remove mm/gmap.c altogether

> 
> > +static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
> > +				   unsigned long end, struct mm_walk *walk)
> > +{  
> 
> [...]
> 
> > +}
> > +
> > +static const struct mm_walk_ops find_zeropage_ops = {
> > +	.pte_entry      = find_zeropage_pte_entry,
> > +	.walk_lock      = PGWALK_WRLOCK,
> > +};
> > +
> > +/*
> > + * Unshare all shared zeropages, replacing them by anonymous pages. Note that
> > + * we cannot simply zap all shared zeropages, because this could later
> > + * trigger unexpected userfaultfd missing events.
> > + *
> > + * This must be called after mm->context.allow_cow_sharing was
> > + * set to 0, to avoid future mappings of shared zeropages.
> > + *
> > + * mm contracts with s390, that even if mm were to remove a page table,
> > + * and racing with walk_page_range_vma() calling pte_offset_map_lock()
> > + * would fail, it will never insert a page table containing empty zero
> > + * pages once mm_forbids_zeropage(mm) i.e.
> > + * mm->context.allow_cow_sharing is set to 0.
> > + */
> > +static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
> > +{  
> 
> [...]
> 
> > +}
> > +
> > +static int __gmap_helper_disable_cow_sharing(struct mm_struct *mm)
> > +{  
> 
> [...]
> 
> > +}
> > +
> > +/*
> > + * Disable most COW-sharing of memory pages for the whole process:
> > + * (1) Disable KSM and unmerge/unshare any KSM pages.
> > + * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
> > + *
> > + * Not that we currently don't bother with COW-shared pages that are shared
> > + * with parent/child processes due to fork().
> > + */
> > +int gmap_helper_disable_cow_sharing(void)
> > +{  
> 
> [...]
> 
> > +}
> > +EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);  
>

Re: [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers

Posted by Nina Schoetterl-Glausch 8 months, 3 weeks ago

On Wed, 2025-05-21 at 17:19 +0200, Claudio Imbrenda wrote:
> On Wed, 21 May 2025 16:55:18 +0200
> Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:
> 
> > On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> > > Refactor some gmap functions; move the implementation into a separate
> > > file with only helper functions. The new helper functions work on vm
> > > addresses, leaving all gmap logic in the gmap functions, which mostly
> > > become just wrappers.
> > > 
> > > The whole gmap handling is going to be moved inside KVM soon, but the
> > > helper functions need to touch core mm functions, and thus need to
> > > stay in the core of kernel.
> > > 
> > > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > > ---
> > >  MAINTAINERS                          |   2 +
> > >  arch/s390/include/asm/gmap_helpers.h |  18 ++
> > >  arch/s390/kvm/diag.c                 |  11 +-
> > >  arch/s390/kvm/kvm-s390.c             |   3 +-
> > >  arch/s390/mm/Makefile                |   2 +
> > >  arch/s390/mm/gmap.c                  |  46 ++---
> > >  arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
> > >  7 files changed, 307 insertions(+), 41 deletions(-)
> > >  create mode 100644 arch/s390/include/asm/gmap_helpers.h
> > >  create mode 100644 arch/s390/mm/gmap_helpers.c
> > >   
[...]

> > > +void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)  
> > 
> > __gmap_helper_zap_mapping_pte ?
> 
> but I'm not taking a pte as parameter

The pte being zapped is the one mapping vmaddr, right?
> 
> > 
> > > +{
> > > +	struct vm_area_struct *vma;
> > > +	spinlock_t *ptl;
> > > +	pte_t *ptep;
> > > +
> > > +	mmap_assert_locked(mm);
> > > +
> > > +	/* Find the vm address for the guest address */
> > > +	vma = vma_lookup(mm, vmaddr);
> > > +	if (!vma || is_vm_hugetlb_page(vma))
> > > +		return;
> > > +
> > > +	/* Get pointer to the page table entry */
> > > +	ptep = get_locked_pte(mm, vmaddr, &ptl);
> > > +	if (!likely(ptep))  
> > 
> > if (unlikely(!ptep)) reads nicer to me.
> 
> ok
> 
> > 
> > > +		return;
> > > +	if (pte_swap(*ptep))
> > > +		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
> > > +	pte_unmap_unlock(ptep, ptl);
> > > +}
> > > +EXPORT_SYMBOL_GPL(__gmap_helper_zap_one);  
> > 
> > Looks reasonable, but I'm not well versed enough in mm code to evaluate
> > that with confidence.
> > 
> > > +
> > > +void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)  
> > 
> > Maybe call this gmap_helper_discard_nolock or something.
> 
> maybe __gmap_helper_discard_unlocked?
> 
> the __ prefix often implies lack of locking

_nolock *definitely* implies it :P

[...]

> > 
> > The stuff below is from arch/s390/mm/gmap.c right?
> > Are you going to delete it from there?
> 
> not in this series, but the next series will remove mm/gmap.c altogether

Can't you do it with this one?


[...]
-- 
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294

Re: [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers

Posted by Claudio Imbrenda 8 months, 3 weeks ago

On Wed, 21 May 2025 17:30:00 +0200
Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:

> On Wed, 2025-05-21 at 17:19 +0200, Claudio Imbrenda wrote:
> > On Wed, 21 May 2025 16:55:18 +0200
> > Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:
> >   
> > > On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:  
> > > > Refactor some gmap functions; move the implementation into a separate
> > > > file with only helper functions. The new helper functions work on vm
> > > > addresses, leaving all gmap logic in the gmap functions, which mostly
> > > > become just wrappers.
> > > > 
> > > > The whole gmap handling is going to be moved inside KVM soon, but the
> > > > helper functions need to touch core mm functions, and thus need to
> > > > stay in the core of kernel.
> > > > 
> > > > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > > > ---
> > > >  MAINTAINERS                          |   2 +
> > > >  arch/s390/include/asm/gmap_helpers.h |  18 ++
> > > >  arch/s390/kvm/diag.c                 |  11 +-
> > > >  arch/s390/kvm/kvm-s390.c             |   3 +-
> > > >  arch/s390/mm/Makefile                |   2 +
> > > >  arch/s390/mm/gmap.c                  |  46 ++---
> > > >  arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
> > > >  7 files changed, 307 insertions(+), 41 deletions(-)
> > > >  create mode 100644 arch/s390/include/asm/gmap_helpers.h
> > > >  create mode 100644 arch/s390/mm/gmap_helpers.c
> > > >     
> [...]
> 
> > > > +void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)    
> > > 
> > > __gmap_helper_zap_mapping_pte ?  
> > 
> > but I'm not taking a pte as parameter  
> 
> The pte being zapped is the one mapping vmaddr, right?

I don't know, _pte kinda sounds to me as the function would be taking a
pte as parameter

> >   
> > >   
> > > > +{
> > > > +	struct vm_area_struct *vma;
> > > > +	spinlock_t *ptl;
> > > > +	pte_t *ptep;
> > > > +
> > > > +	mmap_assert_locked(mm);
> > > > +
> > > > +	/* Find the vm address for the guest address */
> > > > +	vma = vma_lookup(mm, vmaddr);
> > > > +	if (!vma || is_vm_hugetlb_page(vma))
> > > > +		return;
> > > > +
> > > > +	/* Get pointer to the page table entry */
> > > > +	ptep = get_locked_pte(mm, vmaddr, &ptl);
> > > > +	if (!likely(ptep))    
> > > 
> > > if (unlikely(!ptep)) reads nicer to me.  
> > 
> > ok
> >   
> > >   
> > > > +		return;
> > > > +	if (pte_swap(*ptep))
> > > > +		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
> > > > +	pte_unmap_unlock(ptep, ptl);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(__gmap_helper_zap_one);    
> > > 
> > > Looks reasonable, but I'm not well versed enough in mm code to evaluate
> > > that with confidence.
> > >   
> > > > +
> > > > +void __gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)    
> > > 
> > > Maybe call this gmap_helper_discard_nolock or something.  
> > 
> > maybe __gmap_helper_discard_unlocked?
> > 
> > the __ prefix often implies lack of locking  
> 
> _nolock *definitely* implies it :P
> 
> [...]
> 
> > > 
> > > The stuff below is from arch/s390/mm/gmap.c right?
> > > Are you going to delete it from there?  
> > 
> > not in this series, but the next series will remove mm/gmap.c altogether  
> 
> Can't you do it with this one?

if you mean removing mm/gmap.c, no. I would need to push the whole gmap
rewrite series, which is not ready yet.

if you mean removing the redundant functions... I guess I could

> 
> 
> [...]

Re: [PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers

Posted by Nina Schoetterl-Glausch 8 months, 3 weeks ago

On Wed, 2025-05-21 at 17:41 +0200, Claudio Imbrenda wrote:
> On Wed, 21 May 2025 17:30:00 +0200
> Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:
> 
> > On Wed, 2025-05-21 at 17:19 +0200, Claudio Imbrenda wrote:
> > > On Wed, 21 May 2025 16:55:18 +0200
> > > Nina Schoetterl-Glausch <nsg@linux.ibm.com> wrote:
> > >   
> > > > On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:  
> > > > > Refactor some gmap functions; move the implementation into a separate
> > > > > file with only helper functions. The new helper functions work on vm
> > > > > addresses, leaving all gmap logic in the gmap functions, which mostly
> > > > > become just wrappers.
> > > > > 
> > > > > The whole gmap handling is going to be moved inside KVM soon, but the
> > > > > helper functions need to touch core mm functions, and thus need to
> > > > > stay in the core of kernel.
> > > > > 
> > > > > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > > > > ---
> > > > >  MAINTAINERS                          |   2 +
> > > > >  arch/s390/include/asm/gmap_helpers.h |  18 ++
> > > > >  arch/s390/kvm/diag.c                 |  11 +-
> > > > >  arch/s390/kvm/kvm-s390.c             |   3 +-
> > > > >  arch/s390/mm/Makefile                |   2 +
> > > > >  arch/s390/mm/gmap.c                  |  46 ++---
> > > > >  arch/s390/mm/gmap_helpers.c          | 266 +++++++++++++++++++++++++++
> > > > >  7 files changed, 307 insertions(+), 41 deletions(-)
> > > > >  create mode 100644 arch/s390/include/asm/gmap_helpers.h
> > > > >  create mode 100644 arch/s390/mm/gmap_helpers.c
> > > > >     
> > [...]
> > 
> > > > > +void __gmap_helper_zap_one(struct mm_struct *mm, unsigned long vmaddr)    
> > > > 
> > > > __gmap_helper_zap_mapping_pte ?  
> > > 
> > > but I'm not taking a pte as parameter  
> > 
> > The pte being zapped is the one mapping vmaddr, right?
> 
> I don't know, _pte kinda sounds to me as the function would be taking a
> pte as parameter

__gmap_helper_zap_pte_mapping_addr ?

IMO __gmap_helper_zap_one is rather vague. Zap one? Which one?

[...]

> > > 
> > > > The stuff below is from arch/s390/mm/gmap.c right?
> > > > Are you going to delete it from there?  
> > > 
> > > not in this series, but the next series will remove mm/gmap.c altogether  
> > 
> > Can't you do it with this one?
> 
> if you mean removing mm/gmap.c, no. I would need to push the whole gmap
> rewrite series, which is not ready yet.
> 
> if you mean removing the redundant functions... I guess I could

The latter. I think that would be cleaner.
> 
> > 
> > 
> > [...]

-- 
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294

[PATCH v1 1/5] s390: remove unneeded includes
[PATCH v1 2/5] KVM: s390: remove unneeded srcu lock
[PATCH v1 3/5] KVM: s390: refactor some functions in priv.c
[PATCH v1 4/5] KVM: s390: refactor and split some gmap helpers
[PATCH v1 5/5] KVM: s390: simplify and move pv code