[RFC PATCH 2/2] s390/mm: Batch PTE updates in lazy MMU mode

Alexander Gordeev posted 2 patches 1 week, 1 day ago
[RFC PATCH 2/2] s390/mm: Batch PTE updates in lazy MMU mode
Posted by Alexander Gordeev 1 week, 1 day ago
Make use of the IPTE instruction's "Additional Entries" field to
invalidate multiple PTEs in one go while in lazy MMU mode. This
is the mode in which many memory-management system calls (like
mremap(), mprotect(), etc.) update memory attributes.

To achieve that, the set_pte() and ptep_get() primitives use a
per-CPU cache to store and retrieve PTE values and apply the
cached values to the real page table once lazy MMU mode is left.

The same is done for memory-management platform callbacks that
would otherwise cause intense per-PTE IPTE traffic, reducing the
number of IPTE instructions from up to PTRS_PER_PTE to a single
instruction in the best case. The average reduction is of course
smaller.

Since all existing page table iterators called in lazy MMU mode
handle one table at a time, the per-CPU cache does not need to be
larger than PTRS_PER_PTE entries. That also naturally aligns with
the IPTE instruction, which must not cross a page table boundary.

Before this change, the system calls did:

	lazy_mmu_mode_enable_pte()
	...
	<update PTEs>		// up to PTRS_PER_PTE single-IPTEs
	...
	lazy_mmu_mode_disable()

With this change, the system calls do:

    lazy_mmu_mode_enable_pte()
    ...
    <store new PTE values in the per-CPU cache>
    ...
    lazy_mmu_mode_disable()	// apply cache with one multi-IPTE

When applied to large memory ranges, some system calls show
significant speedups:

    mprotect()    ~15x
    munmap()      ~3x
    mremap()      ~28x

At the same time, fork() shows a measurable slowdown of ~1.5x.

The overall results depend on memory size and access patterns,
but the change generally does not degrade performance.

In addition to a process-wide impact, the rework affects the
whole Central Electronics Complex (CEC). Each (global) IPTE
instruction initiates a quiesce state in a CEC, so reducing
the number of IPTE calls relieves CEC-wide quiesce traffic.

In an extreme case of mprotect() continuously triggering the
quiesce state on four LPARs in parallel, measurements show
~25x fewer quiesce events.

Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
---
 arch/s390/Kconfig               |   8 +
 arch/s390/include/asm/pgtable.h | 209 +++++++++++++++--
 arch/s390/mm/Makefile           |   1 +
 arch/s390/mm/ipte_batch.c       | 396 ++++++++++++++++++++++++++++++++
 arch/s390/mm/pgtable.c          |   8 +-
 5 files changed, 603 insertions(+), 19 deletions(-)
 create mode 100644 arch/s390/mm/ipte_batch.c

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 7828fbe0fc42..5821d4d42d1d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -732,6 +732,14 @@ config MAX_PHYSMEM_BITS
 	  Increasing the number of bits also increases the kernel image size.
 	  By default 46 bits (64TB) are supported.
 
+config IPTE_BATCH
+	def_bool y
+	prompt "Enable use of the IPTE instruction's Additional Entries field"
+	select ARCH_HAS_LAZY_MMU_MODE
+	help
+	  This option enables use of the "Additional Entries" field of the
+	  IPTE instruction to invalidate multiple PTEs with a single
+	  instruction, building on the lazy MMU mode infrastructure.
+
 endmenu
 
 menu "I/O subsystem"
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 67f5df20a57e..fd135e2a1ecf 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -39,6 +39,82 @@ enum {
 
 extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX];
 
+#if !defined(CONFIG_IPTE_BATCH) || defined(__DECOMPRESSOR)
+/*
+ * IPTE batching disabled: every helper reports "not handled" (false),
+ * so the batch-aware wrappers below unconditionally fall back to the
+ * non-batched __-prefixed primitives.
+ */
+static inline
+bool ipte_batch_ptep_test_and_clear_young(struct vm_area_struct *vma,
+					  unsigned long addr, pte_t *ptep,
+					  int *res)
+{
+	return false;
+}
+
+static inline
+bool ipte_batch_ptep_get_and_clear(struct mm_struct *mm,
+				   unsigned long addr, pte_t *ptep, pte_t *res)
+{
+	return false;
+}
+
+static inline
+bool ipte_batch_ptep_get_and_clear_full(struct mm_struct *mm,
+					unsigned long addr, pte_t *ptep,
+					int full, pte_t *res)
+{
+	return false;
+}
+
+static inline
+bool ipte_batch_ptep_modify_prot_start(struct vm_area_struct *vma,
+				       unsigned long addr, pte_t *ptep, pte_t *res)
+{
+	return false;
+}
+
+static inline
+bool ipte_batch_ptep_modify_prot_commit(struct vm_area_struct *vma,
+					unsigned long addr, pte_t *ptep,
+					pte_t old_pte, pte_t pte)
+{
+	return false;
+}
+
+static inline
+bool ipte_batch_ptep_set_wrprotect(struct mm_struct *mm,
+				   unsigned long addr, pte_t *ptep)
+{
+	return false;
+}
+
+static inline bool ipte_batch_set_pte(pte_t *ptep, pte_t pte)
+{
+	return false;
+}
+
+static inline bool ipte_batch_ptep_get(pte_t *ptep, pte_t *res)
+{
+	return false;
+}
+#else
+/*
+ * Implemented in arch/s390/mm/ipte_batch.c. Each helper returns true
+ * and performs the operation via the per-CPU PTE cache when the PTE
+ * lies inside the currently armed batch, false otherwise.
+ */
+bool ipte_batch_ptep_test_and_clear_young(struct vm_area_struct *vma,
+					  unsigned long addr, pte_t *ptep,
+					  int *res);
+bool ipte_batch_ptep_get_and_clear(struct mm_struct *mm,
+				   unsigned long addr, pte_t *ptep, pte_t *res);
+bool ipte_batch_ptep_get_and_clear_full(struct mm_struct *mm,
+					unsigned long addr, pte_t *ptep,
+					int full, pte_t *res);
+bool ipte_batch_ptep_modify_prot_start(struct vm_area_struct *vma,
+				       unsigned long addr, pte_t *ptep, pte_t *res);
+bool ipte_batch_ptep_modify_prot_commit(struct vm_area_struct *vma,
+					unsigned long addr, pte_t *ptep,
+					pte_t old_pte, pte_t pte);
+
+bool ipte_batch_ptep_set_wrprotect(struct mm_struct *mm,
+				   unsigned long addr, pte_t *ptep);
+bool ipte_batch_set_pte(pte_t *ptep, pte_t pte);
+bool ipte_batch_ptep_get(pte_t *ptep, pte_t *res);
+#endif
+
 static inline void update_page_count(int level, long count)
 {
 	if (IS_ENABLED(CONFIG_PROC_FS))
@@ -978,11 +1054,32 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 	WRITE_ONCE(*pmdp, pmd);
 }
 
-static inline void set_pte(pte_t *ptep, pte_t pte)
+/* Store a PTE directly, bypassing any lazy MMU batching. */
+static inline void __set_pte(pte_t *ptep, pte_t pte)
 {
 	WRITE_ONCE(*ptep, pte);
 }
 
+/*
+ * In lazy MMU mode the new value may be stored in the per-CPU IPTE
+ * batch cache instead of the page table; it is applied to the real
+ * table when the batch is left or flushed.
+ */
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+	if (!ipte_batch_set_pte(ptep, pte))
+		__set_pte(ptep, pte);
+}
+
+static inline pte_t __ptep_get(pte_t *ptep)
+{
+	return READ_ONCE(*ptep);
+}
+
+#define ptep_get ptep_get
+/*
+ * Return the cached value if an update for this PTE is pending in the
+ * per-CPU IPTE batch, otherwise read the page table entry itself.
+ */
+static inline pte_t ptep_get(pte_t *ptep)
+{
+	pte_t res;
+
+	if (ipte_batch_ptep_get(ptep, &res))
+		return res;
+	return __ptep_get(ptep);
+}
+
 static inline void pgd_clear(pgd_t *pgd)
 {
 	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
@@ -1149,6 +1246,26 @@ static __always_inline void __ptep_ipte_range(unsigned long address, int nr,
 	} while (nr != 255);
 }
 
+#ifdef CONFIG_IPTE_BATCH
+/* Lazy MMU mode hooks, implemented in arch/s390/mm/ipte_batch.c. */
+void arch_enter_lazy_mmu_mode_pte(struct mm_struct *mm,
+				  unsigned long addr, unsigned long end,
+				  pte_t *pte);
+#define arch_enter_lazy_mmu_mode_pte arch_enter_lazy_mmu_mode_pte
+
+void arch_pause_lazy_mmu_mode(void);
+#define arch_pause_lazy_mmu_mode arch_pause_lazy_mmu_mode
+
+void arch_resume_lazy_mmu_mode(void);
+#define arch_resume_lazy_mmu_mode arch_resume_lazy_mmu_mode
+
+/*
+ * The address-less enter hook is a no-op: batching is armed per page
+ * table via arch_enter_lazy_mmu_mode_pte() above.
+ */
+static inline void arch_enter_lazy_mmu_mode(void)
+{
+}
+
+void arch_leave_lazy_mmu_mode(void);
+void arch_flush_lazy_mmu_mode(void);
+#endif
+
 /*
  * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
  * both clear the TLB for the unmapped pte. The reason is that
@@ -1166,8 +1283,8 @@ pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
 pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
 
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
-					    unsigned long addr, pte_t *ptep)
+static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
+					      unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = *ptep;
 
@@ -1175,6 +1292,16 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 	return pte_young(pte);
 }
 
+/* Batch-aware wrapper; falls back to the IPTE-based variant. */
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long addr, pte_t *ptep)
+{
+	int res;
+
+	if (ipte_batch_ptep_test_and_clear_young(vma, addr, ptep, &res))
+		return res;
+	return __ptep_test_and_clear_young(vma, addr, ptep);
+}
+
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 					 unsigned long address, pte_t *ptep)
@@ -1183,8 +1310,8 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
-				       unsigned long addr, pte_t *ptep)
+static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
+					 unsigned long addr, pte_t *ptep)
 {
 	pte_t res;
 
@@ -1192,14 +1319,49 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	/* At this point the reference through the mapping is still present */
 	if (mm_is_protected(mm) && pte_present(res))
 		WARN_ON_ONCE(uv_convert_from_secure_pte(res));
+	return res;
+}
+
+/*
+ * Batch-aware wrapper; the page table check runs here so it covers
+ * both the batched and the non-batched path.
+ */
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep)
+{
+	pte_t res;
+
+	if (!ipte_batch_ptep_get_and_clear(mm, addr, ptep, &res))
+		res = __ptep_get_and_clear(mm, addr, ptep);
 	page_table_check_pte_clear(mm, addr, res);
 	return res;
 }
 
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
-void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
-			     pte_t *, pte_t, pte_t);
+pte_t ___ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ___ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+			       pte_t *, pte_t, pte_t);
+
+/* Batch-aware wrapper; falls back to the IPTE-based backend. */
+static inline
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
+			     unsigned long addr, pte_t *ptep)
+{
+	pte_t res;
+
+	if (ipte_batch_ptep_modify_prot_start(vma, addr, ptep, &res))
+		return res;
+	return ___ptep_modify_prot_start(vma, addr, ptep);
+}
+
+static inline
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+	if (!ipte_batch_ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte))
+		___ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);
+}
 
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
@@ -1223,9 +1385,9 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
  * full==1 and a simple pte_clear is enough. See tlb.h.
  */
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
-static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
-					    unsigned long addr,
-					    pte_t *ptep, int full)
+static inline pte_t __ptep_get_and_clear_full(struct mm_struct *mm,
+					      unsigned long addr,
+					      pte_t *ptep, int full)
 {
 	pte_t res;
 
@@ -1236,8 +1398,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 		res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
 	}
 
-	page_table_check_pte_clear(mm, addr, res);
-
 	/* Nothing to do */
 	if (!mm_is_protected(mm) || !pte_present(res))
 		return res;
@@ -1258,9 +1418,21 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 	return res;
 }
 
+/*
+ * Batch-aware wrapper. page_table_check_pte_clear() was moved here from
+ * __ptep_get_and_clear_full() so it also covers the batched path.
+ */
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+					    unsigned long addr,
+					    pte_t *ptep, int full)
+{
+	pte_t res;
+
+	if (!ipte_batch_ptep_get_and_clear_full(mm, addr, ptep, full, &res))
+		res = __ptep_get_and_clear_full(mm, addr, ptep, full);
+	page_table_check_pte_clear(mm, addr, res);
+	return res;
+}
+
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm,
-				      unsigned long addr, pte_t *ptep)
+static inline void __ptep_set_wrprotect(struct mm_struct *mm,
+					unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = *ptep;
 
@@ -1268,6 +1440,13 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
 		ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
 }
 
+/* Batch-aware wrapper; falls back to the IPTE-based variant. */
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+				      unsigned long addr, pte_t *ptep)
+{
+	if (!ipte_batch_ptep_set_wrprotect(mm, addr, ptep))
+		__ptep_set_wrprotect(mm, addr, ptep);
+}
+
 /*
  * Check if PTEs only differ in _PAGE_PROTECT HW bit, but also allow SW PTE
  * bits in the comparison. Those might change e.g. because of dirty and young
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 193899c39ca7..0f6c6de447d4 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -11,5 +11,6 @@ obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PTDUMP)		+= dump_pagetables.o
 obj-$(CONFIG_PFAULT)		+= pfault.o
+obj-$(CONFIG_IPTE_BATCH)	+= ipte_batch.o
 
 obj-$(subst m,y,$(CONFIG_KVM))	+= gmap_helpers.o
diff --git a/arch/s390/mm/ipte_batch.c b/arch/s390/mm/ipte_batch.c
new file mode 100644
index 000000000000..49b166d499a9
--- /dev/null
+++ b/arch/s390/mm/ipte_batch.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/pgtable.h>
+#include <asm/facility.h>
+#include <kunit/visibility.h>
+
+/*
+ * Marker for "no cached value" in struct ipte_batch::cache.
+ * NOTE(review): a cached PTE whose raw value happens to equal 0 would
+ * be indistinguishable from the poison value — confirm that 0 is never
+ * a legitimate cached PTE value on s390.
+ */
+#define PTE_POISON	0
+
+/* Per-CPU state covering at most one page table of batched updates. */
+struct ipte_batch {
+	struct mm_struct *mm;		/* mm of the armed batch; NULL = inactive */
+	unsigned long base_addr;	/* virtual address mapped by base_pte[0] */
+	unsigned long base_end;		/* end of the covered address range */
+	pte_t *base_pte;		/* first PTE of the page table */
+	pte_t *start_pte;		/* lowest PTE with a cached update */
+	pte_t *end_pte;			/* one past the highest cached update */
+	pte_t cache[PTRS_PER_PTE];	/* cached new PTE values */
+};
+
+static DEFINE_PER_CPU(struct ipte_batch, ipte_range);
+
+/*
+ * Return the length of the leading run of PTEs in [start, end) that
+ * share the validity state of *start; *valid reports whether that run
+ * is valid (_PAGE_INVALID clear).
+ */
+static int count_contiguous(pte_t *start, pte_t *end, bool *valid)
+{
+	bool first_valid = !(pte_val(*start) & _PAGE_INVALID);
+	pte_t *ptep = start + 1;
+
+	while (ptep < end &&
+	       first_valid == !(pte_val(*ptep) & _PAGE_INVALID))
+		ptep++;
+
+	*valid = first_valid;
+	return ptep - start;
+}
+
+/*
+ * Invalidate nr_ptes contiguous PTEs starting at ptep with a single
+ * IPTE (range) instruction. A local-TLB-only clear is used when this
+ * CPU is the sole user of the mm; flush_count brackets the operation
+ * for concurrent flushers.
+ */
+static void __invalidate_pte_range(struct mm_struct *mm, unsigned long addr,
+				   int nr_ptes, pte_t *ptep)
+{
+	atomic_inc(&mm->context.flush_count);
+	if (cpu_has_tlb_lc() &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		__ptep_ipte_range(addr, nr_ptes - 1, ptep, IPTE_LOCAL);
+	else
+		__ptep_ipte_range(addr, nr_ptes - 1, ptep, IPTE_GLOBAL);
+	atomic_dec(&mm->context.flush_count);
+}
+
+/*
+ * Find the leading run of PTEs in [start, end) sharing one validity
+ * state and invalidate it if it is valid (invalid PTEs need no IPTE).
+ * Returns the length of the run.
+ */
+static int invalidate_pte_range(struct mm_struct *mm, unsigned long addr,
+				pte_t *start, pte_t *end)
+{
+	int nr_ptes;
+	bool valid;
+
+	nr_ptes = count_contiguous(start, end, &valid);
+	if (valid)
+		__invalidate_pte_range(mm, addr, nr_ptes, start);
+
+	return nr_ptes;
+}
+
+/*
+ * Apply cached PTE values to [ptep, end): invalidate each run of valid
+ * PTEs with one IPTE, then store the new values and re-poison the
+ * consumed cache slots.
+ */
+static void set_pte_range(struct mm_struct *mm, unsigned long addr,
+			  pte_t *ptep, pte_t *end, pte_t *cache)
+{
+	int i, nr_ptes;
+
+	while (ptep < end) {
+		nr_ptes = invalidate_pte_range(mm, addr, ptep, end);
+
+		for (i = 0; i < nr_ptes; i++, ptep++, cache++) {
+			__set_pte(ptep, *cache);
+			*cache = __pte(PTE_POISON);
+		}
+
+		addr += nr_ptes * PAGE_SIZE;
+	}
+}
+
+/*
+ * Arm the per-CPU batch for one page table. NOTE: the get_cpu_var() is
+ * deliberately left unpaired — preemption stays disabled until
+ * leave_ipte_batch(), which issues the matching extra put_cpu_var().
+ */
+static void enter_ipte_batch(struct mm_struct *mm,
+			     unsigned long addr, unsigned long end, pte_t *pte)
+{
+	struct ipte_batch *ib;
+
+	ib = &get_cpu_var(ipte_range);
+
+	ib->mm = mm;
+	ib->base_addr = addr;
+	ib->base_end = end;
+	ib->base_pte = pte;
+}
+
+/*
+ * Apply all cached PTE updates and disarm the batch.
+ *
+ * Preemption accounting is asymmetric by design: enter_ipte_batch()
+ * left preemption disabled, so this function issues one more
+ * put_cpu_var() than get_cpu_var() to rebalance the count.
+ */
+static void leave_ipte_batch(void)
+{
+	pte_t *ptep, *start, *start_cache, *cache;
+	unsigned long start_addr, addr;
+	struct ipte_batch *ib;
+	int start_idx;
+
+	ib = &get_cpu_var(ipte_range);
+	if (!ib->mm) {
+		put_cpu_var(ipte_range);
+		return;
+	}
+	put_cpu_var(ipte_range);
+
+	lockdep_assert_preemption_disabled();
+	if (!ib->start_pte)
+		goto done;
+
+	start = ib->start_pte;
+	start_idx = ib->start_pte - ib->base_pte;
+	start_addr = ib->base_addr + start_idx * PAGE_SIZE;
+	addr = start_addr;
+	start_cache = &ib->cache[start_idx];
+	cache = start_cache;
+	/* Flush each run of non-poisoned slots in the dirty window. */
+	for (ptep = start; ptep < ib->end_pte; ptep++, cache++, addr += PAGE_SIZE) {
+		if (pte_val(*cache) == PTE_POISON) {
+			if (start) {
+				set_pte_range(ib->mm, start_addr, start, ptep, start_cache);
+				start = NULL;
+			}
+		} else if (!start) {
+			start = ptep;
+			start_addr = addr;
+			start_cache = cache;
+		}
+	}
+	/*
+	 * start is NULL if the last slot of the window was poisoned and
+	 * its run was already flushed inside the loop; passing NULL to
+	 * set_pte_range() would dereference a NULL PTE pointer.
+	 */
+	if (start)
+		set_pte_range(ib->mm, start_addr, start, ptep, start_cache);
+
+	ib->start_pte = NULL;
+	ib->end_pte = NULL;
+
+done:
+	ib->mm = NULL;
+	ib->base_addr = 0;
+	ib->base_end = 0;
+	ib->base_pte = NULL;
+
+	put_cpu_var(ipte_range);
+}
+
+/*
+ * Apply pending updates but keep the batch armed for the same range
+ * (leave + re-enter), so the caller can continue batching afterwards.
+ */
+static void flush_lazy_mmu_mode(void)
+{
+	unsigned long addr, end;
+	struct ipte_batch *ib;
+	struct mm_struct *mm;
+	pte_t *pte;
+
+	ib = &get_cpu_var(ipte_range);
+	if (ib->mm) {
+		mm = ib->mm;
+		addr = ib->base_addr;
+		end = ib->base_end;
+		pte = ib->base_pte;
+
+		leave_ipte_batch();
+		enter_ipte_batch(mm, addr, end, pte);
+	}
+	put_cpu_var(ipte_range);
+}
+
+/*
+ * Lazy MMU mode entry points. All are no-ops unless facility bit 13 is
+ * installed — presumably the IPTE-range facility; confirm against the
+ * Principles of Operation.
+ */
+void arch_enter_lazy_mmu_mode_pte(struct mm_struct *mm,
+				  unsigned long addr, unsigned long end,
+				  pte_t *pte)
+{
+	if (!test_facility(13))
+		return;
+	enter_ipte_batch(mm, addr, end, pte);
+}
+EXPORT_SYMBOL_IF_KUNIT(arch_enter_lazy_mmu_mode_pte);
+
+void arch_leave_lazy_mmu_mode(void)
+{
+	if (!test_facility(13))
+		return;
+	leave_ipte_batch();
+}
+EXPORT_SYMBOL_IF_KUNIT(arch_leave_lazy_mmu_mode);
+
+void arch_flush_lazy_mmu_mode(void)
+{
+	if (!test_facility(13))
+		return;
+	flush_lazy_mmu_mode();
+}
+EXPORT_SYMBOL_IF_KUNIT(arch_flush_lazy_mmu_mode);
+
+/*
+ * Cache a new PTE value and extend the dirty window
+ * [start_pte, end_pte) to include it.
+ */
+static void __ipte_batch_set_pte(struct ipte_batch *ib, pte_t *ptep, pte_t pte)
+{
+	unsigned int idx = ptep - ib->base_pte;
+
+	lockdep_assert_preemption_disabled();
+	ib->cache[idx] = pte;
+
+	if (!ib->start_pte) {
+		ib->start_pte = ptep;
+		ib->end_pte = ptep + 1;
+	} else if (ptep < ib->start_pte) {
+		ib->start_pte = ptep;
+	} else if (ptep + 1 > ib->end_pte) {
+		ib->end_pte = ptep + 1;
+	}
+}
+
+/*
+ * Return the cached value for ptep if its slot was written since the
+ * batch was armed, otherwise read the real page table entry.
+ * NOTE(review): a cached value equal to PTE_POISON (0) would be
+ * misread as "not cached" here — see the PTE_POISON definition.
+ */
+static pte_t __ipte_batch_ptep_get(struct ipte_batch *ib, pte_t *ptep)
+{
+	unsigned int idx = ptep - ib->base_pte;
+
+	lockdep_assert_preemption_disabled();
+	if (pte_val(ib->cache[idx]) == PTE_POISON)
+		return __ptep_get(ptep);
+	return ib->cache[idx];
+}
+
+/*
+ * Return true when batching applies: lazy MMU mode is active, the
+ * batch is armed for a matching mm, and ptep lies within the page
+ * table covered by the batch.
+ */
+static bool lazy_mmu_mode(struct ipte_batch *ib, struct mm_struct *mm, pte_t *ptep)
+{
+	unsigned int nr_ptes;
+
+	lockdep_assert_preemption_disabled();
+	if (!is_lazy_mmu_mode_active())
+		return false;
+	if (!mm)
+		return false;
+	if (!ib->mm)
+		return false;
+	if (ptep < ib->base_pte)
+		return false;
+	nr_ptes = (ib->base_end - ib->base_addr) / PAGE_SIZE;
+	if (ptep >= ib->base_pte + nr_ptes)
+		return false;
+	return true;
+}
+
+/*
+ * Variant for callers without an mm (set_pte()/ptep_get()): match
+ * against whatever mm the armed batch carries. Returns the batch with
+ * preemption disabled, or NULL (preemption re-enabled) when batching
+ * does not apply.
+ */
+static struct ipte_batch *get_ipte_batch_nomm(pte_t *ptep)
+{
+	struct ipte_batch *ib;
+
+	ib = &get_cpu_var(ipte_range);
+	if (!lazy_mmu_mode(ib, ib->mm, ptep)) {
+		put_cpu_var(ipte_range);
+		return NULL;
+	}
+
+	return ib;
+}
+
+/* As above, but the caller supplies the mm the PTE belongs to. */
+static struct ipte_batch *get_ipte_batch(struct mm_struct *mm, pte_t *ptep)
+{
+	struct ipte_batch *ib;
+
+	ib = &get_cpu_var(ipte_range);
+	if (!lazy_mmu_mode(ib, mm, ptep)) {
+		put_cpu_var(ipte_range);
+		return NULL;
+	}
+
+	return ib;
+}
+
+/* Release the reference taken by a successful get_ipte_batch*(). */
+static void put_ipte_batch(struct ipte_batch *ib)
+{
+	put_cpu_var(ipte_range);
+}
+
+/* Cache a PTE store; returns false if the caller must write directly. */
+bool ipte_batch_set_pte(pte_t *ptep, pte_t pte)
+{
+	struct ipte_batch *ib;
+
+	ib = get_ipte_batch_nomm(ptep);
+	if (!ib)
+		return false;
+	__ipte_batch_set_pte(ib, ptep, pte);
+	put_ipte_batch(ib);
+
+	return true;
+}
+
+/* Read through the batch cache; returns false if not batched. */
+bool ipte_batch_ptep_get(pte_t *ptep, pte_t *res)
+{
+	struct ipte_batch *ib;
+
+	ib = get_ipte_batch_nomm(ptep);
+	if (!ib)
+		return false;
+	*res = __ipte_batch_ptep_get(ib, ptep);
+	put_ipte_batch(ib);
+
+	return true;
+}
+
+/*
+ * Batched test-and-clear-young: read through the cache, cache the
+ * aged value, report the old young state via *res. Returns false when
+ * the PTE is not covered by the armed batch.
+ */
+bool ipte_batch_ptep_test_and_clear_young(struct vm_area_struct *vma,
+					  unsigned long addr, pte_t *ptep,
+					  int *res)
+{
+	struct ipte_batch *ib;
+	pte_t pte, old;
+
+	ib = get_ipte_batch(vma->vm_mm, ptep);
+	if (!ib)
+		return false;
+
+	old = __ipte_batch_ptep_get(ib, ptep);
+	pte = pte_mkold(old);
+	__ipte_batch_set_pte(ib, ptep, pte);
+
+	put_ipte_batch(ib);
+
+	*res = pte_young(old);
+
+	return true;
+}
+
+/*
+ * Batched get-and-clear: return the (possibly cached) old value via
+ * *res and cache an invalid PTE. Returns false when the PTE is not
+ * covered by the armed batch.
+ */
+bool ipte_batch_ptep_get_and_clear(struct mm_struct *mm,
+				   unsigned long addr, pte_t *ptep, pte_t *res)
+{
+	struct ipte_batch *ib;
+	pte_t pte, old;
+
+	ib = get_ipte_batch(mm, ptep);
+	if (!ib)
+		return false;
+
+	old = __ipte_batch_ptep_get(ib, ptep);
+	pte = __pte(_PAGE_INVALID);
+	__ipte_batch_set_pte(ib, ptep, pte);
+
+	put_ipte_batch(ib);
+
+	*res = old;
+
+	return true;
+}
+
+/*
+ * The "full" hint is irrelevant when batching — the flush happens once
+ * for the whole dirty window at leave/flush time — so this is the same
+ * operation as ipte_batch_ptep_get_and_clear(); delegate instead of
+ * duplicating its body.
+ */
+bool ipte_batch_ptep_get_and_clear_full(struct mm_struct *mm,
+					unsigned long addr, pte_t *ptep,
+					int full, pte_t *res)
+{
+	return ipte_batch_ptep_get_and_clear(mm, addr, ptep, res);
+}
+
+/* Batched prot-transaction start: identical to get-and-clear. */
+bool ipte_batch_ptep_modify_prot_start(struct vm_area_struct *vma,
+				       unsigned long addr, pte_t *ptep, pte_t *res)
+{
+	return ipte_batch_ptep_get_and_clear(vma->vm_mm, addr, ptep, res);
+}
+
+/* Batched prot-transaction commit: cache the new value. */
+bool ipte_batch_ptep_modify_prot_commit(struct vm_area_struct *vma,
+					unsigned long addr, pte_t *ptep,
+					pte_t old_pte, pte_t pte)
+{
+	struct ipte_batch *ib;
+
+	ib = get_ipte_batch(vma->vm_mm, ptep);
+	if (!ib)
+		return false;
+	__ipte_batch_set_pte(ib, ptep, pte);
+	put_ipte_batch(ib);
+
+	return true;
+}
+
+/*
+ * Batched write-protect: read through the cache and cache the
+ * write-protected value. Returns false when the PTE is not covered by
+ * the armed batch.
+ */
+bool ipte_batch_ptep_set_wrprotect(struct mm_struct *mm,
+				   unsigned long addr, pte_t *ptep)
+{
+	struct ipte_batch *ib;
+	pte_t pte, old;
+
+	ib = get_ipte_batch(mm, ptep);
+	if (!ib)
+		return false;
+
+	old = __ipte_batch_ptep_get(ib, ptep);
+	pte = pte_wrprotect(old);
+	__ipte_batch_set_pte(ib, ptep, pte);
+
+	put_ipte_batch(ib);
+
+	return true;
+}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4acd8b140c4b..df36523bcbbb 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -166,14 +166,14 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(ptep_xchg_lazy);
 
-pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
-			     pte_t *ptep)
+/* Non-batched backend behind the ptep_modify_prot_start() wrapper. */
+pte_t ___ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
+				pte_t *ptep)
 {
 	return ptep_flush_lazy(vma->vm_mm, addr, ptep, 1);
 }
 
-void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
-			     pte_t *ptep, pte_t old_pte, pte_t pte)
+/* Non-batched backend behind the ptep_modify_prot_commit() wrapper. */
+void ___ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+				pte_t *ptep, pte_t old_pte, pte_t pte)
 {
 	set_pte(ptep, pte);
 }
-- 
2.51.0