[PATCH] KVM: s390: Fix to clear PTE when discarding a swapped page

Gautam Gala posted 1 patch 1 week ago
arch/s390/include/asm/pgtable.h | 22 ++++++++++++++++++++++
arch/s390/mm/gmap_helpers.c     | 12 +++++++++++-
arch/s390/mm/pgtable.c          | 23 +----------------------
3 files changed, 34 insertions(+), 23 deletions(-)
[PATCH] KVM: s390: Fix to clear PTE when discarding a swapped page
Posted by Gautam Gala 1 week ago
KVM run fails when guests with 'cmm' cpu feature and host are
under memory pressure and use swap heavily. This is because
npages becomes ENOMEM (out of memory) in hva_to_pfn_slow()
which in turn propagates as EFAULT to qemu. Clearing the page
table entry when discarding an address that maps to a swap
entry resolves the issue.

Suggested-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Gautam Gala <ggala@linux.ibm.com>
---
 arch/s390/include/asm/pgtable.h | 22 ++++++++++++++++++++++
 arch/s390/mm/gmap_helpers.c     | 12 +++++++++++-
 arch/s390/mm/pgtable.c          | 23 +----------------------
 3 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index c1a7a92f0575..b7100c6a4054 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -2055,4 +2055,26 @@ static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt)
 	return res;
 }
 
+static inline pgste_t pgste_get_lock(pte_t *ptep)
+{
+	unsigned long value = 0;
+#ifdef CONFIG_PGSTE
+	unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
+
+	do {
+		value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
+	} while (value & PGSTE_PCL_BIT);
+	value |= PGSTE_PCL_BIT;
+#endif
+	return __pgste(value);
+}
+
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	barrier();
+	WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
+#endif
+}
+
 #endif /* _S390_PAGE_H */
diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c
index b63f427e7289..d4c3c36855e2 100644
--- a/arch/s390/mm/gmap_helpers.c
+++ b/arch/s390/mm/gmap_helpers.c
@@ -15,6 +15,7 @@
 #include <linux/pagewalk.h>
 #include <linux/ksm.h>
 #include <asm/gmap_helpers.h>
+#include <asm/pgtable.h>
 
 /**
  * ptep_zap_swap_entry() - discard a swap entry.
@@ -47,6 +48,7 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
 {
 	struct vm_area_struct *vma;
 	spinlock_t *ptl;
+	pgste_t pgste;
 	pte_t *ptep;
 
 	mmap_assert_locked(mm);
@@ -60,8 +62,16 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
 	ptep = get_locked_pte(mm, vmaddr, &ptl);
 	if (unlikely(!ptep))
 		return;
-	if (pte_swap(*ptep))
+	if (pte_swap(*ptep)) {
+		preempt_disable();
+		pgste = pgste_get_lock(ptep);
+
 		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+		pte_clear(mm, vmaddr, ptep);
+
+		pgste_set_unlock(ptep, pgste);
+		preempt_enable();
+	}
 	pte_unmap_unlock(ptep, ptl);
 }
 EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 60688be4e876..879f39366e6c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,6 +24,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/page-states.h>
+#include <asm/pgtable.h>
 #include <asm/machine.h>
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
@@ -115,28 +116,6 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
 	return old;
 }
 
-static inline pgste_t pgste_get_lock(pte_t *ptep)
-{
-	unsigned long value = 0;
-#ifdef CONFIG_PGSTE
-	unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
-
-	do {
-		value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
-	} while (value & PGSTE_PCL_BIT);
-	value |= PGSTE_PCL_BIT;
-#endif
-	return __pgste(value);
-}
-
-static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
-	barrier();
-	WRITE_ONCE(*(unsigned long *)(ptep + PTRS_PER_PTE), pgste_val(pgste) & ~PGSTE_PCL_BIT);
-#endif
-}
-
 static inline pgste_t pgste_get(pte_t *ptep)
 {
 	unsigned long pgste = 0;
-- 
2.51.0
Re: [PATCH] KVM: s390: Fix to clear PTE when discarding a swapped page
Posted by kernel test robot 4 days, 6 hours ago
Hi Gautam,

kernel test robot noticed the following build warnings:

[auto build test WARNING on s390/features]
[also build test WARNING on linus/master v6.17-rc7 next-20250926]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Gautam-Gala/KVM-s390-Fix-to-clear-PTE-when-discarding-a-swapped-page/20250924-201847
base:   https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git features
patch link:    https://lore.kernel.org/r/20250924121707.145350-1-ggala%40linux.ibm.com
patch subject: [PATCH] KVM: s390: Fix to clear PTE when discarding a swapped page
config: s390-defconfig (https://download.01.org/0day-ci/archive/20250928/202509280003.NWFBhwme-lkp@intel.com/config)
compiler: clang version 22.0.0git (https://github.com/llvm/llvm-project cafc064fc7a96b3979a023ddae1da2b499d6c954)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250928/202509280003.NWFBhwme-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202509280003.NWFBhwme-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from arch/s390/boot/als.c:9:
   In file included from arch/s390/include/asm/sclp.h:26:
   In file included from arch/s390/include/asm/chpid.h:10:
   In file included from arch/s390/include/asm/cio.h:10:
   In file included from arch/s390/include/asm/dma-types.h:7:
   In file included from include/linux/io.h:12:
   In file included from arch/s390/include/asm/io.h:15:
>> arch/s390/include/asm/pgtable.h:2065:48: warning: passing 'unsigned long *' to parameter of type 'long *' converts between pointers to integer types with different sign [-Wpointer-sign]
    2065 |                 value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
         |                                                              ^~~
   arch/s390/include/asm/atomic_ops.h:161:1: note: passing argument to parameter 'ptr' here
     161 | __ATOMIC64_OPS(__atomic64_or,  "ogr")
         | ^
   arch/s390/include/asm/atomic_ops.h:157:2: note: expanded from macro '__ATOMIC64_OPS'
     157 |         __ATOMIC64_OP(op_name##_barrier, op_string)
         |         ^
   arch/s390/include/asm/atomic_ops.h:141:53: note: expanded from macro '__ATOMIC64_OP'
     141 | static __always_inline long op_name(long val, long *ptr)                \
         |                                                     ^
   1 warning generated.


vim +2065 arch/s390/include/asm/pgtable.h

  2057	
  2058	static inline pgste_t pgste_get_lock(pte_t *ptep)
  2059	{
  2060		unsigned long value = 0;
  2061	#ifdef CONFIG_PGSTE
  2062		unsigned long *ptr = (unsigned long *)(ptep + PTRS_PER_PTE);
  2063	
  2064		do {
> 2065			value = __atomic64_or_barrier(PGSTE_PCL_BIT, ptr);
  2066		} while (value & PGSTE_PCL_BIT);
  2067		value |= PGSTE_PCL_BIT;
  2068	#endif
  2069		return __pgste(value);
  2070	}
  2071	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] KVM: s390: Fix to clear PTE when discarding a swapped page
Posted by David Hildenbrand 1 week ago
On 24.09.25 14:17, Gautam Gala wrote:
> KVM run fails when guests with 'cmm' cpu feature and host are
> under memory pressure and use swap heavily. This is because
> npages becomes ENOMEM (out of memory) in hva_to_pfn_slow()
> which in turn propagates as EFAULT to qemu. Clearing the page
> table entry when discarding an address that maps to a swap
> entry resolves the issue.
> 
> Suggested-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> Signed-off-by: Gautam Gala <ggala@linux.ibm.com>
> ---

Sounds bad,

I assume we want Fixes: and CC: stable, right?

-- 
Cheers

David / dhildenb
Re: [PATCH] KVM: s390: Fix to clear PTE when discarding a swapped page
Posted by Claudio Imbrenda 1 week ago
On Wed, 24 Sep 2025 14:22:05 +0200
David Hildenbrand <david@redhat.com> wrote:

> On 24.09.25 14:17, Gautam Gala wrote:
> > KVM run fails when guests with 'cmm' cpu feature and host are
> > under memory pressure and use swap heavily. This is because
> > npages becomes ENOMEM (out of memory) in hva_to_pfn_slow()
> > which in turn propagates as EFAULT to qemu. Clearing the page
> > table entry when discarding an address that maps to a swap
> > entry resolves the issue.
> > 
> > Suggested-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > Signed-off-by: Gautam Gala <ggala@linux.ibm.com>
> > ---  
> 
> Sounds bad,
> 
> I assume we want Fixes: and CC: stable, right?

yes; I'll add those when picking up

Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>