Honoring these PTE bits is the exception, so let's invert the meaning.
With this change, most callers don't have to pass any flags.
No functional change intended.
Signed-off-by: David Hildenbrand <david@redhat.com>
---
mm/internal.h | 16 ++++++++--------
mm/madvise.c | 3 +--
mm/memory.c | 11 +++++------
mm/mempolicy.c | 4 +---
mm/mlock.c | 3 +--
mm/mremap.c | 3 +--
mm/rmap.c | 3 +--
7 files changed, 18 insertions(+), 25 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index e84217e27778d..9690c75063881 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -202,17 +202,17 @@ static inline void vma_close(struct vm_area_struct *vma)
/* Flags for folio_pte_batch(). */
typedef int __bitwise fpb_t;
-/* Compare PTEs after pte_mkclean(), ignoring the dirty bit. */
-#define FPB_IGNORE_DIRTY ((__force fpb_t)BIT(0))
+/* Compare PTEs honoring the dirty bit. */
+#define FPB_HONOR_DIRTY ((__force fpb_t)BIT(0))
-/* Compare PTEs after pte_clear_soft_dirty(), ignoring the soft-dirty bit. */
-#define FPB_IGNORE_SOFT_DIRTY ((__force fpb_t)BIT(1))
+/* Compare PTEs honoring the soft-dirty bit. */
+#define FPB_HONOR_SOFT_DIRTY ((__force fpb_t)BIT(1))
static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
{
- if (flags & FPB_IGNORE_DIRTY)
+ if (!(flags & FPB_HONOR_DIRTY))
pte = pte_mkclean(pte);
- if (likely(flags & FPB_IGNORE_SOFT_DIRTY))
+ if (likely(!(flags & FPB_HONOR_SOFT_DIRTY)))
pte = pte_clear_soft_dirty(pte);
return pte_wrprotect(pte_mkold(pte));
}
@@ -236,8 +236,8 @@ static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
* pages of the same large folio.
*
* All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN,
- * the accessed bit, writable bit, dirty bit (with FPB_IGNORE_DIRTY) and
- * soft-dirty bit (with FPB_IGNORE_SOFT_DIRTY).
+ * the accessed bit, writable bit, dirty bit (unless FPB_HONOR_DIRTY is set) and
+ * soft-dirty bit (unless FPB_HONOR_SOFT_DIRTY is set).
*
* start_ptep must map any page of the folio. max_nr must be at least one and
* must be limited by the caller so scanning cannot exceed a single page table.
diff --git a/mm/madvise.c b/mm/madvise.c
index e61e32b2cd91f..661bb743d2216 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -347,10 +347,9 @@ static inline int madvise_folio_pte_batch(unsigned long addr, unsigned long end,
pte_t pte, bool *any_young,
bool *any_dirty)
{
- const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
int max_nr = (end - addr) / PAGE_SIZE;
- return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
+ return folio_pte_batch(folio, addr, ptep, pte, max_nr, 0, NULL,
any_young, any_dirty);
}
diff --git a/mm/memory.c b/mm/memory.c
index 0f9b32a20e5b7..ab2d6c1425691 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -990,10 +990,10 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
* by keeping the batching logic separate.
*/
if (unlikely(!*prealloc && folio_test_large(folio) && max_nr != 1)) {
- if (src_vma->vm_flags & VM_SHARED)
- flags |= FPB_IGNORE_DIRTY;
- if (!vma_soft_dirty_enabled(src_vma))
- flags |= FPB_IGNORE_SOFT_DIRTY;
+ if (!(src_vma->vm_flags & VM_SHARED))
+ flags |= FPB_HONOR_DIRTY;
+ if (vma_soft_dirty_enabled(src_vma))
+ flags |= FPB_HONOR_SOFT_DIRTY;
nr = folio_pte_batch(folio, addr, src_pte, pte, max_nr, flags,
&any_writable, NULL, NULL);
@@ -1535,7 +1535,6 @@ static inline int zap_present_ptes(struct mmu_gather *tlb,
struct zap_details *details, int *rss, bool *force_flush,
bool *force_break, bool *any_skipped)
{
- const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
struct mm_struct *mm = tlb->mm;
struct folio *folio;
struct page *page;
@@ -1565,7 +1564,7 @@ static inline int zap_present_ptes(struct mmu_gather *tlb,
* by keeping the batching logic separate.
*/
if (unlikely(folio_test_large(folio) && max_nr != 1)) {
- nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, fpb_flags,
+ nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, 0,
NULL, NULL, NULL);
zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, nr,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1ff7b2174eb77..2a25eedc3b1c0 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -675,7 +675,6 @@ static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk)
static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
struct vm_area_struct *vma = walk->vma;
struct folio *folio;
struct queue_pages *qp = walk->private;
@@ -713,8 +712,7 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
continue;
if (folio_test_large(folio) && max_nr != 1)
nr = folio_pte_batch(folio, addr, pte, ptent,
- max_nr, fpb_flags,
- NULL, NULL, NULL);
+ max_nr, 0, NULL, NULL, NULL);
/*
* vm_normal_folio() filters out zero pages, but there might
* still be reserved folios to skip, perhaps in a VDSO.
diff --git a/mm/mlock.c b/mm/mlock.c
index 3cb72b579ffd3..2238cdc5eb1c1 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -307,14 +307,13 @@ void munlock_folio(struct folio *folio)
static inline unsigned int folio_mlock_step(struct folio *folio,
pte_t *pte, unsigned long addr, unsigned long end)
{
- const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
unsigned int count = (end - addr) >> PAGE_SHIFT;
pte_t ptent = ptep_get(pte);
if (!folio_test_large(folio))
return 1;
- return folio_pte_batch(folio, addr, pte, ptent, count, fpb_flags, NULL,
+ return folio_pte_batch(folio, addr, pte, ptent, count, 0, NULL,
NULL, NULL);
}
diff --git a/mm/mremap.c b/mm/mremap.c
index 36585041c760d..d4d3ffc931502 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -173,7 +173,6 @@ static pte_t move_soft_dirty_pte(pte_t pte)
static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep, pte_t pte, int max_nr)
{
- const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
struct folio *folio;
if (max_nr == 1)
@@ -183,7 +182,7 @@ static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr
if (!folio || !folio_test_large(folio))
return 1;
- return folio_pte_batch(folio, addr, ptep, pte, max_nr, flags, NULL,
+ return folio_pte_batch(folio, addr, ptep, pte, max_nr, 0, NULL,
NULL, NULL);
}
diff --git a/mm/rmap.c b/mm/rmap.c
index 3b74bb19c11dd..a29d7d29c7283 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1849,7 +1849,6 @@ void folio_remove_rmap_pud(struct folio *folio, struct page *page,
static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
struct folio *folio, pte_t *ptep)
{
- const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
int max_nr = folio_nr_pages(folio);
pte_t pte = ptep_get(ptep);
@@ -1860,7 +1859,7 @@ static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
if (pte_pfn(pte) != folio_pfn(folio))
return false;
- return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
+ return folio_pte_batch(folio, addr, ptep, pte, max_nr, 0, NULL,
NULL, NULL) == max_nr;
}
--
2.49.0
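
To make the inversion concrete before the replies below: a condensed
before/after sketch of a typical call site, pieced together from the
hunks above (fork() in mm/memory.c remains the one caller that still
sets flags):

	/* Before: most callers opted in to ignoring dirty/soft-dirty. */
	const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
	nr = folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags,
			     NULL, NULL, NULL);

	/* After: ignoring is the default; passing 0 behaves the same. */
	nr = folio_pte_batch(folio, addr, ptep, pte, max_nr, 0,
			     NULL, NULL, NULL);

	/* fork() now opts in to *honoring* the bits when it must: */
	if (!(src_vma->vm_flags & VM_SHARED))
		flags |= FPB_HONOR_DIRTY;	/* private: dirty must match */
	if (vma_soft_dirty_enabled(src_vma))
		flags |= FPB_HONOR_SOFT_DIRTY;	/* tracking enabled */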
On Fri, Jun 27, 2025 at 01:55:07PM +0200, David Hildenbrand wrote:
> Honoring these PTE bits is the exception, so let's invert the meaning.
>
> With this change, most callers don't have to pass any flags.
>
> No functional change intended.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>

I'm not gonna jump into a dialectics battle :-), so whatever ends up
being the FPB_ flag:

Reviewed-by: Oscar Salvador <osalvador@suse.de>

--
Oscar Salvador
SUSE Labs
On 27 Jun 2025, at 7:55, David Hildenbrand wrote:
> Honoring these PTE bits is the exception, so let's invert the meaning.
>
> With this change, most callers don't have to pass any flags.
>
> No functional change intended.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>

LGTM. I assume the callers that no longer pass any flags will be
converted to use folio_pte_batch() (no flags) later. I will keep
reading.

Reviewed-by: Zi Yan <ziy@nvidia.com>

Best Regards,
Yan, Zi
On 27/06/25 5:25 pm, David Hildenbrand wrote:
> Honoring these PTE bits is the exception, so let's invert the meaning.
>
> With this change, most callers don't have to pass any flags.
>
> No functional change intended.

FWIW I had proposed this kind of change earlier to Ryan (CCed) and
he pointed out: "Doesn't that argument apply in reverse if you want
to ignore something new in future?

By default we are comparing all the bits in the pte when determining
the batch. The flags request to ignore certain bits. If we want to
ignore extra bits in future, we add new flags and the existing callers
don't need to be updated. With your approach, every time you want the
ability to ignore extra bits, you need to update all the callers, which
is error prone."
On 28.06.25 05:37, Dev Jain wrote:
> On 27/06/25 5:25 pm, David Hildenbrand wrote:
>> Honoring these PTE bits is the exception, so let's invert the meaning.
>>
>> With this change, most callers don't have to pass any flags.
>>
>> No functional change intended.
>
> FWIW I had proposed this kind of change earlier to Ryan (CCed) and
> he pointed out: "Doesn't that argument apply in reverse if you want
> to ignore something new in future?
>
> By default we are comparing all the bits in the pte when determining
> the batch. The flags request to ignore certain bits.

That statement is not true: as default we ignore the write and young
bit. And we don't have flags for that ;)

Now we also ignore the dirty and soft-dirty bit as default, unless told
not to do that by one very specific caller.

> If we want to ignore extra bits in future, we add new flags and the
> existing callers don't need to be updated.

What stops you from using FPB_IGNORE_* for something else in the future?

As a side note, there are not that many relevant PTE bits to worry
about in the near future ;)

I mean, uffd-wp, sure, ... and before we add a FPB_HONOR_UFFD_WP to all
users to be safe (and changing the default to ignore), you could add a
FPB_IGNORE_UFFD_WP first, to then check who really can tolerate just
ignoring it (most of them, I assume).

--
Cheers,

David / dhildenb
On 29/06/25 2:30 am, David Hildenbrand wrote:
[...]
> I mean, uffd-wp, sure, ... and before we add a FPB_HONOR_UFFD_WP to
> all users to be safe (and changing the default to ignore), you could
> add a FPB_IGNORE_UFFD_WP first, to then check who really can tolerate
> just ignoring it (most of them, I assume).

I agree.
On 30/06/2025 04:34, Dev Jain wrote:
> On 29/06/25 2:30 am, David Hildenbrand wrote:
[...]
>> I mean, uffd-wp, sure, ... and before we add a FPB_HONOR_UFFD_WP to
>> all users to be safe (and changing the default to ignore), you could
>> add a FPB_IGNORE_UFFD_WP first, to then check who really can tolerate
>> just ignoring it (most of them, I assume).
> I agree.

Meh. Personally I think if you start mixing HONOR and IGNORE flags, it
becomes very confusing to work out what is being checked for and what
is not. I stand by my original view. But yeah, writable and young
confuse it a bit... How about generalizing by explicitly requiring
IGNORE flags for write and young, then also create a flags macro for
the common case?

#define FPB_IGNORE_COMMON	(FPB_IGNORE_WRITE | FPB_IGNORE_YOUNG | \
				 FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY)

It's not a hill I'm going to die on though...

Thanks,
Ryan
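
As a concrete sketch of how Ryan's macro would read at a call site --
hypothetical code, assuming the FPB_IGNORE_WRITE/FPB_IGNORE_YOUNG flags
from his snippet existed; this is not from any posted patch:

	/* Common case: spell out every bit that is being ignored. */
	nr = folio_pte_batch(folio, addr, pte, ptent, max_nr,
			     FPB_IGNORE_COMMON, NULL, NULL, NULL);

	/* fork() of a private VMA: dirty has to match, so drop that bit. */
	nr = folio_pte_batch(folio, addr, src_pte, pte, max_nr,
			     FPB_IGNORE_COMMON & ~FPB_IGNORE_DIRTY,
			     &any_writable, NULL, NULL);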
On 30.06.25 11:04, Ryan Roberts wrote:
[...]
> Meh. Personally I think if you start mixing HONOR and IGNORE flags, it
> becomes very confusing to work out what is being checked for and what
> is not. I stand by my original view. But yeah, writable and young
> confuse it a bit... How about generalizing by explicitly requiring
> IGNORE flags for write and young, then also create a flags macro for
> the common case?
>
> #define FPB_IGNORE_COMMON	(FPB_IGNORE_WRITE | FPB_IGNORE_YOUNG | \
> 				 FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY)
>
> It's not a hill I'm going to die on though...

How about we make this function simpler, not more complicated? ;)

--
Cheers,

David / dhildenb
On 30/06/2025 10:08, David Hildenbrand wrote:
> On 30.06.25 11:04, Ryan Roberts wrote:
[...]
>> It's not a hill I'm going to die on though...
>
> How about we make this function simpler, not more complicated? ;)

I think here we both have different views of what is simpler... You are
trying to optimize for the callers writing less code. I'm trying to
optimize for the reader to be able to easily determine what the
function will do for a given caller.

But it's your name above the door :) I don't want to bikeshed, so go
ahead change to HONOR.
On 30.06.25 11:18, Ryan Roberts wrote:
[...]
> I think here we both have different views of what is simpler... You
> are trying to optimize for the callers writing less code. I'm trying
> to optimize for the reader to be able to easily determine what the
> function will do for a given caller.

See patch number #3: I want the default function -- folio_pte_batch()
-- to not have any flags at all.

And I don't want to achieve that by internally using flags when calling
folio_pte_batch_ext().

If you don't specify flags (folio_pte_batch()), behave just as if
calling folio_pte_batch_ext() without flags. Anything else would be
more confusing IMHO.

I agree that mixing HONOR and IGNORE is not a good idea. But then, it's
really only uffd-wp that still could be batched, and likely we want it
to be the default, and respect/honor/whatever instead in the cases
where we really have to.

(If we really want to go down that path and batch it :) )

--
Cheers,

David / dhildenb
On 30/06/2025 10:24, David Hildenbrand wrote:
> On 30.06.25 11:18, Ryan Roberts wrote:
[...]
> See patch number #3: I want the default function -- folio_pte_batch()
> -- to not have any flags at all.
>
> And I don't want to achieve that by internally using flags when
> calling folio_pte_batch_ext().
>
> If you don't specify flags (folio_pte_batch()), behave just as if
> calling folio_pte_batch_ext() without flags. Anything else would be
> more confusing IMHO.

OK, sorry I don't have the context of your actual series... I was just
trying to defend my quote that Dev sent. I'll go take a look at your
series.

> I agree that mixing HONOR and IGNORE is not a good idea. But then,
> it's really only uffd-wp that still could be batched, and likely we
> want it to be the default, and respect/honor/whatever instead in the
> cases where we really have to.
>
> (If we really want to go down that path and batch it :) )

FWIW, I think we will need to honor the write bit for Dev's mprotect
series (I just sent review comments against his v4). So if you want
most callers to just call folio_pte_batch() and that will ignore the
write bit, then I guess folio_pte_batch_ext() will have to accept a
FPB_HONOR_WRITE bit? Which I guess aligns with what you are doing here
to make all the flags HONOR.
On 30.06.25 12:57, Ryan Roberts wrote:
[...]
> OK, sorry I don't have the context of your actual series... I was just
> trying to defend my quote that Dev sent. I'll go take a look at your
> series.

If we'd not do the split and always provide flags + non-inlined
variant, then I'd agree.

> FWIW, I think we will need to honor the write bit for Dev's mprotect
> series (I just sent review comments against his v4). So if you want
> most callers to just call folio_pte_batch() and that will ignore the
> write bit, then I guess folio_pte_batch_ext() will have to accept a
> FPB_HONOR_WRITE bit? Which I guess aligns with what you are doing here
> to make all the flags HONOR.

I have to take a look at that one. Maybe it's sufficient to know if any
PTE was writable, just like we did with fork().

But yes, anybody who requires more advanced handling (flags) shall use
the extended variant -- however that one will be called.

--
Cheers,

David / dhildenb
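
For readers without the rest of the series handy: a minimal sketch of
the folio_pte_batch() / folio_pte_batch_ext() split David describes
above. The signatures are assumptions based on this thread (parameter
list taken from the existing function), not the final merged code:

	/* Extended variant: takes flags and the optional out-parameters. */
	int folio_pte_batch_ext(struct folio *folio, unsigned long addr,
				pte_t *ptep, pte_t pte, int max_nr,
				fpb_t flags, bool *any_writable,
				bool *any_young, bool *any_dirty);

	/* Default variant: no flags; behaves as _ext() with flags == 0. */
	static inline int folio_pte_batch(struct folio *folio,
					  unsigned long addr, pte_t *ptep,
					  pte_t pte, int max_nr)
	{
		return folio_pte_batch_ext(folio, addr, ptep, pte, max_nr,
					   0, NULL, NULL, NULL);
	}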
On Fri, Jun 27, 2025 at 01:55:07PM +0200, David Hildenbrand wrote:
> Honoring these PTE bits is the exception, so let's invert the meaning.
>
> With this change, most callers don't have to pass any flags.
>
> No functional change intended.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>

This is a nice change, it removes a lot of code I really didn't enjoy
looking at for introducing these flags all over the place.

But a nit on the naming below, I'm not a fan of 'honor' here :)

> -/* Compare PTEs after pte_mkclean(), ignoring the dirty bit. */
> -#define FPB_IGNORE_DIRTY		((__force fpb_t)BIT(0))
> +/* Compare PTEs honoring the dirty bit. */
> +#define FPB_HONOR_DIRTY		((__force fpb_t)BIT(0))

Hm not to be petty but... :)

I'm not sure I find 'honor' very clear here. Ignore is very clear,
'honor' (God the British English in me wants to say honour here but
stipp :P) doesn't necessarily tell you what is going to happen.

Perhaps PROPAGATE? or OBEY?

> -	return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
> +	return folio_pte_batch(folio, addr, ptep, pte, max_nr, 0, NULL,
> 				NULL, NULL) == max_nr;

I hope a later patch gets rid of this NULL, NULL, NULL... :)
On 27.06.25 18:28, Lorenzo Stoakes wrote:
> On Fri, Jun 27, 2025 at 01:55:07PM +0200, David Hildenbrand wrote:
[...]
> Hm not to be petty but... :)
>
> I'm not sure I find 'honor' very clear here. Ignore is very clear,
> 'honor' (God the British English in me wants to say honour here but
> stipp :P) doesn't necessarily tell you what is going to happen.
>
> Perhaps PROPAGATE? or OBEY?

RESPECT? :)

--
Cheers,

David / dhildenb
On Fri, Jun 27, 2025 at 06:30:13PM +0200, David Hildenbrand wrote:
> On 27.06.25 18:28, Lorenzo Stoakes wrote:
[...]
>> Perhaps PROPAGATE? or OBEY?
>
> RESPECT? :)

🎵 R-E-S-P-E-C-T find out what it means to me... ;) 🎵

This works too :>)
On Fri, Jun 27, 2025 at 05:33:06PM +0100, Lorenzo Stoakes wrote:
> On Fri, Jun 27, 2025 at 06:30:13PM +0200, David Hildenbrand wrote:
[...]
>>> Perhaps PROPAGATE? or OBEY?
>>
>> RESPECT? :)

DONT_IGNORE ;-)

> 🎵 R-E-S-P-E-C-T find out what it means to me... ;) 🎵
>
> This works too :>)

--
Sincerely yours,
Mike.
On 29.06.25 10:59, Mike Rapoport wrote:
> On Fri, Jun 27, 2025 at 05:33:06PM +0100, Lorenzo Stoakes wrote:
[...]
>>>>> Perhaps PROPAGATE? or OBEY?
>>>>
>>>> RESPECT? :)
>
> DONT_IGNORE ;-)

Well, yes, that would be an obvious option as well, but I think I'll go
with "RESPECT" for now.

--
Cheers,

David / dhildenb
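
For reference, the rename David settles on would read like this in the
mm/internal.h hunk above -- a sketch derived mechanically from the
patch (s/HONOR/RESPECT/), not a quote of a posted revision:

	/* Compare PTEs respecting the dirty bit. */
	#define FPB_RESPECT_DIRTY		((__force fpb_t)BIT(0))

	/* Compare PTEs respecting the soft-dirty bit. */
	#define FPB_RESPECT_SOFT_DIRTY		((__force fpb_t)BIT(1))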
On Fri, Jun 27, 2025 at 7:55 PM David Hildenbrand <david@redhat.com> wrote:
>
> Honoring these PTE bits is the exception, so let's invert the meaning.
>
> With this change, most callers don't have to pass any flags.
>
> No functional change intended.

LGTM. Feel free to add:

Reviewed-by: Lance Yang <lance.yang@linux.dev>

Thanks,
Lance