Rather than updating start and a confusing local parameter 'tmp' in
madvise_walk_vmas(), instead store the current range being operated upon in
the struct madvise_behavior helper object in a range pair and use this
consistently in all operations.
This makes it clearer what is going on and opens the door to further
cleanup now we store state regarding what is currently being operated upon
here.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
mm/madvise.c | 101 ++++++++++++++++++++++++++++-----------------------
1 file changed, 55 insertions(+), 46 deletions(-)
diff --git a/mm/madvise.c b/mm/madvise.c
index 47485653c2a1..6faa38b92111 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -58,17 +58,26 @@ enum madvise_lock_mode {
MADVISE_VMA_READ_LOCK,
};
+struct madvise_behavior_range {
+ unsigned long start, end;
+};
+
struct madvise_behavior {
struct mm_struct *mm;
int behavior;
struct mmu_gather *tlb;
enum madvise_lock_mode lock_mode;
struct anon_vma_name *anon_name;
+
+ /*
+ * The range over which the behaviour is currently being applied. If
+ * traversing multiple VMAs, this is updated for each.
+ */
+ struct madvise_behavior_range range;
};
#ifdef CONFIG_ANON_VMA_NAME
-static int madvise_walk_vmas(unsigned long start, unsigned long end,
- struct madvise_behavior *madv_behavior);
+static int madvise_walk_vmas(struct madvise_behavior *madv_behavior);
struct anon_vma_name *anon_vma_name_alloc(const char *name)
{
@@ -149,8 +158,9 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
madv_behavior.behavior =
anon_name ? __MADV_SET_ANON_VMA_NAME : __MADV_CLEAR_ANON_VMA_NAME;
-
- return madvise_walk_vmas(start, end, &madv_behavior);
+ madv_behavior.range.start = start;
+ madv_behavior.range.end = end;
+ return madvise_walk_vmas(&madv_behavior);
}
static bool is_anon_vma_name(int behavior)
@@ -1012,12 +1022,13 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
return -EINVAL;
}
-static long madvise_populate(unsigned long start, unsigned long end,
- struct madvise_behavior *madv_behavior)
+static long madvise_populate(struct madvise_behavior *madv_behavior)
{
struct mm_struct *mm = madv_behavior->mm;
const bool write = madv_behavior->behavior == MADV_POPULATE_WRITE;
int locked = 1;
+ unsigned long start = madv_behavior->range.start;
+ unsigned long end = madv_behavior->range.end;
long pages;
while (start < end) {
@@ -1308,12 +1319,13 @@ static long madvise_guard_remove(struct vm_area_struct *vma,
*/
static int madvise_vma_behavior(struct vm_area_struct *vma,
struct vm_area_struct **prev,
- unsigned long start, unsigned long end,
struct madvise_behavior *madv_behavior)
{
int behavior = madv_behavior->behavior;
struct anon_vma_name *anon_name = madv_behavior->anon_name;
vm_flags_t new_flags = vma->vm_flags;
+ unsigned long start = madv_behavior->range.start;
+ unsigned long end = madv_behavior->range.end;
int error;
if (unlikely(!can_modify_vma_madv(vma, behavior)))
@@ -1425,10 +1437,11 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
/*
* Error injection support for memory error handling.
*/
-static int madvise_inject_error(unsigned long start, unsigned long end,
- struct madvise_behavior *madv_behavior)
+static int madvise_inject_error(struct madvise_behavior *madv_behavior)
{
unsigned long size;
+ unsigned long start = madv_behavior->range.start;
+ unsigned long end = madv_behavior->range.end;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1482,8 +1495,7 @@ static bool is_memory_failure(struct madvise_behavior *madv_behavior)
#else
-static int madvise_inject_error(unsigned long start, unsigned long end,
- struct madvise_behavior *madv_behavior)
+static int madvise_inject_error(struct madvise_behavior *madv_behavior)
{
return 0;
}
@@ -1565,20 +1577,20 @@ static bool process_madvise_remote_valid(int behavior)
* If a VMA read lock could not be acquired, we return NULL and expect caller to
* fallback to mmap lock behaviour.
*/
-static struct vm_area_struct *try_vma_read_lock(struct mm_struct *mm,
- struct madvise_behavior *madv_behavior,
- unsigned long start, unsigned long end)
+static
+struct vm_area_struct *try_vma_read_lock(struct madvise_behavior *madv_behavior)
{
+ struct mm_struct *mm = madv_behavior->mm;
struct vm_area_struct *vma;
- vma = lock_vma_under_rcu(mm, start);
+ vma = lock_vma_under_rcu(mm, madv_behavior->range.start);
if (!vma)
goto take_mmap_read_lock;
/*
* Must span only a single VMA; uffd and remote processes are
* unsupported.
*/
- if (end > vma->vm_end || current->mm != mm ||
+ if (madv_behavior->range.end > vma->vm_end || current->mm != mm ||
userfaultfd_armed(vma)) {
vma_end_read(vma);
goto take_mmap_read_lock;
@@ -1600,13 +1612,13 @@ static struct vm_area_struct *try_vma_read_lock(struct mm_struct *mm,
* Must be called with the mmap_lock held for reading or writing.
*/
static
-int madvise_walk_vmas(unsigned long start, unsigned long end,
- struct madvise_behavior *madv_behavior)
+int madvise_walk_vmas(struct madvise_behavior *madv_behavior)
{
struct mm_struct *mm = madv_behavior->mm;
+ struct madvise_behavior_range *range = &madv_behavior->range;
+ unsigned long end = range->end;
struct vm_area_struct *vma;
struct vm_area_struct *prev;
- unsigned long tmp;
int unmapped_error = 0;
int error;
@@ -1615,11 +1627,10 @@ int madvise_walk_vmas(unsigned long start, unsigned long end,
* tentatively, avoiding walking VMAs.
*/
if (madv_behavior->lock_mode == MADVISE_VMA_READ_LOCK) {
- vma = try_vma_read_lock(mm, madv_behavior, start, end);
+ vma = try_vma_read_lock(madv_behavior);
if (vma) {
prev = vma;
- error = madvise_vma_behavior(vma, &prev, start, end,
- madv_behavior);
+ error = madvise_vma_behavior(vma, &prev, madv_behavior);
vma_end_read(vma);
return error;
}
@@ -1630,8 +1641,8 @@ int madvise_walk_vmas(unsigned long start, unsigned long end,
* ranges, just ignore them, but return -ENOMEM at the end.
* - different from the way of handling in mlock etc.
*/
- vma = find_vma_prev(mm, start, &prev);
- if (vma && start > vma->vm_start)
+ vma = find_vma_prev(mm, range->start, &prev);
+ if (vma && range->start > vma->vm_start)
prev = vma;
for (;;) {
@@ -1640,32 +1651,29 @@ int madvise_walk_vmas(unsigned long start, unsigned long end,
return -ENOMEM;
/* Here start < (end|vma->vm_end). */
- if (start < vma->vm_start) {
+ if (range->start < vma->vm_start) {
unmapped_error = -ENOMEM;
- start = vma->vm_start;
- if (start >= end)
+ range->start = vma->vm_start;
+ if (range->start >= end)
break;
}
- /* Here vma->vm_start <= start < (end|vma->vm_end) */
- tmp = vma->vm_end;
- if (end < tmp)
- tmp = end;
+ /* Here vma->vm_start <= range->start < (end|vma->vm_end) */
+ range->end = min(vma->vm_end, end);
- /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
- error = madvise_vma_behavior(vma, &prev, start, tmp,
- madv_behavior);
+ /* Here vma->vm_start <= range->start < range->end <= (end|vma->vm_end). */
+ error = madvise_vma_behavior(vma, &prev, madv_behavior);
if (error)
return error;
- start = tmp;
- if (prev && start < prev->vm_end)
- start = prev->vm_end;
- if (start >= end)
+ range->start = range->end;
+ if (prev && range->start < prev->vm_end)
+ range->start = prev->vm_end;
+ if (range->start >= range->end)
break;
if (prev)
vma = find_vma(mm, prev->vm_end);
else /* madvise_remove dropped mmap_lock */
- vma = find_vma(mm, start);
+ vma = find_vma(mm, range->start);
}
return unmapped_error;
@@ -1846,22 +1854,23 @@ static int madvise_do_behavior(unsigned long start, size_t len_in,
struct madvise_behavior *madv_behavior)
{
struct blk_plug plug;
- unsigned long end;
int error;
+ struct madvise_behavior_range *range = &madv_behavior->range;
if (is_memory_failure(madv_behavior)) {
- end = start + len_in;
- return madvise_inject_error(start, end, madv_behavior);
+ range->start = start;
+ range->end = start + len_in;
+ return madvise_inject_error(madv_behavior);
}
- start = get_untagged_addr(madv_behavior->mm, start);
- end = start + PAGE_ALIGN(len_in);
+ range->start = get_untagged_addr(madv_behavior->mm, start);
+ range->end = range->start + PAGE_ALIGN(len_in);
blk_start_plug(&plug);
if (is_madvise_populate(madv_behavior))
- error = madvise_populate(start, end, madv_behavior);
+ error = madvise_populate(madv_behavior);
else
- error = madvise_walk_vmas(start, end, madv_behavior);
+ error = madvise_walk_vmas(madv_behavior);
blk_finish_plug(&plug);
return error;
}
--
2.49.0
On 6/19/25 22:26, Lorenzo Stoakes wrote: > Rather than updating start and a confusing local parameter 'tmp' in > madvise_walk_vmas(), instead store the current range being operated upon in > the struct madvise_behavior helper object in a range pair and use this > consistently in all operations. Yeah much better but it still took me a bit to understand that "end" is now the original range->end that doesn't change during the iterations. Maybe make it const and comment? > This makes it clearer what is going on and opens the door to further > cleanup now we store state regarding what is currently being operated upon > here. > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> > --- > mm/madvise.c | 101 ++++++++++++++++++++++++++++----------------------- > 1 file changed, 55 insertions(+), 46 deletions(-) > > diff --git a/mm/madvise.c b/mm/madvise.c > index 47485653c2a1..6faa38b92111 100644 > --- a/mm/madvise.c > +++ b/mm/madvise.c > @@ -58,17 +58,26 @@ enum madvise_lock_mode { > MADVISE_VMA_READ_LOCK, > }; > > +struct madvise_behavior_range { > + unsigned long start, end; I also thought multiple names on one line is only done for local variables, but always separate declarations in structs. But I don't know if it's documented as such or if there are pre-existing counter examples. Consider it a non-binding agreement with Zi :) Thanks!
On Fri, Jun 20, 2025 at 03:49:31PM +0200, Vlastimil Babka wrote: > On 6/19/25 22:26, Lorenzo Stoakes wrote: > > Rather than updating start and a confusing local parameter 'tmp' in > > madvise_walk_vmas(), instead store the current range being operated upon in > > the struct madvise_behavior helper object in a range pair and use this > > consistently in all operations. > > Yeah much better but it still took me a bit to understand that "end" is now > the original range->end that doesn't change during the iterations. Maybe > make it const and comment? Sorry missed this, will adjust on respin.
On Fri, Jun 20, 2025 at 03:49:31PM +0200, Vlastimil Babka wrote: > On 6/19/25 22:26, Lorenzo Stoakes wrote: > > Rather than updating start and a confusing local parameter 'tmp' in > > madvise_walk_vmas(), instead store the current range being operated upon in > > the struct madvise_behavior helper object in a range pair and use this > > consistently in all operations. > > Yeah much better but it still took me a bit to understand that "end" is now > the original range->end that doesn't change during the iterations. Maybe > make it const and comment? > > > This makes it clearer what is going on and opens the door to further > > cleanup now we store state regarding what is currently being operated upon > > here. > > > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Thanks! > > > --- > > mm/madvise.c | 101 ++++++++++++++++++++++++++++----------------------- > > 1 file changed, 55 insertions(+), 46 deletions(-) > > > > diff --git a/mm/madvise.c b/mm/madvise.c > > index 47485653c2a1..6faa38b92111 100644 > > --- a/mm/madvise.c > > +++ b/mm/madvise.c > > @@ -58,17 +58,26 @@ enum madvise_lock_mode { > > MADVISE_VMA_READ_LOCK, > > }; > > > > +struct madvise_behavior_range { > > + unsigned long start, end; > > I also thought multiple names on one line is only done for local variables, > but always separate declarations in structs. But I don't know if it's > documented as such or if there are pre-existing counter examples. Consider > it a non-binding agreement with Zi :) Hm I didn't realise that was a thing at struct level, and since Zi also highlights this I'll change it :) Sorry Zi - my confusion, I was thinking comma-separated wasn't just for locals. > > Thanks!
On 19 Jun 2025, at 16:26, Lorenzo Stoakes wrote: > Rather than updating start and a confusing local parameter 'tmp' in > madvise_walk_vmas(), instead store the current range being operated upon in > the struct madvise_behavior helper object in a range pair and use this > consistently in all operations. > > This makes it clearer what is going on and opens the door to further > cleanup now we store state regarding what is currently being operated upon > here. > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > --- > mm/madvise.c | 101 ++++++++++++++++++++++++++++----------------------- > 1 file changed, 55 insertions(+), 46 deletions(-) > > diff --git a/mm/madvise.c b/mm/madvise.c > index 47485653c2a1..6faa38b92111 100644 > --- a/mm/madvise.c > +++ b/mm/madvise.c > @@ -58,17 +58,26 @@ enum madvise_lock_mode { > MADVISE_VMA_READ_LOCK, > }; > > +struct madvise_behavior_range { > + unsigned long start, end; > +}; > + Declare members separately? <snip> > @@ -1425,10 +1437,11 @@ static int madvise_vma_behavior(struct vm_area_struct *vma, > /* > * Error injection support for memory error handling. > */ > -static int madvise_inject_error(unsigned long start, unsigned long end, > - struct madvise_behavior *madv_behavior) > +static int madvise_inject_error(struct madvise_behavior *madv_behavior) > { > unsigned long size; > + unsigned long start = madv_behavior->range.start; > + unsigned long end = madv_behavior->range.end; > > if (!capable(CAP_SYS_ADMIN)) > return -EPERM; > @@ -1482,8 +1495,7 @@ static bool is_memory_failure(struct madvise_behavior *madv_behavior) > > #else > > -static int madvise_inject_error(unsigned long start, unsigned long end, > - struct madvise_behavior *madv_behavior) > +static int madvise_inject_error(struct madvise_behavior *madv_behavior) > { > return 0; > } OK, now I get why you pass struct madvise_behavior to madvise_inject_error() in Patch 2. The changes make sense to me now. Maybe delay that conversation in this one. > @@ -1565,20 +1577,20 @@ static bool process_madvise_remote_valid(int behavior) > * If a VMA read lock could not be acquired, we return NULL and expect caller to > * fallback to mmap lock behaviour. > */ > -static struct vm_area_struct *try_vma_read_lock(struct mm_struct *mm, > - struct madvise_behavior *madv_behavior, > - unsigned long start, unsigned long end) > +static > +struct vm_area_struct *try_vma_read_lock(struct madvise_behavior *madv_behavior) > { > + struct mm_struct *mm = madv_behavior->mm; Is the struct mm_struct removal missed in Patch 2? <snip> > @@ -1846,22 +1854,23 @@ static int madvise_do_behavior(unsigned long start, size_t len_in, > struct madvise_behavior *madv_behavior) > { > struct blk_plug plug; > - unsigned long end; > int error; > + struct madvise_behavior_range *range = &madv_behavior->range; > > if (is_memory_failure(madv_behavior)) { > - end = start + len_in; > - return madvise_inject_error(start, end, madv_behavior); > + range->start = start; > + range->end = start + len_in; > + return madvise_inject_error(madv_behavior); > } > > - start = get_untagged_addr(madv_behavior->mm, start); > - end = start + PAGE_ALIGN(len_in); > + range->start = get_untagged_addr(madv_behavior->mm, start); > + range->end = range->start + PAGE_ALIGN(len_in); > > blk_start_plug(&plug); > if (is_madvise_populate(madv_behavior)) > - error = madvise_populate(start, end, madv_behavior); > + error = madvise_populate(madv_behavior); > else > - error = madvise_walk_vmas(start, end, madv_behavior); > + error = madvise_walk_vmas(madv_behavior); > blk_finish_plug(&plug); > return error; > } We almost can pass just struct madvise_behavior to madvise_do_behavior(). I wonder why memory_failure behaves differently. -- Best Regards, Yan, Zi
On Thu, Jun 19, 2025 at 09:54:11PM -0400, Zi Yan wrote: > On 19 Jun 2025, at 16:26, Lorenzo Stoakes wrote: > > > Rather than updating start and a confusing local parameter 'tmp' in > > madvise_walk_vmas(), instead store the current range being operated upon in > > the struct madvise_behavior helper object in a range pair and use this > > consistently in all operations. > > > > This makes it clearer what is going on and opens the door to further > > cleanup now we store state regarding what is currently being operated upon > > here. > > > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > --- > > mm/madvise.c | 101 ++++++++++++++++++++++++++++----------------------- > > 1 file changed, 55 insertions(+), 46 deletions(-) > > > > diff --git a/mm/madvise.c b/mm/madvise.c > > index 47485653c2a1..6faa38b92111 100644 > > --- a/mm/madvise.c > > +++ b/mm/madvise.c > > @@ -58,17 +58,26 @@ enum madvise_lock_mode { > > MADVISE_VMA_READ_LOCK, > > }; > > > > +struct madvise_behavior_range { > > + unsigned long start, end; > > +}; > > + > > Declare members separately? Can do, but this is one of those subject things where everyone has different views, if I did it the other way no doubt somebody else would comment about declaring together :P I think as a range here it's not a big deal unless you feel strongly about it? > > <snip> > > > @@ -1425,10 +1437,11 @@ static int madvise_vma_behavior(struct vm_area_struct *vma, > > /* > > * Error injection support for memory error handling. > > */ > > -static int madvise_inject_error(unsigned long start, unsigned long end, > > - struct madvise_behavior *madv_behavior) > > +static int madvise_inject_error(struct madvise_behavior *madv_behavior) > > { > > unsigned long size; > > + unsigned long start = madv_behavior->range.start; > > + unsigned long end = madv_behavior->range.end; > > > > if (!capable(CAP_SYS_ADMIN)) > > return -EPERM; > > @@ -1482,8 +1495,7 @@ static bool is_memory_failure(struct madvise_behavior *madv_behavior) > > > > #else > > > > -static int madvise_inject_error(unsigned long start, unsigned long end, > > - struct madvise_behavior *madv_behavior) > > +static int madvise_inject_error(struct madvise_behavior *madv_behavior) > > { > > return 0; > > } > > OK, now I get why you pass struct madvise_behavior to madvise_inject_error() > in Patch 2. The changes make sense to me now. Maybe delay that conversation > in this one. I think it's valuable there because otherwise all the function invocations were inconsistent, but after 2/5 become completely consistent. I mention this in the commit message and I think it's valuable so you're not doing: if (foo) bar(x, y, z) if (blah) baz(y, x, z) etc. When you quickly read through it's easy to get confused/lost as to what's going on, whereas if they all have the same signatures it's very clear you're offloading the heavy lifting to each function. > > > > > @@ -1565,20 +1577,20 @@ static bool process_madvise_remote_valid(int behavior) > > * If a VMA read lock could not be acquired, we return NULL and expect caller to > > * fallback to mmap lock behaviour. > > */ > > -static struct vm_area_struct *try_vma_read_lock(struct mm_struct *mm, > > - struct madvise_behavior *madv_behavior, > > - unsigned long start, unsigned long end) > > +static > > +struct vm_area_struct *try_vma_read_lock(struct madvise_behavior *madv_behavior) > > { > > + struct mm_struct *mm = madv_behavior->mm; > > Is the struct mm_struct removal missed in Patch 2? Yeah, I will go back and put it in on respin. > > > <snip> > > > @@ -1846,22 +1854,23 @@ static int madvise_do_behavior(unsigned long start, size_t len_in, > > struct madvise_behavior *madv_behavior) > > { > > struct blk_plug plug; > > - unsigned long end; > > int error; > > + struct madvise_behavior_range *range = &madv_behavior->range; > > > > if (is_memory_failure(madv_behavior)) { > > - end = start + len_in; > > - return madvise_inject_error(start, end, madv_behavior); > > + range->start = start; > > + range->end = start + len_in; > > + return madvise_inject_error(madv_behavior); > > } > > > > - start = get_untagged_addr(madv_behavior->mm, start); > > - end = start + PAGE_ALIGN(len_in); > > + range->start = get_untagged_addr(madv_behavior->mm, start); > > + range->end = range->start + PAGE_ALIGN(len_in); > > > > blk_start_plug(&plug); > > if (is_madvise_populate(madv_behavior)) > > - error = madvise_populate(start, end, madv_behavior); > > + error = madvise_populate(madv_behavior); > > else > > - error = madvise_walk_vmas(start, end, madv_behavior); > > + error = madvise_walk_vmas(madv_behavior); > > blk_finish_plug(&plug); > > return error; > > } > > We almost can pass just struct madvise_behavior to madvise_do_behavior(). > I wonder why memory_failure behaves differently. There's complexity around the start, end stuff (Barry bumped into some of this) and I don't want to mess with that in this series. This series is meant to have no functional changes. > > -- > Best Regards, > Yan, Zi
On 19 Jun 2025, at 21:54, Zi Yan wrote: > On 19 Jun 2025, at 16:26, Lorenzo Stoakes wrote: > >> Rather than updating start and a confusing local parameter 'tmp' in >> madvise_walk_vmas(), instead store the current range being operated upon in >> the struct madvise_behavior helper object in a range pair and use this >> consistently in all operations. >> >> This makes it clearer what is going on and opens the door to further >> cleanup now we store state regarding what is currently being operated upon >> here. >> >> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> >> --- >> mm/madvise.c | 101 ++++++++++++++++++++++++++++----------------------- >> 1 file changed, 55 insertions(+), 46 deletions(-) >> >> diff --git a/mm/madvise.c b/mm/madvise.c >> index 47485653c2a1..6faa38b92111 100644 >> --- a/mm/madvise.c >> +++ b/mm/madvise.c >> @@ -58,17 +58,26 @@ enum madvise_lock_mode { >> MADVISE_VMA_READ_LOCK, >> }; >> >> +struct madvise_behavior_range { >> + unsigned long start, end; >> +}; >> + > > Declare members separately? > > <snip> > >> @@ -1425,10 +1437,11 @@ static int madvise_vma_behavior(struct vm_area_struct *vma, >> /* >> * Error injection support for memory error handling. >> */ >> -static int madvise_inject_error(unsigned long start, unsigned long end, >> - struct madvise_behavior *madv_behavior) >> +static int madvise_inject_error(struct madvise_behavior *madv_behavior) >> { >> unsigned long size; >> + unsigned long start = madv_behavior->range.start; >> + unsigned long end = madv_behavior->range.end; >> >> if (!capable(CAP_SYS_ADMIN)) >> return -EPERM; >> @@ -1482,8 +1495,7 @@ static bool is_memory_failure(struct madvise_behavior *madv_behavior) >> >> #else >> >> -static int madvise_inject_error(unsigned long start, unsigned long end, >> - struct madvise_behavior *madv_behavior) >> +static int madvise_inject_error(struct madvise_behavior *madv_behavior) >> { >> return 0; >> } > > OK, now I get why you pass struct madvise_behavior to madvise_inject_error() > in Patch 2. The changes make sense to me now. Maybe delay that conversation > in this one. > > > >> @@ -1565,20 +1577,20 @@ static bool process_madvise_remote_valid(int behavior) >> * If a VMA read lock could not be acquired, we return NULL and expect caller to >> * fallback to mmap lock behaviour. >> */ >> -static struct vm_area_struct *try_vma_read_lock(struct mm_struct *mm, >> - struct madvise_behavior *madv_behavior, >> - unsigned long start, unsigned long end) >> +static >> +struct vm_area_struct *try_vma_read_lock(struct madvise_behavior *madv_behavior) >> { >> + struct mm_struct *mm = madv_behavior->mm; > > Is the struct mm_struct removal missed in Patch 2? > > > <snip> > >> @@ -1846,22 +1854,23 @@ static int madvise_do_behavior(unsigned long start, size_t len_in, >> struct madvise_behavior *madv_behavior) >> { >> struct blk_plug plug; >> - unsigned long end; >> int error; >> + struct madvise_behavior_range *range = &madv_behavior->range; >> >> if (is_memory_failure(madv_behavior)) { >> - end = start + len_in; >> - return madvise_inject_error(start, end, madv_behavior); >> + range->start = start; >> + range->end = start + len_in; >> + return madvise_inject_error(madv_behavior); >> } >> >> - start = get_untagged_addr(madv_behavior->mm, start); >> - end = start + PAGE_ALIGN(len_in); >> + range->start = get_untagged_addr(madv_behavior->mm, start); >> + range->end = range->start + PAGE_ALIGN(len_in); >> >> blk_start_plug(&plug); >> if (is_madvise_populate(madv_behavior)) >> - error = madvise_populate(start, end, madv_behavior); >> + error = madvise_populate(madv_behavior); >> else >> - error = madvise_walk_vmas(start, end, madv_behavior); >> + error = madvise_walk_vmas(madv_behavior); >> blk_finish_plug(&plug); >> return error; >> } > > We almost can pass just struct madvise_behavior to madvise_do_behavior(). > I wonder why memory_failure behaves differently. Based on git history, it seems that no one paid attention to madvise_inject_error() and the [start, start + len_in] has never been changed since it was added back from 2009. OK, it seems that Kirill (cc'd) moved start = untagged_addr(start); from before madvise_inject_error() to after it at commit 428e106ae1ad ("mm: Introduce untagged_addr_remote()"). It changed code behavior. So memory_failure should get the same range as others, meaning madvise_do_behavior() can just take struct madvise_behavior and the range can be set at the call sites. -- Best Regards, Yan, Zi
On Thu, Jun 19, 2025 at 10:13:19PM -0400, Zi Yan wrote: > On 19 Jun 2025, at 21:54, Zi Yan wrote: > >> @@ -1846,22 +1854,23 @@ static int madvise_do_behavior(unsigned long start, size_t len_in, > >> struct madvise_behavior *madv_behavior) > >> { > >> struct blk_plug plug; > >> - unsigned long end; > >> int error; > >> + struct madvise_behavior_range *range = &madv_behavior->range; > >> > >> if (is_memory_failure(madv_behavior)) { > >> - end = start + len_in; > >> - return madvise_inject_error(start, end, madv_behavior); > >> + range->start = start; > >> + range->end = start + len_in; > >> + return madvise_inject_error(madv_behavior); > >> } > >> > >> - start = get_untagged_addr(madv_behavior->mm, start); > >> - end = start + PAGE_ALIGN(len_in); > >> + range->start = get_untagged_addr(madv_behavior->mm, start); > >> + range->end = range->start + PAGE_ALIGN(len_in); > >> > >> blk_start_plug(&plug); > >> if (is_madvise_populate(madv_behavior)) > >> - error = madvise_populate(start, end, madv_behavior); > >> + error = madvise_populate(madv_behavior); > >> else > >> - error = madvise_walk_vmas(start, end, madv_behavior); > >> + error = madvise_walk_vmas(madv_behavior); > >> blk_finish_plug(&plug); > >> return error; > >> } > > > > We almost can pass just struct madvise_behavior to madvise_do_behavior(). > > I wonder why memory_failure behaves differently. > > Based on git history, it seems that no one paid attention to > madvise_inject_error() and the [start, start + len_in] has never been > changed since it was added back from 2009. > > OK, it seems that Kirill (cc'd) moved start = untagged_addr(start); from > before madvise_inject_error() to after it at commit 428e106ae1ad > ("mm: Introduce untagged_addr_remote()"). It changed code behavior. > > So memory_failure should get the same range as others, meaning > madvise_do_behavior() can just take struct madvise_behavior > and the range can be set at the call sites. Well it's also page aligned in other cases right? Anyway overall I'm not touching that in this series, sorry. This is a clean up, and this stuff is a functional change that needs to be carefully thought out. So this is legitimately I think one for a follow-up. But this series makes it easier to fix up later :)
© 2016 - 2025 Red Hat, Inc.