From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
Open code the do_vmi_align_munmap() call so that it can be broken up
later in the series.
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
---
mm/mmap.c | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index e9858ca8bbd4..f5b33de4e717 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2915,6 +2915,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
struct vm_area_struct *next, *prev, *merge;
pgoff_t pglen = len >> PAGE_SHIFT;
unsigned long charged = 0;
+ struct vma_munmap_struct vms;
+ struct ma_state mas_detach;
+ struct maple_tree mt_detach;
unsigned long end = addr + len;
unsigned long merge_start = addr, merge_end = end;
bool writable_file_mapping = false;
@@ -2947,9 +2950,24 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
/* Find the first overlapping VMA */
vma = vma_find(&vmi, end);
if (vma) {
- if (do_vmi_align_munmap(&vmi, vma, mm, addr, end, uf, false))
+ mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
+ mt_on_stack(mt_detach);
+ mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
+ init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
+ if (vms_gather_munmap_vmas(&vms, &mas_detach))
+ return -ENOMEM;
+
+ if (vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL))
return -ENOMEM;
+
+ vms_complete_munmap_vmas(&vms, &mas_detach);
+ next = vms.next;
+ prev = vms.prev;
+ vma_prev(&vmi);
vma = NULL;
+ } else {
+ next = vma_next(&vmi);
+ prev = vma_prev(&vmi);
}
/*
@@ -2962,8 +2980,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vm_flags |= VM_ACCOUNT;
}
- next = vma_next(&vmi);
- prev = vma_prev(&vmi);
if (vm_flags & VM_SPECIAL) {
if (prev)
vma_iter_next_range(&vmi);
--
2.43.0
On Thu, Jul 04, 2024 at 02:27:11PM GMT, Liam R. Howlett wrote:
> From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
>
> Open code the do_vmi_align_munmap() call so that it can be broken up
> later in the series.
>
> Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> ---
> mm/mmap.c | 22 +++++++++++++++++++---
> 1 file changed, 19 insertions(+), 3 deletions(-)
>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index e9858ca8bbd4..f5b33de4e717 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2915,6 +2915,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> struct vm_area_struct *next, *prev, *merge;
> pgoff_t pglen = len >> PAGE_SHIFT;
> unsigned long charged = 0;
> + struct vma_munmap_struct vms;
> + struct ma_state mas_detach;
> + struct maple_tree mt_detach;
> unsigned long end = addr + len;
> unsigned long merge_start = addr, merge_end = end;
> bool writable_file_mapping = false;
> @@ -2947,9 +2950,24 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> /* Find the first overlapping VMA */
> vma = vma_find(&vmi, end);
> if (vma) {
> - if (do_vmi_align_munmap(&vmi, vma, mm, addr, end, uf, false))
> + mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
> + mt_on_stack(mt_detach);
> + mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
I'm guessing this is exactly equivalent (or equivalent enough for our purposes
here) to the MA_STATE() call in do_vmi_align_munmap()?
Checking the two against each other it seems that it is indeed.
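For anyone else following along, the comparison is roughly this (just a
sketch - the exact ma_state field initialisation differs between kernel
versions):

	/* do_vmi_align_munmap() today: */
	MA_STATE(mas_detach, &mt_detach, 0, 0);

	/* The open-coded form above: */
	struct ma_state mas_detach;

	mas_init(&mas_detach, &mt_detach, /* addr = */ 0);

Both leave you with a fresh maple state on &mt_detach positioned at 0.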
> + init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
> + if (vms_gather_munmap_vmas(&vms, &mas_detach))
> + return -ENOMEM;
In do_vmi_align_munmap() we also invoke validate_mm(), why aren't we doing that
here?
> +
> + if (vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL))
> return -ENOMEM;
Same here.
> +
> + vms_complete_munmap_vmas(&vms, &mas_detach);
> + next = vms.next;
> + prev = vms.prev;
> + vma_prev(&vmi);
I'm sure this is correct, but just to double-check - we want to set the VMI to
prev here right?
It might be worth adding a small comment saying '/* vmi now points at prev */'
or similar, I've found it can get quite hard to follow where the iterator is at
sometimes.
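E.g. against the hunk above, just:

	vms_complete_munmap_vmas(&vms, &mas_detach);
	next = vms.next;
	prev = vms.prev;
	vma_prev(&vmi);		/* vmi now points at prev */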
> vma = NULL;
> + } else {
> + next = vma_next(&vmi);
> + prev = vma_prev(&vmi);
But here we move forward to the next VMA and set this to next, then go back to
the _original_ one and this is prev?
Actually I guess if vma == NULL, next gets you to the next, and prev jumps back
to prev, with nothing between, and so that makes sense.
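So, annotating the hunk for my own benefit (comments are mine, not part of
the patch):

	} else {
		next = vma_next(&vmi);	/* nothing overlaps, so first VMA at/above end */
		prev = vma_prev(&vmi);	/* step back over the gap; vmi ends up on prev */
	}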
> }
>
> /*
> @@ -2962,8 +2980,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> vm_flags |= VM_ACCOUNT;
> }
>
> - next = vma_next(&vmi);
> - prev = vma_prev(&vmi);
> if (vm_flags & VM_SPECIAL) {
> if (prev)
> vma_iter_next_range(&vmi);
> --
> 2.43.0
>
>
* Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [240705 16:06]:
> On Thu, Jul 04, 2024 at 02:27:11PM GMT, Liam R. Howlett wrote:
> > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
> >
> > Open code the do_vmi_align_munmap() call so that it can be broken up
> > later in the series.
> >
> > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> > ---
> > mm/mmap.c | 22 +++++++++++++++++++---
> > 1 file changed, 19 insertions(+), 3 deletions(-)
> >
> > diff --git a/mm/mmap.c b/mm/mmap.c
> > index e9858ca8bbd4..f5b33de4e717 100644
> > --- a/mm/mmap.c
> > +++ b/mm/mmap.c
> > @@ -2915,6 +2915,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > struct vm_area_struct *next, *prev, *merge;
> > pgoff_t pglen = len >> PAGE_SHIFT;
> > unsigned long charged = 0;
> > + struct vma_munmap_struct vms;
> > + struct ma_state mas_detach;
> > + struct maple_tree mt_detach;
> > unsigned long end = addr + len;
> > unsigned long merge_start = addr, merge_end = end;
> > bool writable_file_mapping = false;
> > @@ -2947,9 +2950,24 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > /* Find the first overlapping VMA */
> > vma = vma_find(&vmi, end);
> > if (vma) {
> > - if (do_vmi_align_munmap(&vmi, vma, mm, addr, end, uf, false))
> > + mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
> > + mt_on_stack(mt_detach);
> > + mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
>
> I'm guessing this is exactly equivalent (or equivalent enough for our purposes
> here) to the MA_STATE() call in do_vmi_align_munmap()?
Yes, what we are doing is inlining the function call so that it can be
split into parts. So for reviewing, I just made it in-line.
>
> Checking the two against each other it seems that it is indeed.
>
> > + init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
> > + if (vms_gather_munmap_vmas(&vms, &mas_detach))
> > + return -ENOMEM;
>
> In do_vmi_align_munmap() we also invoke validate_mm(), why aren't we doing that
> here?
I don't see the validate_mm() call in do_vmi_align_munmap, it is called
in the vms_complete_munmap_vmas() function though.
>
> > +
> > + if (vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL))
> > return -ENOMEM;
>
> Same here.
It would fail here, our count would be wrong.
>
> > +
> > + vms_complete_munmap_vmas(&vms, &mas_detach);
> > + next = vms.next;
> > + prev = vms.prev;
> > + vma_prev(&vmi);
>
> I'm sure this is correct, but just to double-check - we want to set the VMI to
> prev here right?
Yes. In the current function, the vmi points to prev when we are done
this dance - so I put it there.
>
> It might be worth adding a small comment saying '/* vmi now points at prev */'
> or similar, I've found it can get quite hard to follow where the iterator is at
> sometimes.
So this is about to change..
>
> > vma = NULL;
> > + } else {
> > + next = vma_next(&vmi);
> > + prev = vma_prev(&vmi);
>
> But here we move forward to the next VMA and set this to next, then go back to
> the _original_ one and this is prev?
>
> Actually I guess if vma == NULL, next gets you to the next, and prev jumps back
> to prev, with nothing between, and so that makes sense.
Yes, this is what I'm doing.
>
>
> > }
> >
> > /*
> > @@ -2962,8 +2980,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > vm_flags |= VM_ACCOUNT;
> > }
> >
> > - next = vma_next(&vmi);
> > - prev = vma_prev(&vmi);
> > if (vm_flags & VM_SPECIAL) {
> > if (prev)
> > vma_iter_next_range(&vmi);
> > --
> > 2.43.0
> >
> >
On Fri, Jul 05, 2024 at 04:30:46PM GMT, Liam R. Howlett wrote:
> * Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [240705 16:06]:
> > On Thu, Jul 04, 2024 at 02:27:11PM GMT, Liam R. Howlett wrote:
> > > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
> > >
> > > Open code the do_vmi_align_munmap() call so that it can be broken up
> > > later in the series.
> > >
> > > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> > > ---
> > > mm/mmap.c | 22 +++++++++++++++++++---
> > > 1 file changed, 19 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/mm/mmap.c b/mm/mmap.c
> > > index e9858ca8bbd4..f5b33de4e717 100644
> > > --- a/mm/mmap.c
> > > +++ b/mm/mmap.c
> > > @@ -2915,6 +2915,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > > struct vm_area_struct *next, *prev, *merge;
> > > pgoff_t pglen = len >> PAGE_SHIFT;
> > > unsigned long charged = 0;
> > > + struct vma_munmap_struct vms;
> > > + struct ma_state mas_detach;
> > > + struct maple_tree mt_detach;
> > > unsigned long end = addr + len;
> > > unsigned long merge_start = addr, merge_end = end;
> > > bool writable_file_mapping = false;
> > > @@ -2947,9 +2950,24 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > > /* Find the first overlapping VMA */
> > > vma = vma_find(&vmi, end);
> > > if (vma) {
> > > - if (do_vmi_align_munmap(&vmi, vma, mm, addr, end, uf, false))
> > > + mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
> > > + mt_on_stack(mt_detach);
> > > + mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
> >
> > I'm guessing this is exactly equivalent (or equivalent enough for our purposes
> > here) to the MA_STATE() call in do_vmi_align_munmap()?
>
> Yes, what we are doing is inlining the function call so that it can be
> split into parts. So for reviewing, I just made it in-line.
>
> >
> > Checking the two against each other it seems that it is indeed.
> >
> > > + init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
> > > + if (vms_gather_munmap_vmas(&vms, &mas_detach))
> > > + return -ENOMEM;
> >
> > In do_vmi_align_munmap() we also invoke validate_mm(), why aren't we doing that
> > here?
>
> I don't see the validate_mm() call in do_vmi_align_munmap, it is called
> in the vms_complete_munmap_vmas() function though.
In do_vmi_align_munmap() you have:
	error = vms_gather_munmap_vmas(&vms, &mas_detach);
	if (error)
		goto gather_failed;

...

gather_failed:
	validate_mm(mm);
	return error;
>
> >
> > > +
> > > + if (vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL))
> > > return -ENOMEM;
> >
> > Same here.
>
> It would fail here, our count would be wrong.
Right, but in do_vmi_align_munmap() you have:
	error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL);
	if (error)
		goto clear_tree_failed;

...

clear_tree_failed:
	abort_munmap_vmas(&mas_detach);
gather_failed:
	validate_mm(mm);
>
> >
> > > +
> > > + vms_complete_munmap_vmas(&vms, &mas_detach);
> > > + next = vms.next;
> > > + prev = vms.prev;
> > > + vma_prev(&vmi);
> >
> > I'm sure this is correct, but just to double-check - we want to set the VMI to
> > prev here right?
>
> Yes. In the current function, the vmi points to prev when we are done
> this dance - so I put it there.
> >
> > It might be worth adding a small comment saying '/* vmi now points at prev */'
> > or similar, I've found it can get quite hard to follow where the iterator is at
> > sometimes.
>
> So this is about to change..
Yeah, I saw :)
>
> >
> > > vma = NULL;
> > > + } else {
> > > + next = vma_next(&vmi);
> > > + prev = vma_prev(&vmi);
> >
> > But here we move forward to the next VMA and set this to next, then go back to
> > the _original_ one and this is prev?
> >
> > Actually I guess if vma == NULL, next gets you to the next, and prev jumps back
> > to prev, with nothing between, and so that makes sense.
>
> Yes, this is what I'm doing.
>
> >
> >
> > > }
> > >
> > > /*
> > > @@ -2962,8 +2980,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > > vm_flags |= VM_ACCOUNT;
> > > }
> > >
> > > - next = vma_next(&vmi);
> > > - prev = vma_prev(&vmi);
> > > if (vm_flags & VM_SPECIAL) {
> > > if (prev)
> > > vma_iter_next_range(&vmi);
> > > --
> > > 2.43.0
> > >
> > >
* Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [240705 16:36]:
> On Fri, Jul 05, 2024 at 04:30:46PM GMT, Liam R. Howlett wrote:
> > * Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [240705 16:06]:
> > > On Thu, Jul 04, 2024 at 02:27:11PM GMT, Liam R. Howlett wrote:
> > > > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>
> > > >
> > > > Open code the do_vmi_align_munmap() call so that it can be broken up
> > > > later in the series.
> > > >
> > > > Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> > > > ---
> > > > mm/mmap.c | 22 +++++++++++++++++++---
> > > > 1 file changed, 19 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/mm/mmap.c b/mm/mmap.c
> > > > index e9858ca8bbd4..f5b33de4e717 100644
> > > > --- a/mm/mmap.c
> > > > +++ b/mm/mmap.c
> > > > @@ -2915,6 +2915,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > > > struct vm_area_struct *next, *prev, *merge;
> > > > pgoff_t pglen = len >> PAGE_SHIFT;
> > > > unsigned long charged = 0;
> > > > + struct vma_munmap_struct vms;
> > > > + struct ma_state mas_detach;
> > > > + struct maple_tree mt_detach;
> > > > unsigned long end = addr + len;
> > > > unsigned long merge_start = addr, merge_end = end;
> > > > bool writable_file_mapping = false;
> > > > @@ -2947,9 +2950,24 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > > > /* Find the first overlapping VMA */
> > > > vma = vma_find(&vmi, end);
> > > > if (vma) {
> > > > - if (do_vmi_align_munmap(&vmi, vma, mm, addr, end, uf, false))
> > > > + mt_init_flags(&mt_detach, vmi.mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
> > > > + mt_on_stack(mt_detach);
> > > > + mas_init(&mas_detach, &mt_detach, /* addr = */ 0);
> > >
> > > I'm guessing this is exactly equivalent (or equivalent enough for our purposes
> > > here) to the MA_STATE() call in do_vmi_align_munmap()?
> >
> > Yes, what we are doing is inlining the function call so that it can be
> > split into parts. So for reviewing, I just made it in-line.
> >
> > >
> > > Checking the two against each other it seems that it is indeed.
> > >
> > > > + init_vma_munmap(&vms, &vmi, vma, addr, end, uf, /* unlock = */ false);
> > > > + if (vms_gather_munmap_vmas(&vms, &mas_detach))
> > > > + return -ENOMEM;
> > >
> > > In do_vmi_align_munmap() we also invoke validate_mm(), why aren't we doing that
> > > here?
> >
> > I don't see the validate_mm() call in do_vmi_align_munmap, it is called
> > in the vms_complete_munmap_vmas() function though.
>
> In do_vmi_align_munmap() you have:
>
> 	error = vms_gather_munmap_vmas(&vms, &mas_detach);
> 	if (error)
> 		goto gather_failed;
>
> ...
>
> gather_failed:
> 	validate_mm(mm);
> 	return error;
>
> >
> > >
> > > > +
> > > > + if (vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL))
> > > > return -ENOMEM;
> > >
> > > Same here.
> >
> > It would fail here, our count would be wrong.
>
> Right, but in do_vmi_align_munmap() you have:
>
> 	error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL);
> 	if (error)
> 		goto clear_tree_failed;
>
> ...
>
> clear_tree_failed:
> 	abort_munmap_vmas(&mas_detach);
> gather_failed:
> 	validate_mm(mm);
>
Right, so I also missed my cleanup in the abort_munmap_vmas() here..
Thanks, I'll fix these.
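Roughly something like this for the two failure paths, mirroring the labels
you quoted (just a sketch, the actual respin may look different):

	if (vms_gather_munmap_vmas(&vms, &mas_detach)) {
		validate_mm(mm);
		return -ENOMEM;
	}

	if (vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL)) {
		abort_munmap_vmas(&mas_detach);
		validate_mm(mm);
		return -ENOMEM;
	}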
>
> >
> > >
> > > > +
> > > > + vms_complete_munmap_vmas(&vms, &mas_detach);
> > > > + next = vms.next;
> > > > + prev = vms.prev;
> > > > + vma_prev(&vmi);
> > >
> > > I'm sure this is correct, but just to double-check - we want to set the VMI to
> > > prev here right?
> >
> > Yes. In the current function, the vmi points to prev when we are done
> > this dance - so I put it there.
> > >
> > > It might be worth adding a small comment saying '/* vmi now points at prev */'
> > > or similar, I've found it can get quite hard to follow where the iterator is at
> > > sometimes.
> >
> > So this is about to change..
>
> Yeah, I saw :)
>
> >
> > >
> > > > vma = NULL;
> > > > + } else {
> > > > + next = vma_next(&vmi);
> > > > + prev = vma_prev(&vmi);
> > >
> > > But here we move forward to the next VMA and set this to next, then go back to
> > > the _original_ one and this is prev?
> > >
> > > Actually I guess if vma == NULL, next gets you to the next, and prev jumps back
> > > to prev, with nothing between, and so that makes sense.
> >
> > Yes, this is what I'm doing.
> >
> > >
> > >
> > > > }
> > > >
> > > > /*
> > > > @@ -2962,8 +2980,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
> > > > vm_flags |= VM_ACCOUNT;
> > > > }
> > > >
> > > > - next = vma_next(&vmi);
> > > > - prev = vma_prev(&vmi);
> > > > if (vm_flags & VM_SPECIAL) {
> > > > if (prev)
> > > > vma_iter_next_range(&vmi);
> > > > --
> > > > 2.43.0
> > > >
> > > >