mm/mmap.c | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-)
The maple tree limits the gap returned to a window that specifically
fits what was asked. This may not be optimal in the case of switching
search directions or a gap that does not satisfy the requested space for
other reasons. Fix the search by retrying the operation and limiting
the search window in the rare occasion that a conflict occurs.
Reported-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Fixes: 3499a13168da ("mm/mmap: use maple tree for unmapped_area{_topdown}")
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
---
v1 changes:
- Add comment about avoiding prev check
- Remove check for VM_GROWSUP on prev since vm_end_gap() does this
mm/mmap.c | 48 +++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 43 insertions(+), 5 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 055fbd5ed762..6b9116f1c304 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1548,7 +1548,8 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
*/
static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
- unsigned long length, gap;
+ unsigned long length, gap, low_limit;
+ struct vm_area_struct *tmp;
MA_STATE(mas, ¤t->mm->mm_mt, 0, 0);
@@ -1557,12 +1558,29 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
if (length < info->length)
return -ENOMEM;
- if (mas_empty_area(&mas, info->low_limit, info->high_limit - 1,
- length))
+ low_limit = info->low_limit;
+retry:
+ if (mas_empty_area(&mas, low_limit, info->high_limit - 1, length))
return -ENOMEM;
gap = mas.index;
gap += (info->align_offset - gap) & info->align_mask;
+ tmp = mas_next(&mas, ULONG_MAX);
+ if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+ if (vm_start_gap(tmp) < gap + length - 1) {
+ low_limit = tmp->vm_end;
+ mas_reset(&mas);
+ goto retry;
+ }
+ } else {
+ tmp = mas_prev(&mas, 0);
+ if (tmp && vm_end_gap(tmp) > gap) {
+ low_limit = vm_end_gap(tmp);
+ mas_reset(&mas);
+ goto retry;
+ }
+ }
+
return gap;
}
@@ -1578,7 +1596,8 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
*/
static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
{
- unsigned long length, gap;
+ unsigned long length, gap, high_limit, gap_end;
+ struct vm_area_struct *tmp;
MA_STATE(mas, ¤t->mm->mm_mt, 0, 0);
/* Adjust search length to account for worst case alignment overhead */
@@ -1586,12 +1605,31 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
if (length < info->length)
return -ENOMEM;
- if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1,
+ high_limit = info->high_limit;
+retry:
+ if (mas_empty_area_rev(&mas, info->low_limit, high_limit - 1,
length))
return -ENOMEM;
gap = mas.last + 1 - info->length;
gap -= (gap - info->align_offset) & info->align_mask;
+ gap_end = mas.last;
+ tmp = mas_next(&mas, ULONG_MAX);
+ if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+ if (vm_start_gap(tmp) <= gap_end) {
+ high_limit = vm_start_gap(tmp);
+ mas_reset(&mas);
+ goto retry;
+ }
+ } else {
+ tmp = mas_prev(&mas, 0);
+ if (tmp && vm_end_gap(tmp) > gap) {
+ high_limit = tmp->vm_start;
+ mas_reset(&mas);
+ goto retry;
+ }
+ }
+
return gap;
}
--
2.39.2
This reintroduces the issue described in https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/ Linux 6.2.13 can no longer successfully run the mmap-test reproducer linked there. Linux 6.2.12 passes. Regards, Tad.
On 29.04.23 15:32, Tad wrote: > This reintroduces the issue described in > https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/ Yes, I also ran into this (even though I'd somehow missed it the previous time). Apparently the issue arises at mm/mmap.c:1582, where low_limit is set to vm_end_gap(tmp). Occasionally, this returns a 64-bit address (e.g. 0x7fedea581000), which is obviously greater than high_limit for a 32-bit mmap, and causes the next call to mas_empty_area() to fail. I'm not sure why vm_end_gap(tmp) occasionally returns a 64-bit address, or if the best solution is to just check for this and skip the retry if it occurs… -- Michael
* Michael Keyes <mgkeyes@vigovproductions.net> [230430 18:41]: > On 29.04.23 15:32, Tad wrote: > > This reintroduces the issue described in > > https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/ > Yes, I also ran into this (even though I'd somehow missed it the > previous time). Rick Edgecombe reported something similar [1]. This is probably to do with my stack guard checks I recently added. > > Apparently the issue arises at mm/mmap.c:1582, where low_limit is set to > vm_end_gap(tmp). Occasionally, this returns a 64-bit address (e.g. > 0x7fedea581000), which is obviously greater than high_limit for a 32-bit > mmap, and causes the next call to mas_empty_area() to fail. > > I'm not sure why vm_end_gap(tmp) occasionally returns a 64-bit address, > or if the best solution is to just check for this and skip the retry if > it occurs… > Thanks for the debugging. I will look into it. I am currently trying to revise how the iterators, prev/next deal with shifting outside the requested limits. I suspect it's something to do with hitting the limit and what someone would assume the next operation means. [1] https://lore.kernel.org/linux-mm/32f156ba80010fd97dbaf0a0cdfc84366608624d.camel@intel.com/
...Adding Rick to the Cc this time. * Liam R. Howlett <Liam.Howlett@Oracle.com> [230502 10:08]: > * Michael Keyes <mgkeyes@vigovproductions.net> [230430 18:41]: > > On 29.04.23 15:32, Tad wrote: > > > This reintroduces the issue described in > > > https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/ > > Yes, I also ran into this (even though I'd somehow missed it the > > previous time). > > Rick Edgecombe reported something similar [1]. > > This is probably to do with my stack guard checks I recently added. > > > > > Apparently the issue arises at mm/mmap.c:1582, where low_limit is set to > > vm_end_gap(tmp). Occasionally, this returns a 64-bit address (e.g. > > 0x7fedea581000), which is obviously greater than high_limit for a 32-bit > > mmap, and causes the next call to mas_empty_area() to fail. > > > > I'm not sure why vm_end_gap(tmp) occasionally returns a 64-bit address, > > or if the best solution is to just check for this and skip the retry if > > it occurs… > > > > Thanks for the debugging. I will look into it. > > I am currently trying to revise how the iterators, prev/next deal with > shifting outside the requested limits. I suspect it's something to do > with hitting the limit and what someone would assume the next operation > means. > > [1] https://lore.kernel.org/linux-mm/32f156ba80010fd97dbaf0a0cdfc84366608624d.camel@intel.com/
Hi Liam, It's a bit hard to follow this particular issue on v6.1 as there are many email threads related to this. I just wanted to ask if whether this is fixed on mainline and v6.1 stable yet. If there's a new thread tackling this issue, I'd appreciate it if you can link it here. Thanks, regards On 5/2/23 23:09, Liam R. Howlett wrote: > ...Adding Rick to the Cc this time. > > * Liam R. Howlett <Liam.Howlett@Oracle.com> [230502 10:08]: >> * Michael Keyes <mgkeyes@vigovproductions.net> [230430 18:41]: >>> On 29.04.23 15:32, Tad wrote: >>>> This reintroduces the issue described in >>>> https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/ >>> Yes, I also ran into this (even though I'd somehow missed it the >>> previous time). >> >> Rick Edgecombe reported something similar [1]. >> >> This is probably to do with my stack guard checks I recently added. >> >>> >>> Apparently the issue arises at mm/mmap.c:1582, where low_limit is set to >>> vm_end_gap(tmp). Occasionally, this returns a 64-bit address (e.g. >>> 0x7fedea581000), which is obviously greater than high_limit for a 32-bit >>> mmap, and causes the next call to mas_empty_area() to fail. >>> >>> I'm not sure why vm_end_gap(tmp) occasionally returns a 64-bit address, >>> or if the best solution is to just check for this and skip the retry if >>> it occurs… >>> >> >> Thanks for the debugging. I will look into it. >> >> I am currently trying to revise how the iterators, prev/next deal with >> shifting outside the requested limits. I suspect it's something to do >> with hitting the limit and what someone would assume the next operation >> means. >> >> [1] https://lore.kernel.org/linux-mm/32f156ba80010fd97dbaf0a0cdfc84366608624d.camel@intel.com/ > >
* Juhyung Park <qkrwngud825@gmail.com> [230515 04:57]: > Hi Liam, > > It's a bit hard to follow this particular issue on v6.1 as there are many > email threads related to this. > > I just wanted to ask if whether this is fixed on mainline and v6.1 stable > yet. Not yet. It is in mm-unstable at this time. > > If there's a new thread tackling this issue, I'd appreciate it if you can > link it here. https://lore.kernel.org/linux-mm/20230505145829.74574-1-zhangpeng.00@bytedance.com/ > > Thanks, > regards > > On 5/2/23 23:09, Liam R. Howlett wrote: > > ...Adding Rick to the Cc this time. > > > > * Liam R. Howlett <Liam.Howlett@Oracle.com> [230502 10:08]: > > > * Michael Keyes <mgkeyes@vigovproductions.net> [230430 18:41]: > > > > On 29.04.23 15:32, Tad wrote: > > > > > This reintroduces the issue described in > > > > > https://lore.kernel.org/linux-mm/cb8dc31a-fef2-1d09-f133-e9f7b9f9e77a@sony.com/ > > > > Yes, I also ran into this (even though I'd somehow missed it the > > > > previous time). > > > > > > Rick Edgecombe reported something similar [1]. > > > > > > This is probably to do with my stack guard checks I recently added. > > > > > > > > > > > Apparently the issue arises at mm/mmap.c:1582, where low_limit is set to > > > > vm_end_gap(tmp). Occasionally, this returns a 64-bit address (e.g. > > > > 0x7fedea581000), which is obviously greater than high_limit for a 32-bit > > > > mmap, and causes the next call to mas_empty_area() to fail. > > > > > > > > I'm not sure why vm_end_gap(tmp) occasionally returns a 64-bit address, > > > > or if the best solution is to just check for this and skip the retry if > > > > it occurs… > > > > > > > > > > Thanks for the debugging. I will look into it. > > > > > > I am currently trying to revise how the iterators, prev/next deal with > > > shifting outside the requested limits. I suspect it's something to do > > > with hitting the limit and what someone would assume the next operation > > > means. > > > > > > [1] https://lore.kernel.org/linux-mm/32f156ba80010fd97dbaf0a0cdfc84366608624d.camel@intel.com/ > > > >
On Fri, 14 Apr 2023 14:59:19 -0400 "Liam R. Howlett" <Liam.Howlett@oracle.com> wrote: > The maple tree limits the gap returned to a window that specifically > fits what was asked. This may not be optimal in the case of switching > search directions or a gap that does not satisfy the requested space for > other reasons. Fix the search by retrying the operation and limiting > the search window in the rare occasion that a conflict occurs. This is a performance regression, yes? Of what magnitude?
© 2016 - 2025 Red Hat, Inc.