Place checks into a separate function so the mremap() system call is less
egregiously long. Remove the unnecessary mremap_to() offset_in_page()
check and perform that check earlier instead, so that all such basic
checks are kept together.
Separate out the VMA in-place expansion, hugetlb and expand/move logic into
separate, readable functions.
De-duplicate code where possible, add comments, and ensure that all error
handling explicitly specifies the error at the point at which it occurs,
rather than pre-setting an error value and implicitly returning it (which
is bug-prone).
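
To illustrate (a simplified sketch, not code taken verbatim from this
patch), the error handling change amounts to:

	/* Before: pre-set error value, returned implicitly later on. */
	unsigned long ret = -EINVAL;

	if (offset_in_page(addr))
		goto out;	/* Which error? You must trace ret back. */

	/* After: the error is stated explicitly where it occurs. */
	if (offset_in_page(addr))
		return -EINVAL;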
This lays the groundwork for subsequent patches further simplifying and
extending the mremap() implementation.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
mm/mremap.c | 405 ++++++++++++++++++++++++++++++++--------------------
1 file changed, 251 insertions(+), 154 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index c3e4c86d0b8d..c4abda8dfc57 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -942,33 +942,14 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
unsigned long ret;
unsigned long map_flags = 0;
- if (offset_in_page(new_addr))
- return -EINVAL;
-
+ /* Is the new length or address silly? */
if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
return -EINVAL;
- /* Ensure the old/new locations do not overlap */
+ /* Ensure the old/new locations do not overlap. */
if (addr + old_len > new_addr && new_addr + new_len > addr)
return -EINVAL;
- /*
- * move_vma() need us to stay 4 maps below the threshold, otherwise
- * it will bail out at the very beginning.
- * That is a problem if we have already unmaped the regions here
- * (new_addr, and old_addr), because userspace will not know the
- * state of the vma's after it gets -ENOMEM.
- * So, to avoid such scenario we can pre-compute if the whole
- * operation has high chances to success map-wise.
- * Worst-scenario case is when both vma's (new_addr and old_addr) get
- * split in 3 before unmapping it.
- * That means 2 more maps (1 for each) to the ones we already hold.
- * Check whether current map count plus 2 still leads us to 4 maps below
- * the threshold, otherwise return -ENOMEM here to be more safe.
- */
- if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
- return -ENOMEM;
-
if (flags & MREMAP_FIXED) {
/*
* In mremap_to().
@@ -1035,6 +1016,218 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
return 1;
}
+/* Do the mremap() flags require that the new_addr parameter be specified? */
+static bool implies_new_addr(unsigned long flags)
+{
+ return flags & (MREMAP_FIXED | MREMAP_DONTUNMAP);
+}
+
+/*
+ * Are the parameters passed to mremap() valid? If so return 0, otherwise return
+ * error.
+ */
+static unsigned long check_mremap_params(unsigned long addr,
+ unsigned long flags,
+ unsigned long old_len,
+ unsigned long new_len,
+ unsigned long new_addr)
+{
+ /* Ensure no unexpected flag values. */
+ if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
+ return -EINVAL;
+
+ /* Start address must be page-aligned. */
+ if (offset_in_page(addr))
+ return -EINVAL;
+
+ /*
+ * We allow a zero old-len as a special case
+ * for DOS-emu "duplicate shm area" thing. But
+ * a zero new-len is nonsensical.
+ */
+ if (!PAGE_ALIGN(new_len))
+ return -EINVAL;
+
+ /* Remainder of checks are for cases with specific new_addr. */
+ if (!implies_new_addr(flags))
+ return 0;
+
+ /* The new address must be page-aligned. */
+ if (offset_in_page(new_addr))
+ return -EINVAL;
+
+ /* A fixed address implies a move. */
+ if (!(flags & MREMAP_MAYMOVE))
+ return -EINVAL;
+
+ /* MREMAP_DONTUNMAP does not allow resizing in the process. */
+ if (flags & MREMAP_DONTUNMAP && old_len != new_len)
+ return -EINVAL;
+
+ /*
+ * move_vma() need us to stay 4 maps below the threshold, otherwise
+ * it will bail out at the very beginning.
+ * That is a problem if we have already unmaped the regions here
+ * (new_addr, and old_addr), because userspace will not know the
+ * state of the vma's after it gets -ENOMEM.
+ * So, to avoid such scenario we can pre-compute if the whole
+ * operation has high chances to success map-wise.
+ * Worst-scenario case is when both vma's (new_addr and old_addr) get
+ * split in 3 before unmapping it.
+ * That means 2 more maps (1 for each) to the ones we already hold.
+ * Check whether current map count plus 2 still leads us to 4 maps below
+ * the threshold, otherwise return -ENOMEM here to be more safe.
+ */
+ if ((current->mm->map_count + 2) >= sysctl_max_map_count - 3)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * We know we can expand the VMA in-place by delta pages, so do so.
+ *
+ * If we discover the VMA is locked, update mm_struct statistics accordingly and
+ * indicate so to the caller.
+ */
+static unsigned long expand_vma_inplace(struct vm_area_struct *vma,
+ unsigned long delta, bool *locked)
+{
+ struct mm_struct *mm = current->mm;
+ long pages = delta >> PAGE_SHIFT;
+ VMA_ITERATOR(vmi, mm, vma->vm_end);
+ long charged = 0;
+
+ if (vma->vm_flags & VM_ACCOUNT) {
+ if (security_vm_enough_memory_mm(mm, pages))
+ return -ENOMEM;
+
+ charged = pages;
+ }
+
+ /*
+ * Function vma_merge_extend() is called on the
+ * extension we are adding to the already existing vma,
+ * vma_merge_extend() will merge this extension with the
+ * already existing vma (expand operation itself) and
+ * possibly also with the next vma if it becomes
+ * adjacent to the expanded vma and otherwise
+ * compatible.
+ */
+ vma = vma_merge_extend(&vmi, vma, delta);
+ if (!vma) {
+ vm_unacct_memory(charged);
+ return -ENOMEM;
+ }
+
+ vm_stat_account(mm, vma->vm_flags, pages);
+ if (vma->vm_flags & VM_LOCKED) {
+ mm->locked_vm += pages;
+ *locked = true;
+ }
+
+ return 0;
+}
+
+static bool align_hugetlb(struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long new_addr,
+ unsigned long *old_len_ptr,
+ unsigned long *new_len_ptr,
+ unsigned long *delta_ptr)
+{
+ unsigned long old_len = *old_len_ptr;
+ unsigned long new_len = *new_len_ptr;
+ struct hstate *h __maybe_unused = hstate_vma(vma);
+
+ old_len = ALIGN(old_len, huge_page_size(h));
+ new_len = ALIGN(new_len, huge_page_size(h));
+
+ /* addrs must be huge page aligned */
+ if (addr & ~huge_page_mask(h))
+ return false;
+ if (new_addr & ~huge_page_mask(h))
+ return false;
+
+ /*
+ * Don't allow remap expansion, because the underlying hugetlb
+ * reservation is not yet capable to handle split reservation.
+ */
+ if (new_len > old_len)
+ return false;
+
+ *old_len_ptr = old_len;
+ *new_len_ptr = new_len;
+ *delta_ptr = abs_diff(old_len, new_len);
+ return true;
+}
+
+/*
+ * We are mremap()'ing without specifying a fixed address to move to, but are
+ * requesting that the VMA's size be increased.
+ *
+ * Try to do so in-place, if this fails, then move the VMA to a new location to
+ * action the change.
+ */
+static unsigned long expand_vma(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long old_len,
+ unsigned long new_len, unsigned long flags,
+ bool *locked_ptr, unsigned long *new_addr_ptr,
+ struct vm_userfaultfd_ctx *uf_ptr,
+ struct list_head *uf_unmap_ptr)
+{
+ unsigned long err;
+ unsigned long map_flags;
+ unsigned long new_addr; /* We ignore any user-supplied one. */
+ pgoff_t pgoff;
+
+ err = resize_is_valid(vma, addr, old_len, new_len, flags);
+ if (err)
+ return err;
+
+ /*
+ * [addr, old_len) spans precisely to the end of the VMA, so try to
+ * expand it in-place.
+ */
+ if (old_len == vma->vm_end - addr &&
+ vma_expandable(vma, new_len - old_len)) {
+ err = expand_vma_inplace(vma, new_len - old_len, locked_ptr);
+ if (IS_ERR_VALUE(err))
+ return err;
+
+ /*
+ * We want to populate the newly expanded portion of the VMA to
+ * satisfy the expectation that mlock()'ing a VMA maintains all
+ * of its pages in memory.
+ */
+ if (*locked_ptr)
+ *new_addr_ptr = addr;
+
+ /* OK we're done! */
+ return addr;
+ }
+
+ /*
+ * We weren't able to just expand or shrink the area,
+ * we need to create a new one and move it.
+ */
+
+ /* We're not allowed to move the VMA, so error out. */
+ if (!(flags & MREMAP_MAYMOVE))
+ return -ENOMEM;
+
+ /* Find a new location to move the VMA to. */
+ map_flags = (vma->vm_flags & VM_MAYSHARE) ? MAP_SHARED : 0;
+ pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
+ new_addr = get_unmapped_area(vma->vm_file, 0, new_len, pgoff, map_flags);
+ if (IS_ERR_VALUE(new_addr))
+ return new_addr;
+ *new_addr_ptr = new_addr;
+
+ return move_vma(vma, addr, old_len, new_len, new_addr,
+ locked_ptr, flags, uf_ptr, uf_unmap_ptr);
+}
+
/*
* Expand (or shrink) an existing mapping, potentially moving it at the
* same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
@@ -1048,7 +1241,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- unsigned long ret = -EINVAL;
+ unsigned long ret;
+ unsigned long delta;
bool locked = false;
struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
LIST_HEAD(uf_unmap_early);
@@ -1067,70 +1261,38 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
*/
addr = untagged_addr(addr);
- if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
- return ret;
-
- if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
- return ret;
-
- /*
- * MREMAP_DONTUNMAP is always a move and it does not allow resizing
- * in the process.
- */
- if (flags & MREMAP_DONTUNMAP &&
- (!(flags & MREMAP_MAYMOVE) || old_len != new_len))
- return ret;
-
-
- if (offset_in_page(addr))
+ ret = check_mremap_params(addr, flags, old_len, new_len, new_addr);
+ if (ret)
return ret;
old_len = PAGE_ALIGN(old_len);
new_len = PAGE_ALIGN(new_len);
+ delta = abs_diff(old_len, new_len);
- /*
- * We allow a zero old-len as a special case
- * for DOS-emu "duplicate shm area" thing. But
- * a zero new-len is nonsensical.
- */
- if (!new_len)
- return ret;
-
- if (mmap_write_lock_killable(current->mm))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
+
vma = vma_lookup(mm, addr);
if (!vma) {
ret = -EFAULT;
goto out;
}
- /* Don't allow remapping vmas when they have already been sealed */
+ /* If mseal()'d, mremap() is prohibited. */
if (!can_modify_vma(vma)) {
ret = -EPERM;
goto out;
}
- if (is_vm_hugetlb_page(vma)) {
- struct hstate *h __maybe_unused = hstate_vma(vma);
-
- old_len = ALIGN(old_len, huge_page_size(h));
- new_len = ALIGN(new_len, huge_page_size(h));
-
- /* addrs must be huge page aligned */
- if (addr & ~huge_page_mask(h))
- goto out;
- if (new_addr & ~huge_page_mask(h))
- goto out;
-
- /*
- * Don't allow remap expansion, because the underlying hugetlb
- * reservation is not yet capable to handle split reservation.
- */
- if (new_len > old_len)
- goto out;
+ /* Align to hugetlb page size, if required. */
+ if (is_vm_hugetlb_page(vma) &&
+ !align_hugetlb(vma, addr, new_addr, &old_len, &new_len, &delta)) {
+ ret = -EINVAL;
+ goto out;
}
- if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) {
+ /* Are we RELOCATING the VMA to a SPECIFIC address? */
+ if (implies_new_addr(flags)) {
ret = mremap_to(addr, old_len, new_addr, new_len,
&locked, flags, &uf, &uf_unmap_early,
&uf_unmap);
@@ -1138,109 +1300,44 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
}
/*
- * Always allow a shrinking remap: that just unmaps
- * the unnecessary pages..
- * do_vmi_munmap does all the needed commit accounting, and
- * unlocks the mmap_lock if so directed.
+ * From here on in we are only RESIZING the VMA, attempting to do so
+ * in-place, moving the VMA if we cannot.
*/
- if (old_len >= new_len) {
- VMA_ITERATOR(vmi, mm, addr + new_len);
- if (old_len == new_len) {
- ret = addr;
- goto out;
- }
+ /* NO-OP CASE - resizing to the same size. */
+ if (new_len == old_len) {
+ ret = addr;
+ goto out;
+ }
+
+ /* SHRINK CASE. Can always be done in-place. */
+ if (new_len < old_len) {
+ VMA_ITERATOR(vmi, mm, addr + new_len);
- ret = do_vmi_munmap(&vmi, mm, addr + new_len, old_len - new_len,
+ /*
+ * Simply unmap the shrunken portion of the VMA. This does all
+ * the needed commit accounting, unlocking the mmap lock.
+ */
+ ret = do_vmi_munmap(&vmi, mm, addr + new_len, delta,
&uf_unmap, true);
if (ret)
goto out;
+ /* We succeeded, mmap lock released for us. */
ret = addr;
goto out_unlocked;
}
- /*
- * Ok, we need to grow..
- */
- ret = resize_is_valid(vma, addr, old_len, new_len, flags);
- if (ret)
- goto out;
-
- /* old_len exactly to the end of the area..
- */
- if (old_len == vma->vm_end - addr) {
- unsigned long delta = new_len - old_len;
-
- /* can we just expand the current mapping? */
- if (vma_expandable(vma, delta)) {
- long pages = delta >> PAGE_SHIFT;
- VMA_ITERATOR(vmi, mm, vma->vm_end);
- long charged = 0;
-
- if (vma->vm_flags & VM_ACCOUNT) {
- if (security_vm_enough_memory_mm(mm, pages)) {
- ret = -ENOMEM;
- goto out;
- }
- charged = pages;
- }
-
- /*
- * Function vma_merge_extend() is called on the
- * extension we are adding to the already existing vma,
- * vma_merge_extend() will merge this extension with the
- * already existing vma (expand operation itself) and
- * possibly also with the next vma if it becomes
- * adjacent to the expanded vma and otherwise
- * compatible.
- */
- vma = vma_merge_extend(&vmi, vma, delta);
- if (!vma) {
- vm_unacct_memory(charged);
- ret = -ENOMEM;
- goto out;
- }
+ /* EXPAND case. We try to do in-place, if we can't, then we move it. */
+ ret = expand_vma(vma, addr, old_len, new_len, flags, &locked, &new_addr,
+ &uf, &uf_unmap);
- vm_stat_account(mm, vma->vm_flags, pages);
- if (vma->vm_flags & VM_LOCKED) {
- mm->locked_vm += pages;
- locked = true;
- new_addr = addr;
- }
- ret = addr;
- goto out;
- }
- }
-
- /*
- * We weren't able to just expand or shrink the area,
- * we need to create a new one and move it..
- */
- ret = -ENOMEM;
- if (flags & MREMAP_MAYMOVE) {
- unsigned long map_flags = 0;
- if (vma->vm_flags & VM_MAYSHARE)
- map_flags |= MAP_SHARED;
-
- new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
- vma->vm_pgoff +
- ((addr - vma->vm_start) >> PAGE_SHIFT),
- map_flags);
- if (IS_ERR_VALUE(new_addr)) {
- ret = new_addr;
- goto out;
- }
-
- ret = move_vma(vma, addr, old_len, new_len, new_addr,
- &locked, flags, &uf, &uf_unmap);
- }
out:
if (offset_in_page(ret))
locked = false;
- mmap_write_unlock(current->mm);
+ mmap_write_unlock(mm);
if (locked && new_len > old_len)
- mm_populate(new_addr + old_len, new_len - old_len);
+ mm_populate(new_addr + old_len, delta);
out_unlocked:
userfaultfd_unmap_complete(mm, &uf_unmap_early);
mremap_userfaultfd_complete(&uf, addr, ret, old_len);
--
2.48.1
On Mon, Mar 03, 2025 at 11:08:32AM +0000, Lorenzo Stoakes wrote:
> Place checks into a separate function so the mremap() system call is less
> egregiously long. Remove the unnecessary mremap_to() offset_in_page()
> check and perform that check earlier instead, so that all such basic
> checks are kept together.
>
> Separate out the VMA in-place expansion, hugetlb and expand/move logic into
> separate, readable functions.
>
> De-duplicate code where possible, add comments, and ensure that all error
> handling explicitly specifies the error at the point at which it occurs,
> rather than pre-setting an error value and implicitly returning it (which
> is bug-prone).
>
> This lays the groundwork for subsequent patches further simplifying and
> extending the mremap() implementation.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> ---

Nice refactoring—reviewing it was a nice learning experience.

Looks good to me,
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>

--
Cheers,
Harry

> mm/mremap.c | 405 ++++++++++++++++++++++++++++++++--------------------
> 1 file changed, 251 insertions(+), 154 deletions(-)
On Wed, Mar 05, 2025 at 10:47:58AM +0900, Harry Yoo wrote:
> On Mon, Mar 03, 2025 at 11:08:32AM +0000, Lorenzo Stoakes wrote:
> > Place checks into a separate function so the mremap() system call is less
> > egregiously long. Remove the unnecessary mremap_to() offset_in_page()
> > check and perform that check earlier instead, so that all such basic
> > checks are kept together.
> >
> > Separate out the VMA in-place expansion, hugetlb and expand/move logic into
> > separate, readable functions.
> >
> > De-duplicate code where possible, add comments, and ensure that all error
> > handling explicitly specifies the error at the point at which it occurs,
> > rather than pre-setting an error value and implicitly returning it (which
> > is bug-prone).
> >
> > This lays the groundwork for subsequent patches further simplifying and
> > extending the mremap() implementation.
> >
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > ---
>
> Nice refactoring—reviewing it was a nice learning experience.
>
> Looks good to me,
> Reviewed-by: Harry Yoo <harry.yoo@oracle.com>

Thanks!

> --
> Cheers,
> Harry
>
> > mm/mremap.c | 405 ++++++++++++++++++++++++++++++++--------------------
> > 1 file changed, 251 insertions(+), 154 deletions(-)
* Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [250303 06:08]:
> Place checks into a separate function so the mremap() system call is less
> egregiously long. Remove the unnecessary mremap_to() offset_in_page()
> check and perform that check earlier instead, so that all such basic
> checks are kept together.
>
> Separate out the VMA in-place expansion, hugetlb and expand/move logic into
> separate, readable functions.
>
> De-duplicate code where possible, add comments, and ensure that all error
> handling explicitly specifies the error at the point at which it occurs,
> rather than pre-setting an error value and implicitly returning it (which
> is bug-prone).
>
> This lays the groundwork for subsequent patches further simplifying and
> extending the mremap() implementation.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> ---
> mm/mremap.c | 405 ++++++++++++++++++++++++++++++++--------------------
> 1 file changed, 251 insertions(+), 154 deletions(-)
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index c3e4c86d0b8d..c4abda8dfc57 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -942,33 +942,14 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
> unsigned long ret;
> unsigned long map_flags = 0;
>
> - if (offset_in_page(new_addr))
> - return -EINVAL;
> -
> + /* Is the new length or address silly? */
> if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
> return -EINVAL;
>
> - /* Ensure the old/new locations do not overlap */
> + /* Ensure the old/new locations do not overlap. */
> if (addr + old_len > new_addr && new_addr + new_len > addr)
> return -EINVAL;
>
> - /*
> - * move_vma() need us to stay 4 maps below the threshold, otherwise
> - * it will bail out at the very beginning.
> - * That is a problem if we have already unmaped the regions here
> - * (new_addr, and old_addr), because userspace will not know the
> - * state of the vma's after it gets -ENOMEM.
> - * So, to avoid such scenario we can pre-compute if the whole
> - * operation has high chances to success map-wise.
> - * Worst-scenario case is when both vma's (new_addr and old_addr) get
> - * split in 3 before unmapping it.
> - * That means 2 more maps (1 for each) to the ones we already hold.
> - * Check whether current map count plus 2 still leads us to 4 maps below
> - * the threshold, otherwise return -ENOMEM here to be more safe.
> - */
> - if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
> - return -ENOMEM;
> -
> if (flags & MREMAP_FIXED) {
> /*
> * In mremap_to().
> @@ -1035,6 +1016,218 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
> return 1;
> }
>
> +/* Do the mremap() flags require that the new_addr parameter be specified? */
> +static bool implies_new_addr(unsigned long flags)
> +{
> + return flags & (MREMAP_FIXED | MREMAP_DONTUNMAP);
> +}
> +
> +/*
> + * Are the parameters passed to mremap() valid? If so return 0, otherwise return
> + * error.
> + */
> +static unsigned long check_mremap_params(unsigned long addr,
> + unsigned long flags,
> + unsigned long old_len,
> + unsigned long new_len,
> + unsigned long new_addr)
If you use two tabs for the arguments this will be more compact and more
readable.
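That is, something like the below (whitespace illustration only):

	static unsigned long check_mremap_params(unsigned long addr,
			unsigned long flags, unsigned long old_len,
			unsigned long new_len, unsigned long new_addr)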
> +{
> + /* Ensure no unexpected flag values. */
> + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
> + return -EINVAL;
> +
> + /* Start address must be page-aligned. */
> + if (offset_in_page(addr))
> + return -EINVAL;
> +
> + /*
> + * We allow a zero old-len as a special case
> + * for DOS-emu "duplicate shm area" thing. But
> + * a zero new-len is nonsensical.
> + */
> + if (!PAGE_ALIGN(new_len))
> + return -EINVAL;
> +
> + /* Remainder of checks are for cases with specific new_addr. */
> + if (!implies_new_addr(flags))
> + return 0;
> +
> + /* The new address must be page-aligned. */
> + if (offset_in_page(new_addr))
> + return -EINVAL;
> +
> + /* A fixed address implies a move. */
> + if (!(flags & MREMAP_MAYMOVE))
> + return -EINVAL;
> +
> + /* MREMAP_DONTUNMAP does not allow resizing in the process. */
> + if (flags & MREMAP_DONTUNMAP && old_len != new_len)
> + return -EINVAL;
> +
> + /*
> + * move_vma() need us to stay 4 maps below the threshold, otherwise
> + * it will bail out at the very beginning.
> + * That is a problem if we have already unmaped the regions here
> + * (new_addr, and old_addr), because userspace will not know the
> + * state of the vma's after it gets -ENOMEM.
> + * So, to avoid such scenario we can pre-compute if the whole
> + * operation has high chances to success map-wise.
> + * Worst-scenario case is when both vma's (new_addr and old_addr) get
> + * split in 3 before unmapping it.
> + * That means 2 more maps (1 for each) to the ones we already hold.
> + * Check whether current map count plus 2 still leads us to 4 maps below
> + * the threshold, otherwise return -ENOMEM here to be more safe.
> + */
> + if ((current->mm->map_count + 2) >= sysctl_max_map_count - 3)
> + return -ENOMEM;
> +
> + return 0;
> +}
> +
> +/*
> + * We know we can expand the VMA in-place by delta pages, so do so.
> + *
> + * If we discover the VMA is locked, update mm_struct statistics accordingly and
> + * indicate so to the caller.
> + */
> +static unsigned long expand_vma_inplace(struct vm_area_struct *vma,
> + unsigned long delta, bool *locked)
> +{
> + struct mm_struct *mm = current->mm;
> + long pages = delta >> PAGE_SHIFT;
> + VMA_ITERATOR(vmi, mm, vma->vm_end);
> + long charged = 0;
> +
> + if (vma->vm_flags & VM_ACCOUNT) {
> + if (security_vm_enough_memory_mm(mm, pages))
> + return -ENOMEM;
> +
> + charged = pages;
> + }
> +
> + /*
> + * Function vma_merge_extend() is called on the
> + * extension we are adding to the already existing vma,
> + * vma_merge_extend() will merge this extension with the
> + * already existing vma (expand operation itself) and
> + * possibly also with the next vma if it becomes
> + * adjacent to the expanded vma and otherwise
> + * compatible.
> + */
> + vma = vma_merge_extend(&vmi, vma, delta);
> + if (!vma) {
> + vm_unacct_memory(charged);
> + return -ENOMEM;
> + }
> +
> + vm_stat_account(mm, vma->vm_flags, pages);
> + if (vma->vm_flags & VM_LOCKED) {
> + mm->locked_vm += pages;
> + *locked = true;
> + }
> +
> + return 0;
> +}
> +
> +static bool align_hugetlb(struct vm_area_struct *vma,
> + unsigned long addr,
> + unsigned long new_addr,
> + unsigned long *old_len_ptr,
> + unsigned long *new_len_ptr,
> + unsigned long *delta_ptr)
> +{
> + unsigned long old_len = *old_len_ptr;
> + unsigned long new_len = *new_len_ptr;
> + struct hstate *h __maybe_unused = hstate_vma(vma);
> +
> + old_len = ALIGN(old_len, huge_page_size(h));
> + new_len = ALIGN(new_len, huge_page_size(h));
> +
> + /* addrs must be huge page aligned */
> + if (addr & ~huge_page_mask(h))
> + return false;
> + if (new_addr & ~huge_page_mask(h))
> + return false;
> +
> + /*
> + * Don't allow remap expansion, because the underlying hugetlb
> + * reservation is not yet capable to handle split reservation.
> + */
> + if (new_len > old_len)
> + return false;
> +
> + *old_len_ptr = old_len;
> + *new_len_ptr = new_len;
> + *delta_ptr = abs_diff(old_len, new_len);
> + return true;
> +}
> +
> +/*
> + * We are mremap()'ing without specifying a fixed address to move to, but are
> + * requesting that the VMA's size be increased.
> + *
> + * Try to do so in-place, if this fails, then move the VMA to a new location to
> + * action the change.
> + */
> +static unsigned long expand_vma(struct vm_area_struct *vma,
> + unsigned long addr, unsigned long old_len,
> + unsigned long new_len, unsigned long flags,
> + bool *locked_ptr, unsigned long *new_addr_ptr,
> + struct vm_userfaultfd_ctx *uf_ptr,
> + struct list_head *uf_unmap_ptr)
> +{
> + unsigned long err;
> + unsigned long map_flags;
> + unsigned long new_addr; /* We ignore any user-supplied one. */
> + pgoff_t pgoff;
> +
> + err = resize_is_valid(vma, addr, old_len, new_len, flags);
> + if (err)
> + return err;
> +
> + /*
> + * [addr, old_len) spans precisely to the end of the VMA, so try to
> + * expand it in-place.
> + */
> + if (old_len == vma->vm_end - addr &&
> + vma_expandable(vma, new_len - old_len)) {
> + err = expand_vma_inplace(vma, new_len - old_len, locked_ptr);
You use delta twice here (new_len - old_len). I assume this is
different in the next patches.
> + if (IS_ERR_VALUE(err))
> + return err;
Doesn't expand_vma_inplace() return 0 on success, error otherwise?
> +
> + /*
> + * We want to populate the newly expanded portion of the VMA to
> + * satisfy the expectation that mlock()'ing a VMA maintains all
> + * of its pages in memory.
> + */
> + if (*locked_ptr)
> + *new_addr_ptr = addr;
> +
> + /* OK we're done! */
> + return addr;
> + }
> +
> + /*
> + * We weren't able to just expand or shrink the area,
> + * we need to create a new one and move it.
> + */
> +
So it's more of an expand_or_move_vma()?
> + /* We're not allowed to move the VMA, so error out. */
> + if (!(flags & MREMAP_MAYMOVE))
> + return -ENOMEM;
and by flags we mean... the flags from the syscall. This gets confusing
with map_flags. At least there's only two and not six flags.
> +
> + /* Find a new location to move the VMA to. */
> + map_flags = (vma->vm_flags & VM_MAYSHARE) ? MAP_SHARED : 0;
> + pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
> + new_addr = get_unmapped_area(vma->vm_file, 0, new_len, pgoff, map_flags);
> + if (IS_ERR_VALUE(new_addr))
> + return new_addr;
> + *new_addr_ptr = new_addr;
> +
> + return move_vma(vma, addr, old_len, new_len, new_addr,
> + locked_ptr, flags, uf_ptr, uf_unmap_ptr);
> +}
> +
> /*
> * Expand (or shrink) an existing mapping, potentially moving it at the
> * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
> @@ -1048,7 +1241,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
> {
> struct mm_struct *mm = current->mm;
> struct vm_area_struct *vma;
> - unsigned long ret = -EINVAL;
> + unsigned long ret;
> + unsigned long delta;
> bool locked = false;
> struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
> LIST_HEAD(uf_unmap_early);
> @@ -1067,70 +1261,38 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
> */
> addr = untagged_addr(addr);
>
> - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
> - return ret;
> -
> - if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
> - return ret;
> -
> - /*
> - * MREMAP_DONTUNMAP is always a move and it does not allow resizing
> - * in the process.
> - */
> - if (flags & MREMAP_DONTUNMAP &&
> - (!(flags & MREMAP_MAYMOVE) || old_len != new_len))
> - return ret;
> -
> -
> - if (offset_in_page(addr))
> + ret = check_mremap_params(addr, flags, old_len, new_len, new_addr);
> + if (ret)
> return ret;
>
> old_len = PAGE_ALIGN(old_len);
> new_len = PAGE_ALIGN(new_len);
> + delta = abs_diff(old_len, new_len);
>
> - /*
> - * We allow a zero old-len as a special case
> - * for DOS-emu "duplicate shm area" thing. But
> - * a zero new-len is nonsensical.
> - */
> - if (!new_len)
> - return ret;
> -
> - if (mmap_write_lock_killable(current->mm))
> + if (mmap_write_lock_killable(mm))
> return -EINTR;
> +
> vma = vma_lookup(mm, addr);
> if (!vma) {
> ret = -EFAULT;
> goto out;
> }
>
> - /* Don't allow remapping vmas when they have already been sealed */
> + /* If mseal()'d, mremap() is prohibited. */
> if (!can_modify_vma(vma)) {
> ret = -EPERM;
> goto out;
> }
This could be delayed to the munmap() call, which performs the same check
and would fail anyway.
It doesn't really cost anything though so I don't think it's worth it
here. Maybe something we can keep in mind for the future...
>
> - if (is_vm_hugetlb_page(vma)) {
> - struct hstate *h __maybe_unused = hstate_vma(vma);
> -
> - old_len = ALIGN(old_len, huge_page_size(h));
> - new_len = ALIGN(new_len, huge_page_size(h));
> -
> - /* addrs must be huge page aligned */
> - if (addr & ~huge_page_mask(h))
> - goto out;
> - if (new_addr & ~huge_page_mask(h))
> - goto out;
> -
> - /*
> - * Don't allow remap expansion, because the underlying hugetlb
> - * reservation is not yet capable to handle split reservation.
> - */
> - if (new_len > old_len)
> - goto out;
> + /* Align to hugetlb page size, if required. */
> + if (is_vm_hugetlb_page(vma) &&
> + !align_hugetlb(vma, addr, new_addr, &old_len, &new_len, &delta)) {
> + ret = -EINVAL;
> + goto out;
> }
>
> - if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) {
> + /* Are we RELOCATING the VMA to a SPECIFIC address? */
> + if (implies_new_addr(flags)) {
> ret = mremap_to(addr, old_len, new_addr, new_len,
> &locked, flags, &uf, &uf_unmap_early,
> &uf_unmap);
> @@ -1138,109 +1300,44 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
> }
>
> /*
> - * Always allow a shrinking remap: that just unmaps
> - * the unnecessary pages..
> - * do_vmi_munmap does all the needed commit accounting, and
> - * unlocks the mmap_lock if so directed.
> + * From here on in we are only RESIZING the VMA, attempting to do so
> + * in-place, moving the VMA if we cannot.
> */
> - if (old_len >= new_len) {
> - VMA_ITERATOR(vmi, mm, addr + new_len);
>
> - if (old_len == new_len) {
> - ret = addr;
> - goto out;
> - }
> + /* NO-OP CASE - resizing to the same size. */
> + if (new_len == old_len) {
> + ret = addr;
> + goto out;
> + }
> +
> + /* SHRINK CASE. Can always be done in-place. */
> + if (new_len < old_len) {
> + VMA_ITERATOR(vmi, mm, addr + new_len);
>
> - ret = do_vmi_munmap(&vmi, mm, addr + new_len, old_len - new_len,
> + /*
> + * Simply unmap the shrunken portion of the VMA. This does all
> + * the needed commit accounting, unlocking the mmap lock.
> + */
> + ret = do_vmi_munmap(&vmi, mm, addr + new_len, delta,
> &uf_unmap, true);
> if (ret)
> goto out;
>
> + /* We succeeded, mmap lock released for us. */
> ret = addr;
> goto out_unlocked;
> }
>
> - /*
> - * Ok, we need to grow..
> - */
> - ret = resize_is_valid(vma, addr, old_len, new_len, flags);
> - if (ret)
> - goto out;
> -
> - /* old_len exactly to the end of the area..
> - */
> - if (old_len == vma->vm_end - addr) {
> - unsigned long delta = new_len - old_len;
> -
> - /* can we just expand the current mapping? */
> - if (vma_expandable(vma, delta)) {
> - long pages = delta >> PAGE_SHIFT;
> - VMA_ITERATOR(vmi, mm, vma->vm_end);
> - long charged = 0;
> -
> - if (vma->vm_flags & VM_ACCOUNT) {
> - if (security_vm_enough_memory_mm(mm, pages)) {
> - ret = -ENOMEM;
> - goto out;
> - }
> - charged = pages;
> - }
> -
> - /*
> - * Function vma_merge_extend() is called on the
> - * extension we are adding to the already existing vma,
> - * vma_merge_extend() will merge this extension with the
> - * already existing vma (expand operation itself) and
> - * possibly also with the next vma if it becomes
> - * adjacent to the expanded vma and otherwise
> - * compatible.
> - */
> - vma = vma_merge_extend(&vmi, vma, delta);
> - if (!vma) {
> - vm_unacct_memory(charged);
> - ret = -ENOMEM;
> - goto out;
> - }
> + /* EXPAND case. We try to do in-place, if we can't, then we move it. */
> + ret = expand_vma(vma, addr, old_len, new_len, flags, &locked, &new_addr,
> + &uf, &uf_unmap);
>
> - vm_stat_account(mm, vma->vm_flags, pages);
> - if (vma->vm_flags & VM_LOCKED) {
> - mm->locked_vm += pages;
> - locked = true;
> - new_addr = addr;
> - }
> - ret = addr;
> - goto out;
> - }
> - }
> -
> - /*
> - * We weren't able to just expand or shrink the area,
> - * we need to create a new one and move it..
> - */
> - ret = -ENOMEM;
> - if (flags & MREMAP_MAYMOVE) {
> - unsigned long map_flags = 0;
> - if (vma->vm_flags & VM_MAYSHARE)
> - map_flags |= MAP_SHARED;
> -
> - new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
> - vma->vm_pgoff +
> - ((addr - vma->vm_start) >> PAGE_SHIFT),
> - map_flags);
> - if (IS_ERR_VALUE(new_addr)) {
> - ret = new_addr;
> - goto out;
> - }
> -
> - ret = move_vma(vma, addr, old_len, new_len, new_addr,
> - &locked, flags, &uf, &uf_unmap);
> - }
> out:
> if (offset_in_page(ret))
> locked = false;
> - mmap_write_unlock(current->mm);
> + mmap_write_unlock(mm);
> if (locked && new_len > old_len)
> - mm_populate(new_addr + old_len, new_len - old_len);
> + mm_populate(new_addr + old_len, delta);
> out_unlocked:
> userfaultfd_unmap_complete(mm, &uf_unmap_early);
> mremap_userfaultfd_complete(&uf, addr, ret, old_len);
> --
> 2.48.1
>
Thanks for taking a look! :) I know this one is a bit painful...
On Mon, Mar 03, 2025 at 12:12:07PM -0500, Liam R. Howlett wrote:
> * Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [250303 06:08]:
> > Place checks into a separate function so the mremap() system call is less
> > egregiously long. Remove the unnecessary mremap_to() offset_in_page()
> > check and perform that check earlier instead, so that all such basic
> > checks are kept together.
> >
> > Separate out the VMA in-place expansion, hugetlb and expand/move logic into
> > separate, readable functions.
> >
> > De-duplicate code where possible, add comments, and ensure that all error
> > handling explicitly specifies the error at the point at which it occurs,
> > rather than pre-setting an error value and implicitly returning it (which
> > is bug-prone).
> >
> > This lays the groundwork for subsequent patches further simplifying and
> > extending the mremap() implementation.
> >
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > ---
> > mm/mremap.c | 405 ++++++++++++++++++++++++++++++++--------------------
> > 1 file changed, 251 insertions(+), 154 deletions(-)
> >
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index c3e4c86d0b8d..c4abda8dfc57 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -942,33 +942,14 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
> > unsigned long ret;
> > unsigned long map_flags = 0;
> >
> > - if (offset_in_page(new_addr))
> > - return -EINVAL;
> > -
> > + /* Is the new length or address silly? */
> > if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
> > return -EINVAL;
> >
> > - /* Ensure the old/new locations do not overlap */
> > + /* Ensure the old/new locations do not overlap. */
> > if (addr + old_len > new_addr && new_addr + new_len > addr)
> > return -EINVAL;
> >
> > - /*
> > - * move_vma() need us to stay 4 maps below the threshold, otherwise
> > - * it will bail out at the very beginning.
> > - * That is a problem if we have already unmaped the regions here
> > - * (new_addr, and old_addr), because userspace will not know the
> > - * state of the vma's after it gets -ENOMEM.
> > - * So, to avoid such scenario we can pre-compute if the whole
> > - * operation has high chances to success map-wise.
> > - * Worst-scenario case is when both vma's (new_addr and old_addr) get
> > - * split in 3 before unmapping it.
> > - * That means 2 more maps (1 for each) to the ones we already hold.
> > - * Check whether current map count plus 2 still leads us to 4 maps below
> > - * the threshold, otherwise return -ENOMEM here to be more safe.
> > - */
> > - if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
> > - return -ENOMEM;
> > -
> > if (flags & MREMAP_FIXED) {
> > /*
> > * In mremap_to().
> > @@ -1035,6 +1016,218 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
> > return 1;
> > }
> >
> > +/* Do the mremap() flags require that the new_addr parameter be specified? */
> > +static bool implies_new_addr(unsigned long flags)
> > +{
> > + return flags & (MREMAP_FIXED | MREMAP_DONTUNMAP);
> > +}
> > +
> > +/*
> > + * Are the parameters passed to mremap() valid? If so return 0, otherwise return
> > + * error.
> > + */
> > +static unsigned long check_mremap_params(unsigned long addr,
> > + unsigned long flags,
> > + unsigned long old_len,
> > + unsigned long new_len,
> > + unsigned long new_addr)
>
> If you use two tabs for the arguments this will be more compact and more
> readable.
Sure, but a later commit eliminates this :)
>
> > +{
> > + /* Ensure no unexpected flag values. */
> > + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
> > + return -EINVAL;
> > +
> > + /* Start address must be page-aligned. */
> > + if (offset_in_page(addr))
> > + return -EINVAL;
> > +
> > + /*
> > + * We allow a zero old-len as a special case
> > + * for DOS-emu "duplicate shm area" thing. But
> > + * a zero new-len is nonsensical.
> > + */
> > + if (!PAGE_ALIGN(new_len))
> > + return -EINVAL;
> > +
> > + /* Remainder of checks are for cases with specific new_addr. */
> > + if (!implies_new_addr(flags))
> > + return 0;
> > +
> > + /* The new address must be page-aligned. */
> > + if (offset_in_page(new_addr))
> > + return -EINVAL;
> > +
> > + /* A fixed address implies a move. */
> > + if (!(flags & MREMAP_MAYMOVE))
> > + return -EINVAL;
> > +
> > + /* MREMAP_DONTUNMAP does not allow resizing in the process. */
> > + if (flags & MREMAP_DONTUNMAP && old_len != new_len)
> > + return -EINVAL;
> > +
> > + /*
> > + * move_vma() need us to stay 4 maps below the threshold, otherwise
> > + * it will bail out at the very beginning.
> > + * That is a problem if we have already unmaped the regions here
> > + * (new_addr, and old_addr), because userspace will not know the
> > + * state of the vma's after it gets -ENOMEM.
> > + * So, to avoid such scenario we can pre-compute if the whole
> > + * operation has high chances to success map-wise.
> > + * Worst-scenario case is when both vma's (new_addr and old_addr) get
> > + * split in 3 before unmapping it.
> > + * That means 2 more maps (1 for each) to the ones we already hold.
> > + * Check whether current map count plus 2 still leads us to 4 maps below
> > + * the threshold, otherwise return -ENOMEM here to be more safe.
> > + */
> > + if ((current->mm->map_count + 2) >= sysctl_max_map_count - 3)
> > + return -ENOMEM;
> > +
> > + return 0;
> > +}
> > +
> > +/*
> > + * We know we can expand the VMA in-place by delta pages, so do so.
> > + *
> > + * If we discover the VMA is locked, update mm_struct statistics accordingly and
> > + * indicate so to the caller.
> > + */
> > +static unsigned long expand_vma_inplace(struct vm_area_struct *vma,
> > + unsigned long delta, bool *locked)
> > +{
> > + struct mm_struct *mm = current->mm;
> > + long pages = delta >> PAGE_SHIFT;
> > + VMA_ITERATOR(vmi, mm, vma->vm_end);
> > + long charged = 0;
> > +
> > + if (vma->vm_flags & VM_ACCOUNT) {
> > + if (security_vm_enough_memory_mm(mm, pages))
> > + return -ENOMEM;
> > +
> > + charged = pages;
> > + }
> > +
> > + /*
> > + * Function vma_merge_extend() is called on the
> > + * extension we are adding to the already existing vma,
> > + * vma_merge_extend() will merge this extension with the
> > + * already existing vma (expand operation itself) and
> > + * possibly also with the next vma if it becomes
> > + * adjacent to the expanded vma and otherwise
> > + * compatible.
> > + */
> > + vma = vma_merge_extend(&vmi, vma, delta);
> > + if (!vma) {
> > + vm_unacct_memory(charged);
> > + return -ENOMEM;
> > + }
> > +
> > + vm_stat_account(mm, vma->vm_flags, pages);
> > + if (vma->vm_flags & VM_LOCKED) {
> > + mm->locked_vm += pages;
> > + *locked = true;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static bool align_hugetlb(struct vm_area_struct *vma,
> > + unsigned long addr,
> > + unsigned long new_addr,
> > + unsigned long *old_len_ptr,
> > + unsigned long *new_len_ptr,
> > + unsigned long *delta_ptr)
> > +{
> > + unsigned long old_len = *old_len_ptr;
> > + unsigned long new_len = *new_len_ptr;
> > + struct hstate *h __maybe_unused = hstate_vma(vma);
> > +
> > + old_len = ALIGN(old_len, huge_page_size(h));
> > + new_len = ALIGN(new_len, huge_page_size(h));
> > +
> > + /* addrs must be huge page aligned */
> > + if (addr & ~huge_page_mask(h))
> > + return false;
> > + if (new_addr & ~huge_page_mask(h))
> > + return false;
> > +
> > + /*
> > + * Don't allow remap expansion, because the underlying hugetlb
> > + * reservation is not yet capable to handle split reservation.
> > + */
> > + if (new_len > old_len)
> > + return false;
> > +
> > + *old_len_ptr = old_len;
> > + *new_len_ptr = new_len;
> > + *delta_ptr = abs_diff(old_len, new_len);
> > + return true;
> > +}
> > +
> > +/*
> > + * We are mremap()'ing without specifying a fixed address to move to, but are
> > + * requesting that the VMA's size be increased.
> > + *
> > + * Try to do so in-place, if this fails, then move the VMA to a new location to
> > + * action the change.
> > + */
> > +static unsigned long expand_vma(struct vm_area_struct *vma,
> > + unsigned long addr, unsigned long old_len,
> > + unsigned long new_len, unsigned long flags,
> > + bool *locked_ptr, unsigned long *new_addr_ptr,
> > + struct vm_userfaultfd_ctx *uf_ptr,
> > + struct list_head *uf_unmap_ptr)
> > +{
> > + unsigned long err;
> > + unsigned long map_flags;
> > + unsigned long new_addr; /* We ignore any user-supplied one. */
> > + pgoff_t pgoff;
> > +
> > + err = resize_is_valid(vma, addr, old_len, new_len, flags);
> > + if (err)
> > + return err;
> > +
> > + /*
> > + * [addr, old_len) spans precisely to the end of the VMA, so try to
> > + * expand it in-place.
> > + */
> > + if (old_len == vma->vm_end - addr &&
> > + vma_expandable(vma, new_len - old_len)) {
> > + err = expand_vma_inplace(vma, new_len - old_len, locked_ptr);
>
> You use delta twice here (new_len - old_len). I assume this is
> different in the next patches.
>
> > + if (IS_ERR_VALUE(err))
> > + return err;
>
> Doesn't expand_vma_inplace() return 0 on success, error otherwise?
Yeah, this is copying some already dubious logic from the original (trying to
_somewhat_ minimise the delta here).
At any rate, a later commit addresses this!
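
To spell out the quirk as it stands in this patch (a sketch of the caller
as-is):

	err = expand_vma_inplace(vma, new_len - old_len, locked_ptr);
	if (IS_ERR_VALUE(err))	/* err is 0 on success, -ENOMEM on failure, */
		return err;	/* so a plain if (err) would be clearer. */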
>
> > +
> > + /*
> > + * We want to populate the newly expanded portion of the VMA to
> > + * satisfy the expectation that mlock()'ing a VMA maintains all
> > + * of its pages in memory.
> > + */
> > + if (*locked_ptr)
> > + *new_addr_ptr = addr;
> > +
> > + /* OK we're done! */
> > + return addr;
> > + }
> > +
> > + /*
> > + * We weren't able to just expand or shrink the area,
> > + * we need to create a new one and move it.
> > + */
> > +
>
> So it's more of an expand_or_move_vma()?
I think that's misleading, because it would be
expand_or_move_and_expand_vma() or expand_in_place_or_move_and_expand()...
Unavoidably we have to abbreviate; I tried to differentiate between the two
cases with the 'in place' stuff.
So we _try_ to do it in place, and if we can't, we have to expand elsewhere.
>
> > + /* We're not allowed to move the VMA, so error out. */
> > + if (!(flags & MREMAP_MAYMOVE))
> > + return -ENOMEM;
>
> and by flags we mean... the flags from the syscall. This gets confusing
> with map_flags. At least there's only two and not six flags.
3 flags (MREMAP_FIXED, MREMAP_MAYMOVE, MREMAP_DONTUNMAP) :>)
This becomes clearer later; I'm not sure renaming it to mremap_flags would
add anything but noise, though.
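
For the record: the flags being discussed here are the userspace-visible
mremap() flags, while map_flags only exists kernel-side, for the
get_unmapped_area() call. A hypothetical userspace sketch:

	#define _GNU_SOURCE
	#include <sys/mman.h>

	/* Grow a mapping, permitting the kernel to move it if it cannot be
	 * expanded in place. MREMAP_MAYMOVE is an mremap() syscall flag,
	 * quite distinct from the internal map_flags (MAP_SHARED etc.). */
	static void *grow_mapping(void *old_addr, size_t old_len, size_t new_len)
	{
		return mremap(old_addr, old_len, new_len, MREMAP_MAYMOVE);
	}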
>
> > +
> > + /* Find a new location to move the VMA to. */
> > + map_flags = (vma->vm_flags & VM_MAYSHARE) ? MAP_SHARED : 0;
> > + pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
> > + new_addr = get_unmapped_area(vma->vm_file, 0, new_len, pgoff, map_flags);
> > + if (IS_ERR_VALUE(new_addr))
> > + return new_addr;
> > + *new_addr_ptr = new_addr;
> > +
> > + return move_vma(vma, addr, old_len, new_len, new_addr,
> > + locked_ptr, flags, uf_ptr, uf_unmap_ptr);
> > +}
> > +
> > /*
> > * Expand (or shrink) an existing mapping, potentially moving it at the
> > * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
> > @@ -1048,7 +1241,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
> > {
> > struct mm_struct *mm = current->mm;
> > struct vm_area_struct *vma;
> > - unsigned long ret = -EINVAL;
> > + unsigned long ret;
> > + unsigned long delta;
> > bool locked = false;
> > struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
> > LIST_HEAD(uf_unmap_early);
> > @@ -1067,70 +1261,38 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
> > */
> > addr = untagged_addr(addr);
> >
> > - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
> > - return ret;
> > -
> > - if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
> > - return ret;
> > -
> > - /*
> > - * MREMAP_DONTUNMAP is always a move and it does not allow resizing
> > - * in the process.
> > - */
> > - if (flags & MREMAP_DONTUNMAP &&
> > - (!(flags & MREMAP_MAYMOVE) || old_len != new_len))
> > - return ret;
> > -
> > -
> > - if (offset_in_page(addr))
> > + ret = check_mremap_params(addr, flags, old_len, new_len, new_addr);
> > + if (ret)
> > return ret;
> >
> > old_len = PAGE_ALIGN(old_len);
> > new_len = PAGE_ALIGN(new_len);
> > + delta = abs_diff(old_len, new_len);
> >
> > - /*
> > - * We allow a zero old-len as a special case
> > - * for DOS-emu "duplicate shm area" thing. But
> > - * a zero new-len is nonsensical.
> > - */
> > - if (!new_len)
> > - return ret;
> > -
> > - if (mmap_write_lock_killable(current->mm))
> > + if (mmap_write_lock_killable(mm))
> > return -EINTR;
> > +
> > vma = vma_lookup(mm, addr);
> > if (!vma) {
> > ret = -EFAULT;
> > goto out;
> > }
> >
> > - /* Don't allow remapping vmas when they have already been sealed */
> > + /* If mseal()'d, mremap() is prohibited. */
> > if (!can_modify_vma(vma)) {
> > ret = -EPERM;
> > goto out;
> > }
>
> This could be delayed to the munmap() call, which performs the same check
> and would fail anyway.
>
> It doesn't really cost anything though so I don't think it's worth it
> here. Maybe something we can keep in mind for the future...
Happy to address this, but I think it would be better in a later commit, as
this one is more of a 'keep things the same but restructure' change; later
commits make deeper changes.
>
> >
> > - if (is_vm_hugetlb_page(vma)) {
> > - struct hstate *h __maybe_unused = hstate_vma(vma);
> > -
> > - old_len = ALIGN(old_len, huge_page_size(h));
> > - new_len = ALIGN(new_len, huge_page_size(h));
> > -
> > - /* addrs must be huge page aligned */
> > - if (addr & ~huge_page_mask(h))
> > - goto out;
> > - if (new_addr & ~huge_page_mask(h))
> > - goto out;
> > -
> > - /*
> > - * Don't allow remap expansion, because the underlying hugetlb
> > - * reservation is not yet capable to handle split reservation.
> > - */
> > - if (new_len > old_len)
> > - goto out;
> > + /* Align to hugetlb page size, if required. */
> > + if (is_vm_hugetlb_page(vma) &&
> > + !align_hugetlb(vma, addr, new_addr, &old_len, &new_len, &delta)) {
> > + ret = -EINVAL;
> > + goto out;
> > }
> >
> > - if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) {
> > + /* Are we RELOCATING the VMA to a SPECIFIC address? */
> > + if (implies_new_addr(flags)) {
> > ret = mremap_to(addr, old_len, new_addr, new_len,
> > &locked, flags, &uf, &uf_unmap_early,
> > &uf_unmap);
> > @@ -1138,109 +1300,44 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
> > }
> >
> > /*
> > - * Always allow a shrinking remap: that just unmaps
> > - * the unnecessary pages..
> > - * do_vmi_munmap does all the needed commit accounting, and
> > - * unlocks the mmap_lock if so directed.
> > + * From here on in we are only RESIZING the VMA, attempting to do so
> > + * in-place, moving the VMA if we cannot.
> > */
> > - if (old_len >= new_len) {
> > - VMA_ITERATOR(vmi, mm, addr + new_len);
> >
> > - if (old_len == new_len) {
> > - ret = addr;
> > - goto out;
> > - }
> > + /* NO-OP CASE - resizing to the same size. */
> > + if (new_len == old_len) {
> > + ret = addr;
> > + goto out;
> > + }
> > +
> > + /* SHRINK CASE. Can always be done in-place. */
> > + if (new_len < old_len) {
> > + VMA_ITERATOR(vmi, mm, addr + new_len);
> >
> > - ret = do_vmi_munmap(&vmi, mm, addr + new_len, old_len - new_len,
> > + /*
> > + * Simply unmap the shrunken portion of the VMA. This does all
> > + * the needed commit accounting, unlocking the mmap lock.
> > + */
> > + ret = do_vmi_munmap(&vmi, mm, addr + new_len, delta,
> > &uf_unmap, true);
> > if (ret)
> > goto out;
> >
> > + /* We succeeded, mmap lock released for us. */
> > ret = addr;
> > goto out_unlocked;
> > }
> >
> > - /*
> > - * Ok, we need to grow..
> > - */
> > - ret = resize_is_valid(vma, addr, old_len, new_len, flags);
> > - if (ret)
> > - goto out;
> > -
> > - /* old_len exactly to the end of the area..
> > - */
> > - if (old_len == vma->vm_end - addr) {
> > - unsigned long delta = new_len - old_len;
> > -
> > - /* can we just expand the current mapping? */
> > - if (vma_expandable(vma, delta)) {
> > - long pages = delta >> PAGE_SHIFT;
> > - VMA_ITERATOR(vmi, mm, vma->vm_end);
> > - long charged = 0;
> > -
> > - if (vma->vm_flags & VM_ACCOUNT) {
> > - if (security_vm_enough_memory_mm(mm, pages)) {
> > - ret = -ENOMEM;
> > - goto out;
> > - }
> > - charged = pages;
> > - }
> > -
> > - /*
> > - * Function vma_merge_extend() is called on the
> > - * extension we are adding to the already existing vma,
> > - * vma_merge_extend() will merge this extension with the
> > - * already existing vma (expand operation itself) and
> > - * possibly also with the next vma if it becomes
> > - * adjacent to the expanded vma and otherwise
> > - * compatible.
> > - */
> > - vma = vma_merge_extend(&vmi, vma, delta);
> > - if (!vma) {
> > - vm_unacct_memory(charged);
> > - ret = -ENOMEM;
> > - goto out;
> > - }
> > + /* EXPAND case. We try to do in-place, if we can't, then we move it. */
> > + ret = expand_vma(vma, addr, old_len, new_len, flags, &locked, &new_addr,
> > + &uf, &uf_unmap);
> >
> > - vm_stat_account(mm, vma->vm_flags, pages);
> > - if (vma->vm_flags & VM_LOCKED) {
> > - mm->locked_vm += pages;
> > - locked = true;
> > - new_addr = addr;
> > - }
> > - ret = addr;
> > - goto out;
> > - }
> > - }
> > -
> > - /*
> > - * We weren't able to just expand or shrink the area,
> > - * we need to create a new one and move it..
> > - */
> > - ret = -ENOMEM;
> > - if (flags & MREMAP_MAYMOVE) {
> > - unsigned long map_flags = 0;
> > - if (vma->vm_flags & VM_MAYSHARE)
> > - map_flags |= MAP_SHARED;
> > -
> > - new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
> > - vma->vm_pgoff +
> > - ((addr - vma->vm_start) >> PAGE_SHIFT),
> > - map_flags);
> > - if (IS_ERR_VALUE(new_addr)) {
> > - ret = new_addr;
> > - goto out;
> > - }
> > -
> > - ret = move_vma(vma, addr, old_len, new_len, new_addr,
> > - &locked, flags, &uf, &uf_unmap);
> > - }
> > out:
> > if (offset_in_page(ret))
> > locked = false;
> > - mmap_write_unlock(current->mm);
> > + mmap_write_unlock(mm);
> > if (locked && new_len > old_len)
> > - mm_populate(new_addr + old_len, new_len - old_len);
> > + mm_populate(new_addr + old_len, delta);
> > out_unlocked:
> > userfaultfd_unmap_complete(mm, &uf_unmap_early);
> > mremap_userfaultfd_complete(&uf, addr, ret, old_len);
> > --
> > 2.48.1
> >
* Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [250303 12:21]:
> Thanks for taking a look! :) I know this one is a bit painful...
>
> On Mon, Mar 03, 2025 at 12:12:07PM -0500, Liam R. Howlett wrote:
> > * Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [250303 06:08]:
> > > Place checks into a separate function so the mremap() system call is less
> > > egregiously long. Remove the unnecessary mremap_to() offset_in_page()
> > > check and perform that check earlier instead, so that all such basic
> > > checks are kept together.
> > >
> > > Separate out the VMA in-place expansion, hugetlb and expand/move logic into
> > > separate, readable functions.
> > >
> > > De-duplicate code where possible, add comments, and ensure that all error
> > > handling explicitly specifies the error at the point at which it occurs,
> > > rather than pre-setting an error value and implicitly returning it (which
> > > is bug-prone).
> > >
> > > This lays the groundwork for subsequent patches further simplifying and
> > > extending the mremap() implementation.
> > >
> > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > > ---
> > > mm/mremap.c | 405 ++++++++++++++++++++++++++++++++--------------------
> > > 1 file changed, 251 insertions(+), 154 deletions(-)
> > >
> > > diff --git a/mm/mremap.c b/mm/mremap.c
> > > index c3e4c86d0b8d..c4abda8dfc57 100644
> > > --- a/mm/mremap.c
> > > +++ b/mm/mremap.c
> > > @@ -942,33 +942,14 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
> > > unsigned long ret;
> > > unsigned long map_flags = 0;
> > >
> > > - if (offset_in_page(new_addr))
> > > - return -EINVAL;
> > > -
> > > + /* Is the new length or address silly? */
> > > if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
> > > return -EINVAL;
> > >
> > > - /* Ensure the old/new locations do not overlap */
> > > + /* Ensure the old/new locations do not overlap. */
> > > if (addr + old_len > new_addr && new_addr + new_len > addr)
> > > return -EINVAL;
> > >
> > > - /*
> > > - * move_vma() need us to stay 4 maps below the threshold, otherwise
> > > - * it will bail out at the very beginning.
> > > - * That is a problem if we have already unmaped the regions here
> > > - * (new_addr, and old_addr), because userspace will not know the
> > > - * state of the vma's after it gets -ENOMEM.
> > > - * So, to avoid such scenario we can pre-compute if the whole
> > > - * operation has high chances to success map-wise.
> > > - * Worst-scenario case is when both vma's (new_addr and old_addr) get
> > > - * split in 3 before unmapping it.
> > > - * That means 2 more maps (1 for each) to the ones we already hold.
> > > - * Check whether current map count plus 2 still leads us to 4 maps below
> > > - * the threshold, otherwise return -ENOMEM here to be more safe.
> > > - */
> > > - if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
> > > - return -ENOMEM;
> > > -
> > > if (flags & MREMAP_FIXED) {
> > > /*
> > > * In mremap_to().
> > > @@ -1035,6 +1016,218 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
> > > return 1;
> > > }
> > >
> > > +/* Do the mremap() flags require that the new_addr parameter be specified? */
> > > +static bool implies_new_addr(unsigned long flags)
> > > +{
> > > + return flags & (MREMAP_FIXED | MREMAP_DONTUNMAP);
> > > +}
> > > +
> > > +/*
> > > + * Are the parameters passed to mremap() valid? If so return 0, otherwise return
> > > + * error.
> > > + */
> > > +static unsigned long check_mremap_params(unsigned long addr,
> > > + unsigned long flags,
> > > + unsigned long old_len,
> > > + unsigned long new_len,
> > > + unsigned long new_addr)
> >
> > If you use two tabs for the arguments this will be more compact and more
> > readable.
>
> Sure, but a later commit eliminates this :)
Perfect.
>
> >
> > > +{
> > > + /* Ensure no unexpected flag values. */
> > > + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
> > > + return -EINVAL;
> > > +
> > > + /* Start address must be page-aligned. */
> > > + if (offset_in_page(addr))
> > > + return -EINVAL;
> > > +
> > > + /*
> > > + * We allow a zero old-len as a special case
> > > + * for DOS-emu "duplicate shm area" thing. But
> > > + * a zero new-len is nonsensical.
> > > + */
> > > + if (!PAGE_ALIGN(new_len))
> > > + return -EINVAL;
> > > +
> > > + /* Remainder of checks are for cases with specific new_addr. */
> > > + if (!implies_new_addr(flags))
> > > + return 0;
> > > +
> > > + /* The new address must be page-aligned. */
> > > + if (offset_in_page(new_addr))
> > > + return -EINVAL;
> > > +
> > > + /* A fixed address implies a move. */
> > > + if (!(flags & MREMAP_MAYMOVE))
> > > + return -EINVAL;
> > > +
> > > + /* MREMAP_DONTUNMAP does not allow resizing in the process. */
> > > + if (flags & MREMAP_DONTUNMAP && old_len != new_len)
> > > + return -EINVAL;
> > > +
> > > + /*
> > > + * move_vma() needs us to stay 4 maps below the threshold, otherwise
> > > + * it will bail out at the very beginning.
> > > + * That is a problem if we have already unmapped the regions here
> > > + * (new_addr, and old_addr), because userspace will not know the
> > > + * state of the VMAs after it gets -ENOMEM.
> > > + * So, to avoid such a scenario, we can pre-compute whether the whole
> > > + * operation has a good chance of succeeding map-wise.
> > > + * The worst-case scenario is when both VMAs (new_addr and old_addr) get
> > > + * split into 3 before being unmapped.
> > > + * That means 2 more maps (1 for each) on top of the ones we already hold.
> > > + * Check whether the current map count plus 2 still leaves us 4 maps below
> > > + * the threshold, otherwise return -ENOMEM here to be safe.
> > > + */
> > > + if ((current->mm->map_count + 2) >= sysctl_max_map_count - 3)
> > > + return -ENOMEM;
> > > +
> > > + return 0;
> > > +}
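
As an aside for anyone following along, a minimal userspace sketch of
how these checks surface, based purely on the documented mremap(2)
contract (hypothetical and untested; the target address is arbitrary,
and MREMAP_DONTUNMAP needs >= 5.7 headers):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
	size_t len = 4096;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	/* MREMAP_FIXED without MREMAP_MAYMOVE: rejected by the
	 * "a fixed address implies a move" check. */
	if (mremap(p, len, len, MREMAP_FIXED,
		   (void *)0x700000000000UL) == MAP_FAILED)
		printf("MREMAP_FIXED alone: errno %d (expect EINVAL)\n",
		       errno);

	/* MREMAP_DONTUNMAP with old_len != new_len: also rejected. */
	if (mremap(p, len, 2 * len,
		   MREMAP_MAYMOVE | MREMAP_DONTUNMAP) == MAP_FAILED)
		printf("DONTUNMAP resize: errno %d (expect EINVAL)\n",
		       errno);

	return 0;
}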
> > > +
> > > +/*
> > > + * We know we can expand the VMA in-place by delta bytes, so do so.
> > > + *
> > > + * If we discover the VMA is locked, update mm_struct statistics accordingly and
> > > + * indicate so to the caller.
> > > + */
> > > +static unsigned long expand_vma_inplace(struct vm_area_struct *vma,
> > > + unsigned long delta, bool *locked)
> > > +{
> > > + struct mm_struct *mm = current->mm;
> > > + long pages = delta >> PAGE_SHIFT;
> > > + VMA_ITERATOR(vmi, mm, vma->vm_end);
> > > + long charged = 0;
> > > +
> > > + if (vma->vm_flags & VM_ACCOUNT) {
> > > + if (security_vm_enough_memory_mm(mm, pages))
> > > + return -ENOMEM;
> > > +
> > > + charged = pages;
> > > + }
> > > +
> > > + /*
> > > + * vma_merge_extend() is called on the extension we are adding
> > > + * to the already existing VMA; it merges this extension with
> > > + * the existing VMA (the expand operation itself), and possibly
> > > + * also with the next VMA, should that become adjacent to the
> > > + * expanded VMA and otherwise be compatible.
> > > + */
> > > + vma = vma_merge_extend(&vmi, vma, delta);
> > > + if (!vma) {
> > > + vm_unacct_memory(charged);
> > > + return -ENOMEM;
> > > + }
> > > +
> > > + vm_stat_account(mm, vma->vm_flags, pages);
> > > + if (vma->vm_flags & VM_LOCKED) {
> > > + mm->locked_vm += pages;
> > > + *locked = true;
> > > + }
> > > +
> > > + return 0;
> > > +}
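
For context on the locked handling: the userspace expectation being
preserved is that an mlock()'d mapping stays fully resident, so the
newly added tail has to be populated too -- which the tail of the
syscall does via mm_populate() once *locked is set. A hypothetical
sketch of that expectation (not part of the patch):

	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0);
	void *q = mremap(p, 4096, 8192, MREMAP_MAYMOVE);
	/* On success, all of [q, q + 8192) should be locked and
	 * resident, including the newly added second page. */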
> > > +
> > > +static bool align_hugetlb(struct vm_area_struct *vma,
> > > + unsigned long addr,
> > > + unsigned long new_addr,
> > > + unsigned long *old_len_ptr,
> > > + unsigned long *new_len_ptr,
> > > + unsigned long *delta_ptr)
> > > +{
> > > + unsigned long old_len = *old_len_ptr;
> > > + unsigned long new_len = *new_len_ptr;
> > > + struct hstate *h __maybe_unused = hstate_vma(vma);
> > > +
> > > + old_len = ALIGN(old_len, huge_page_size(h));
> > > + new_len = ALIGN(new_len, huge_page_size(h));
> > > +
> > > + /* addrs must be huge page aligned */
> > > + if (addr & ~huge_page_mask(h))
> > > + return false;
> > > + if (new_addr & ~huge_page_mask(h))
> > > + return false;
> > > +
> > > + /*
> > > + * Don't allow remap expansion, because the underlying hugetlb
> > > + * reservation is not yet capable of handling split reservations.
> > > + */
> > > + if (new_len > old_len)
> > > + return false;
> > > +
> > > + *old_len_ptr = old_len;
> > > + *new_len_ptr = new_len;
> > > + *delta_ptr = abs_diff(old_len, new_len);
> > > + return true;
> > > +}
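
To make the alignment arithmetic concrete, a worked example assuming a
2 MiB huge page size (made-up numbers, not from the patch):

	/* huge_page_size(h) == 2 MiB, huge_page_mask(h) == ~(2 MiB - 1):
	 *
	 *   old_len = 3 MiB -> ALIGN(3 MiB, 2 MiB) = 4 MiB
	 *   new_len = 1 MiB -> ALIGN(1 MiB, 2 MiB) = 2 MiB
	 *
	 * addr & ~huge_page_mask(h) == addr & (2 MiB - 1), so addr and
	 * new_addr must both be 2 MiB-aligned. Since new_len (2 MiB) <=
	 * old_len (4 MiB) this is a shrink, which is permitted, and
	 * delta = abs_diff(4 MiB, 2 MiB) = 2 MiB.
	 */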
> > > +
> > > +/*
> > > + * We are mremap()'ing without specifying a fixed address to move to, but are
> > > + * requesting that the VMA's size be increased.
> > > + *
> > > + * Try to do so in-place, if this fails, then move the VMA to a new location to
> > > + * action the change.
> > > + */
> > > +static unsigned long expand_vma(struct vm_area_struct *vma,
> > > + unsigned long addr, unsigned long old_len,
> > > + unsigned long new_len, unsigned long flags,
> > > + bool *locked_ptr, unsigned long *new_addr_ptr,
> > > + struct vm_userfaultfd_ctx *uf_ptr,
> > > + struct list_head *uf_unmap_ptr)
> > > +{
> > > + unsigned long err;
> > > + unsigned long map_flags;
> > > + unsigned long new_addr; /* We ignore any user-supplied one. */
> > > + pgoff_t pgoff;
> > > +
> > > + err = resize_is_valid(vma, addr, old_len, new_len, flags);
> > > + if (err)
> > > + return err;
> > > +
> > > + /*
> > > + * [addr, addr + old_len) spans precisely to the end of the VMA, so
> > > + * try to expand it in-place.
> > > + */
> > > + if (old_len == vma->vm_end - addr &&
> > > + vma_expandable(vma, new_len - old_len)) {
> > > + err = expand_vma_inplace(vma, new_len - old_len, locked_ptr);
> >
> > You use delta twice here (new_len - old_len). I assume this is
> > different in later patches.
> >
> > > + if (IS_ERR_VALUE(err))
> > > + return err;
> >
> > Doesn't expand_vma_inplace() return 0 on success, error otherwise?
>
> Yeah, this is copying some already dubious logic from the original (trying to
> _somewhat_ minimise the delta here).
>
> At any rate, a later commit addresses this!
Thanks.
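
For anyone reading along, why the check nonetheless behaves:
IS_ERR_VALUE() (see include/linux/err.h) is roughly

	#define IS_ERR_VALUE(x) \
		unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)

and since expand_vma_inplace() only ever returns 0 or -ENOMEM, with
-ENOMEM as an unsigned long landing in [-MAX_ERRNO, -1], the test is
equivalent to a plain if (err) here -- just obliquely phrased.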
>
> >
> > > +
> > > + /*
> > > + * We want to populate the newly expanded portion of the VMA to
> > > + * satisfy the expectation that mlock()'ing a VMA maintains all
> > > + * of its pages in memory.
> > > + */
> > > + if (*locked_ptr)
> > > + *new_addr_ptr = addr;
> > > +
> > > + /* OK we're done! */
> > > + return addr;
> > > + }
> > > +
> > > + /*
> > > + * We weren't able to just expand or shrink the area, so we
> > > + * need to create a new one and move it.
> > > + */
> > > +
> >
> > So it's more of an expand_or_move_vma()?
>
> I think that's misleading, because it would be
> expand_or_move_and_expand_vma() or expand_in_place_or_move_and_expand()...
>
> Unavoidably we have to abbreviate; I tried to differentiate between the two
> cases with the 'in place' stuff.
>
> So we _try_ to do it in place, if not we have to expand elsewhere.
Fair enough.
>
> >
> > > + /* We're not allowed to move the VMA, so error out. */
> > > + if (!(flags & MREMAP_MAYMOVE))
> > > + return -ENOMEM;
> >
> > and by flags we mean... the flags from the syscall. This gets confusing
> > with map_flags. At least there are only two and not six flags.
>
> 3 flags (MREMAP_FIXED, MREMAP_MAYMOVE, MREMAP_DONTUNMAP) :>)
>
> This becomes clearer later; I'm not sure saying mremap_flags really adds
> anything but noise, though.
Okay.
>
> >
> > > +
> > > + /* Find a new location to move the VMA to. */
> > > + map_flags = (vma->vm_flags & VM_MAYSHARE) ? MAP_SHARED : 0;
> > > + pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
> > > + new_addr = get_unmapped_area(vma->vm_file, 0, new_len, pgoff, map_flags);
> > > + if (IS_ERR_VALUE(new_addr))
> > > + return new_addr;
> > > + *new_addr_ptr = new_addr;
> > > +
> > > + return move_vma(vma, addr, old_len, new_len, new_addr,
> > > + locked_ptr, flags, uf_ptr, uf_unmap_ptr);
> > > +}
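
A worked example of the pgoff calculation above, with made-up numbers:

	/* A file-backed VMA spanning [0x10000000, 0x10100000) with
	 * vma->vm_pgoff == 16, mremap()'d at addr == 0x10080000, with
	 * 4 KiB pages (PAGE_SHIFT == 12):
	 *
	 *   pgoff = 16 + ((0x10080000 - 0x10000000) >> 12)
	 *         = 16 + (0x80000 >> 12)
	 *         = 16 + 128
	 *         = 144
	 *
	 * i.e. the file offset, in pages, of the sub-range being moved,
	 * so get_unmapped_area() can respect any file-mapping alignment
	 * constraints when placing the replacement mapping.
	 */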
> > > +
> > > /*
> > > * Expand (or shrink) an existing mapping, potentially moving it at the
> > > * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
> > > @@ -1048,7 +1241,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
> > > {
> > > struct mm_struct *mm = current->mm;
> > > struct vm_area_struct *vma;
> > > - unsigned long ret = -EINVAL;
> > > + unsigned long ret;
> > > + unsigned long delta;
> > > bool locked = false;
> > > struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
> > > LIST_HEAD(uf_unmap_early);
> > > @@ -1067,70 +1261,38 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
> > > */
> > > addr = untagged_addr(addr);
> > >
> > > - if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
> > > - return ret;
> > > -
> > > - if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
> > > - return ret;
> > > -
> > > - /*
> > > - * MREMAP_DONTUNMAP is always a move and it does not allow resizing
> > > - * in the process.
> > > - */
> > > - if (flags & MREMAP_DONTUNMAP &&
> > > - (!(flags & MREMAP_MAYMOVE) || old_len != new_len))
> > > - return ret;
> > > -
> > > -
> > > - if (offset_in_page(addr))
> > > + ret = check_mremap_params(addr, flags, old_len, new_len, new_addr);
> > > + if (ret)
> > > return ret;
> > >
> > > old_len = PAGE_ALIGN(old_len);
> > > new_len = PAGE_ALIGN(new_len);
> > > + delta = abs_diff(old_len, new_len);
> > >
> > > - /*
> > > - * We allow a zero old-len as a special case
> > > - * for DOS-emu "duplicate shm area" thing. But
> > > - * a zero new-len is nonsensical.
> > > - */
> > > - if (!new_len)
> > > - return ret;
> > > -
> > > - if (mmap_write_lock_killable(current->mm))
> > > + if (mmap_write_lock_killable(mm))
> > > return -EINTR;
> > > +
> > > vma = vma_lookup(mm, addr);
> > > if (!vma) {
> > > ret = -EFAULT;
> > > goto out;
> > > }
> > >
> > > - /* Don't allow remapping vmas when they have already been sealed */
> > > + /* If mseal()'d, mremap() is prohibited. */
> > > if (!can_modify_vma(vma)) {
> > > ret = -EPERM;
> > > goto out;
> > > }
> >
> > This could be deferred to the munmap() call, which performs the same
> > check and would also fail.
> >
> > It doesn't really cost anything though so I don't think it's worth it
> > here. Maybe something we can keep in mind for the future...
>
> Happy to address, but I think it would be better in a later commit, as this
> one is more like 'keep things the same but restructure'; later commits make
> deeper changes.
Right, yes. Me too.
...
Thanks,
Liam