mm/userfaultfd.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-)
mwriteprotect_range() errors out if [start, end) doesn't fall in one
VMA. We are facing a use case where multiple VMAs are present in one
range of interest. For example, the following pseudocode reproduces the
error which we are trying to fix:
- Allocate 16 pages of memory with mmap() using PROT_NONE
- Register the memory with userfaultfd
- Change protection of the first half (pages 1 to 8) of the memory to
PROT_READ | PROT_WRITE. This splits the memory area into two VMAs.
- Now UFFDIO_WRITEPROTECT with UFFDIO_WRITEPROTECT_MODE_WP on the whole
16-page range errors out.
This is a simple use case where the user may or may not know whether the
memory area has been divided into multiple VMAs.
Reported-by: Paul Gofman <pgofman@codeweavers.com>
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
---
mm/userfaultfd.c | 36 +++++++++++++++++++-----------------
1 file changed, 19 insertions(+), 17 deletions(-)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 65ad172add27..46e0a014af68 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -738,9 +738,11 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
unsigned long len, bool enable_wp,
atomic_t *mmap_changing)
{
+ unsigned long end = start + len;
struct vm_area_struct *dst_vma;
unsigned long page_mask;
int err;
+ VMA_ITERATOR(vmi, dst_mm, start);
/*
* Sanitize the command parameters:
@@ -762,26 +764,26 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
if (mmap_changing && atomic_read(mmap_changing))
goto out_unlock;
- err = -ENOENT;
- dst_vma = find_dst_vma(dst_mm, start, len);
-
- if (!dst_vma)
- goto out_unlock;
- if (!userfaultfd_wp(dst_vma))
- goto out_unlock;
- if (!vma_can_userfault(dst_vma, dst_vma->vm_flags))
- goto out_unlock;
+ for_each_vma_range(vmi, dst_vma, end) {
+ err = -ENOENT;
- if (is_vm_hugetlb_page(dst_vma)) {
- err = -EINVAL;
- page_mask = vma_kernel_pagesize(dst_vma) - 1;
- if ((start & page_mask) || (len & page_mask))
- goto out_unlock;
- }
+ if (!dst_vma->vm_userfaultfd_ctx.ctx)
+ break;
+ if (!userfaultfd_wp(dst_vma))
+ break;
+ if (!vma_can_userfault(dst_vma, dst_vma->vm_flags))
+ break;
- uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp);
+ if (is_vm_hugetlb_page(dst_vma)) {
+ err = -EINVAL;
+ page_mask = vma_kernel_pagesize(dst_vma) - 1;
+ if ((start & page_mask) || (len & page_mask))
+ break;
+ }
- err = 0;
+ uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp);
+ err = 0;
+ }
out_unlock:
mmap_read_unlock(dst_mm);
return err;
--
2.39.1
On 13.02.23 11:43, Muhammad Usama Anjum wrote: > mwriteprotect_range() errors out if [start, end) doesn't fall in one > VMA. We are facing a use case where multiple VMAs are present in one > range of interest. For example, the following pseudocode reproduces the > error which we are trying to fix: > > - Allocate memory of size 16 pages with PROT_NONE with mmap > - Register userfaultfd > - Change protection of the first half (1 to 8 pages) of memory to > PROT_READ | PROT_WRITE. This breaks the memory area in two VMAs. > - Now UFFDIO_WRITEPROTECT_MODE_WP on the whole memory of 16 pages errors > out. > > This is a simple use case where user may or may not know if the memory > area has been divided into multiple VMAs. > > Reported-by: Paul Gofman <pgofman@codeweavers.com> > Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com> > --- > mm/userfaultfd.c | 36 +++++++++++++++++++----------------- > 1 file changed, 19 insertions(+), 17 deletions(-) > > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c > index 65ad172add27..46e0a014af68 100644 > --- a/mm/userfaultfd.c > +++ b/mm/userfaultfd.c > @@ -738,9 +738,11 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, > unsigned long len, bool enable_wp, > atomic_t *mmap_changing) > { > + unsigned long end = start + len; > struct vm_area_struct *dst_vma; > unsigned long page_mask; > int err; > + VMA_ITERATOR(vmi, dst_mm, start); > > /* > * Sanitize the command parameters: > @@ -762,26 +764,26 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, > if (mmap_changing && atomic_read(mmap_changing)) > goto out_unlock; > > - err = -ENOENT; > - dst_vma = find_dst_vma(dst_mm, start, len); > - > - if (!dst_vma) > - goto out_unlock; > - if (!userfaultfd_wp(dst_vma)) > - goto out_unlock; > - if (!vma_can_userfault(dst_vma, dst_vma->vm_flags)) > - goto out_unlock; > + for_each_vma_range(vmi, dst_vma, end) { > + err = -ENOENT; > > - if (is_vm_hugetlb_page(dst_vma)) { > - err = -EINVAL; > - 
page_mask = vma_kernel_pagesize(dst_vma) - 1; > - if ((start & page_mask) || (len & page_mask)) > - goto out_unlock; > - } > + if (!dst_vma->vm_userfaultfd_ctx.ctx) > + break; > + if (!userfaultfd_wp(dst_vma)) > + break; > + if (!vma_can_userfault(dst_vma, dst_vma->vm_flags)) > + break; > > - uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); > + if (is_vm_hugetlb_page(dst_vma)) { > + err = -EINVAL; > + page_mask = vma_kernel_pagesize(dst_vma) - 1; > + if ((start & page_mask) || (len & page_mask)) > + break; > + } > > - err = 0; > + uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); I suspect you should be adjusting the range to only cover that specific VMA here. -- Thanks, David / dhildenb
Hi David, Thank you for quick review! On 2/13/23 4:44 PM, David Hildenbrand wrote: > On 13.02.23 11:43, Muhammad Usama Anjum wrote: >> mwriteprotect_range() errors out if [start, end) doesn't fall in one >> VMA. We are facing a use case where multiple VMAs are present in one >> range of interest. For example, the following pseudocode reproduces the >> error which we are trying to fix: >> >> - Allocate memory of size 16 pages with PROT_NONE with mmap >> - Register userfaultfd >> - Change protection of the first half (1 to 8 pages) of memory to >> PROT_READ | PROT_WRITE. This breaks the memory area in two VMAs. >> - Now UFFDIO_WRITEPROTECT_MODE_WP on the whole memory of 16 pages errors >> out. >> >> This is a simple use case where user may or may not know if the memory >> area has been divided into multiple VMAs. >> >> Reported-by: Paul Gofman <pgofman@codeweavers.com> >> Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com> >> --- >> mm/userfaultfd.c | 36 +++++++++++++++++++----------------- >> 1 file changed, 19 insertions(+), 17 deletions(-) >> >> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c >> index 65ad172add27..46e0a014af68 100644 >> --- a/mm/userfaultfd.c >> +++ b/mm/userfaultfd.c >> @@ -738,9 +738,11 @@ int mwriteprotect_range(struct mm_struct *dst_mm, >> unsigned long start, >> unsigned long len, bool enable_wp, >> atomic_t *mmap_changing) >> { >> + unsigned long end = start + len; >> struct vm_area_struct *dst_vma; >> unsigned long page_mask; >> int err; >> + VMA_ITERATOR(vmi, dst_mm, start); >> /* >> * Sanitize the command parameters: >> @@ -762,26 +764,26 @@ int mwriteprotect_range(struct mm_struct *dst_mm, >> unsigned long start, >> if (mmap_changing && atomic_read(mmap_changing)) >> goto out_unlock; >> - err = -ENOENT; >> - dst_vma = find_dst_vma(dst_mm, start, len); >> - >> - if (!dst_vma) >> - goto out_unlock; >> - if (!userfaultfd_wp(dst_vma)) >> - goto out_unlock; >> - if (!vma_can_userfault(dst_vma, dst_vma->vm_flags)) >> - goto 
out_unlock; >> + for_each_vma_range(vmi, dst_vma, end) { >> + err = -ENOENT; >> - if (is_vm_hugetlb_page(dst_vma)) { >> - err = -EINVAL; >> - page_mask = vma_kernel_pagesize(dst_vma) - 1; >> - if ((start & page_mask) || (len & page_mask)) >> - goto out_unlock; >> - } >> + if (!dst_vma->vm_userfaultfd_ctx.ctx) >> + break; >> + if (!userfaultfd_wp(dst_vma)) >> + break; >> + if (!vma_can_userfault(dst_vma, dst_vma->vm_flags)) >> + break; >> - uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); >> + if (is_vm_hugetlb_page(dst_vma)) { >> + err = -EINVAL; >> + page_mask = vma_kernel_pagesize(dst_vma) - 1; >> + if ((start & page_mask) || (len & page_mask)) >> + break; >> + } >> - err = 0; >> + uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); > > I suspect you should be adjusting the range to only cover that specific VMA > here. Sorry, you are right. I don't know why it is still working with the blunder. Will send a v2. Thanks, Usama -- BR, Muhammad Usama Anjum
On 13.02.23 16:04, Muhammad Usama Anjum wrote: > Hi David, > > Thank you for quick review! > > On 2/13/23 4:44 PM, David Hildenbrand wrote: >> On 13.02.23 11:43, Muhammad Usama Anjum wrote: >>> mwriteprotect_range() errors out if [start, end) doesn't fall in one >>> VMA. We are facing a use case where multiple VMAs are present in one >>> range of interest. For example, the following pseudocode reproduces the >>> error which we are trying to fix: >>> >>> - Allocate memory of size 16 pages with PROT_NONE with mmap >>> - Register userfaultfd >>> - Change protection of the first half (1 to 8 pages) of memory to >>> PROT_READ | PROT_WRITE. This breaks the memory area in two VMAs. >>> - Now UFFDIO_WRITEPROTECT_MODE_WP on the whole memory of 16 pages errors >>> out. >>> >>> This is a simple use case where user may or may not know if the memory >>> area has been divided into multiple VMAs. >>> >>> Reported-by: Paul Gofman <pgofman@codeweavers.com> >>> Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com> >>> --- >>> mm/userfaultfd.c | 36 +++++++++++++++++++----------------- >>> 1 file changed, 19 insertions(+), 17 deletions(-) >>> >>> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c >>> index 65ad172add27..46e0a014af68 100644 >>> --- a/mm/userfaultfd.c >>> +++ b/mm/userfaultfd.c >>> @@ -738,9 +738,11 @@ int mwriteprotect_range(struct mm_struct *dst_mm, >>> unsigned long start, >>> unsigned long len, bool enable_wp, >>> atomic_t *mmap_changing) >>> { >>> + unsigned long end = start + len; >>> struct vm_area_struct *dst_vma; >>> unsigned long page_mask; >>> int err; >>> + VMA_ITERATOR(vmi, dst_mm, start); >>> /* >>> * Sanitize the command parameters: >>> @@ -762,26 +764,26 @@ int mwriteprotect_range(struct mm_struct *dst_mm, >>> unsigned long start, >>> if (mmap_changing && atomic_read(mmap_changing)) >>> goto out_unlock; >>> - err = -ENOENT; >>> - dst_vma = find_dst_vma(dst_mm, start, len); >>> - >>> - if (!dst_vma) >>> - goto out_unlock; >>> - if 
(!userfaultfd_wp(dst_vma)) >>> - goto out_unlock; >>> - if (!vma_can_userfault(dst_vma, dst_vma->vm_flags)) >>> - goto out_unlock; >>> + for_each_vma_range(vmi, dst_vma, end) { >>> + err = -ENOENT; >>> - if (is_vm_hugetlb_page(dst_vma)) { >>> - err = -EINVAL; >>> - page_mask = vma_kernel_pagesize(dst_vma) - 1; >>> - if ((start & page_mask) || (len & page_mask)) >>> - goto out_unlock; >>> - } >>> + if (!dst_vma->vm_userfaultfd_ctx.ctx) >>> + break; >>> + if (!userfaultfd_wp(dst_vma)) >>> + break; >>> + if (!vma_can_userfault(dst_vma, dst_vma->vm_flags)) >>> + break; >>> - uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); >>> + if (is_vm_hugetlb_page(dst_vma)) { >>> + err = -EINVAL; >>> + page_mask = vma_kernel_pagesize(dst_vma) - 1; >>> + if ((start & page_mask) || (len & page_mask)) >>> + break; >>> + } >>> - err = 0; >>> + uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp); >> >> I suspect you should be adjusting the range to only cover that specific VMA >> here. > Sorry, you are right. I don't know why it is still working with the > blunder. Will send a v2. Maybe worth adding some sanity checks (VM_WARN_ONCE()) in there (e.g., change_protection()) to catch that. -- Thanks, David / dhildenb
© 2016 - 2025 Red Hat, Inc.