[PATCH 3/4] linux-user: fix reserved_va page leak in do_munmap

Matthew Lugg posted 4 patches 1 month ago
Maintainers: Laurent Vivier <laurent@vivier.eu>, "Alex Bennée" <alex.bennee@linaro.org>
[PATCH 3/4] linux-user: fix reserved_va page leak in do_munmap
Posted by Matthew Lugg 1 month ago
The previous logic here had an off-by-one error: assuming 4k pages on
host and guest, if `len == 4097` (indicating to unmap 2 pages), then
`last = start + 4096`, so `real_last = start + 4095`, so ultimately
`real_len = 4096`. I do not believe this could cause any observable bugs
in guests, because `target_munmap` page-aligns the length it passes in.
However, calls to this function in `target_mremap` do not page-align the
length, so those calls could "drop" pages, leading to a part of the
reserved region becoming unmapped. At worst, a host allocation could get
mapped into that hole, then clobbered by a new guest mapping.

A simple fix didn't feel ideal here, because I think this function was
not written as well as it could be. Instead, the logic is simpler if we
use `end = start + len` instead of `last = start + len - 1` (overflow
does not cause any problem here), and use offsets in the loops (avoiding
overflows since the offset is never larger than the host page size).

Signed-off-by: Matthew Lugg <mlugg@mlugg.co.uk>
---
 linux-user/mmap.c | 63 ++++++++++++++++-------------------------------
 1 file changed, 21 insertions(+), 42 deletions(-)

diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 4c5fe832ad..e1ed9085c7 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -1014,59 +1014,38 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
 static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
 {
     int host_page_size = qemu_real_host_page_size();
+    abi_ulong end;
     abi_ulong real_start;
-    abi_ulong real_last;
-    abi_ulong real_len;
-    abi_ulong last;
-    abi_ulong a;
+    abi_ulong real_end;
+    abi_ulong off;
     void *host_start;
     int prot;
 
-    last = start + len - 1;
+    end = ROUND_UP(start + len, TARGET_PAGE_SIZE);
+
     real_start = start & -host_page_size;
-    real_last = ROUND_UP(last, host_page_size) - 1;
+    real_end = ROUND_UP(end, host_page_size);
 
-    /*
-     * If guest pages remain on the first or last host pages,
-     * adjust the deallocation to retain those guest pages.
-     * The single page special case is required for the last page,
-     * lest real_start overflow to zero.
-     */
-    if (real_last - real_start < host_page_size) {
-        prot = 0;
-        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
-            prot |= page_get_flags(a);
-        }
-        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
-            prot |= page_get_flags(a + 1);
-        }
-        if (prot != 0) {
-            return 0;
-        }
-    } else {
-        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
-            prot |= page_get_flags(a);
-        }
-        if (prot != 0) {
-            real_start += host_page_size;
-        }
+    /* end or real_end may have overflowed to 0, but that's okay. */
 
-        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
-            prot |= page_get_flags(a + 1);
-        }
-        if (prot != 0) {
-            real_last -= host_page_size;
-        }
+    /* If [real_start,start) contains a mapped guest page, retain the first page. */
+    for (prot = 0, off = 0; off < start - real_start; off += TARGET_PAGE_SIZE) {
+        prot |= page_get_flags(real_start + off);
+    }
+    if (prot != 0) {
+        real_start += host_page_size;
+    }
 
-        if (real_last < real_start) {
-            return 0;
-        }
+    /* If [end,real_end) contains a mapped guest page, retain the last page. */
+    for (prot = 0, off = 0; off < real_end - end; off += TARGET_PAGE_SIZE) {
+        prot |= page_get_flags(end + off);
+    }
+    if (prot != 0) {
+        real_end -= host_page_size;
     }
 
-    real_len = real_last - real_start + 1;
     host_start = g2h_untagged(real_start);
-
-    return do_munmap(host_start, real_len);
+    return do_munmap(host_start, real_end - real_start);
 }
 
 int target_munmap(abi_ulong start, abi_ulong len)
-- 
2.51.0
Re: [PATCH 3/4] linux-user: fix reserved_va page leak in do_munmap
Posted by Richard Henderson 3 weeks, 2 days ago
On 10/11/25 15:03, Matthew Lugg wrote:
> The previous logic here had an off-by-one error: assuming 4k pages on
> host and guest, if `len == 4097` (indicating to unmap 2 pages), then
> `last = start + 4096`, so `real_last = start + 4095`, so ultimately
> `real_len = 4096`. I do not believe this could cause any observable bugs
> in guests, because `target_munmap` page-aligns the length it passes in.
> However, calls to this function in `target_mremap` do not page-align the
> length, so those calls could "drop" pages, leading to a part of the
> reserved region becoming unmapped. At worst, a host allocation could get
> mapped into that hole, then clobbered by a new guest mapping.
> 
> A simple fix didn't feel ideal here, because I think this function was
> not written as well as it could be. Instead, the logic is simpler if we
> use `end = start + len` instead of `last = start + len - 1` (overflow
> does not cause any problem here), and use offsets in the loops (avoiding
> overflows since the offset is never larger than the host page size).

No, it is not simpler with 'end', because end == 0 is 'valid' in the sense that the range 
goes from [start, (abi_ptr)-1].  But having end <= start is awkward in the extreme.

Thus we prefer the inclusive range [start, last] to the exclusive range [start, end).

Not everything has been converted away from 'end', but we certainly should not regress 
existing code.


r~
Re: [PATCH 3/4] linux-user: fix reserved_va page leak in do_munmap
Posted by Peter Maydell 3 weeks, 4 days ago
On Sat, 11 Oct 2025 at 21:20, Matthew Lugg <mlugg@mlugg.co.uk> wrote:
>
> The previous logic here had an off-by-one error: assuming 4k pages on
> host and guest, if `len == 4097` (indicating to unmap 2 pages), then
> `last = start + 4096`, so `real_last = start + 4095`, so ultimately
> `real_len = 4096`. I do not believe this could cause any observable bugs
> in guests, because `target_munmap` page-aligns the length it passes in.
> However, calls to this function in `target_mremap` do not page-align the
> length, so those calls could "drop" pages, leading to a part of the
> reserved region becoming unmapped. At worst, a host allocation could get
> mapped into that hole, then clobbered by a new guest mapping.
>
> A simple fix didn't feel ideal here, because I think this function was
> not written as well as it could be. Instead, the logic is simpler if we
> use `end = start + len` instead of `last = start + len - 1` (overflow
> does not cause any problem here), and use offsets in the loops (avoiding
> overflows since the offset is never larger than the host page size).

I don't really understand this code, I'm just looking at
it fresh, so my comment below might be wrong.

> -    /*
> -     * If guest pages remain on the first or last host pages,
> -     * adjust the deallocation to retain those guest pages.
> -     * The single page special case is required for the last page,
> -     * lest real_start overflow to zero.
> -     */

This comment says we need the special case for
"real_last - real_start < host_page_size" to avoid an overflow.

> -    if (real_last - real_start < host_page_size) {
> -        prot = 0;

We delete the special case...

> -        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
> -            prot |= page_get_flags(a);
> -        }
> -        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
> -            prot |= page_get_flags(a + 1);
> -        }
> -        if (prot != 0) {
> -            return 0;
> -        }
> -    } else {
> -        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
> -            prot |= page_get_flags(a);
> -        }
> -        if (prot != 0) {
> -            real_start += host_page_size;
> -        }
> +    /* end or real_end may have overflowed to 0, but that's okay. */
>
> -        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
> -            prot |= page_get_flags(a + 1);
> -        }
> -        if (prot != 0) {
> -            real_last -= host_page_size;
> -        }
> +    /* If [real_start,start) contains a mapped guest page, retain the first page. */
> +    for (prot = 0, off = 0; off < start - real_start; off += TARGET_PAGE_SIZE) {
> +        prot |= page_get_flags(real_start + off);
> +    }
> +    if (prot != 0) {
> +        real_start += host_page_size;

...and now if real_start was the last page in the
address space, this addition will overflow it to zero.

> +    }
>
> -        if (real_last < real_start) {
> -            return 0;
> -        }
> +    /* If [end,real_end) contains a mapped guest page, retain the last page. */
> +    for (prot = 0, off = 0; off < real_end - end; off += TARGET_PAGE_SIZE) {
> +        prot |= page_get_flags(end + off);
> +    }
> +    if (prot != 0) {
> +        real_end -= host_page_size;
>      }
>
> -    real_len = real_last - real_start + 1;
>      host_start = g2h_untagged(real_start);
> -
> -    return do_munmap(host_start, real_len);
> +    return do_munmap(host_start, real_end - real_start);
>  }
>
>  int target_munmap(abi_ulong start, abi_ulong len)

thanks
-- PMM