[PATCH v3 22/33] linux-user: Split out mmap_h_lt_g

Richard Henderson posted 33 patches 10 months, 4 weeks ago
Maintainers: Richard Henderson <richard.henderson@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>, Riku Voipio <riku.voipio@iki.fi>, Warner Losh <imp@bsdimp.com>, Kyle Evans <kevans@freebsd.org>, Stefan Berger <stefanb@linux.vnet.ibm.com>, Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Yanan Wang <wangyanan55@huawei.com>, Laurent Vivier <laurent@vivier.eu>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, David Hildenbrand <david@redhat.com>, Peter Maydell <peter.maydell@linaro.org>, Nicholas Piggin <npiggin@gmail.com>, Daniel Henrique Barboza <danielhb413@gmail.com>, "Cédric Le Goater" <clg@kaod.org>, "Alex Bennée" <alex.bennee@linaro.org>, Yoshinori Sato <ysato@users.sourceforge.jp>
There is a newer version of this series
[PATCH v3 22/33] linux-user: Split out mmap_h_lt_g
Posted by Richard Henderson 10 months, 4 weeks ago
Work much harder to get alignment and mapping beyond the end
of the file correct.  Both cases are exercised by our
test-mmap for alpha (8k pages) on any 4k page host.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/mmap.c | 156 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 125 insertions(+), 31 deletions(-)

diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 00003b8329..8b0a26e50d 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -552,6 +552,128 @@ static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len,
     return mmap_end(start, last, start, last, flags, page_flags);
 }
 
+/*
+ * Special case host page size < target page size.
+ *
+ * The two special cases are increased guest alignment, and mapping
+ * past the end of a file.
+ *
+ * When mapping files into a memory area larger than the file,
+ * accesses to pages beyond the file size will cause a SIGBUS.
+ *
+ * For example, if mmapping a file of 100 bytes on a host with 4K
+ * pages emulating a target with 8K pages, the target expects to
+ * be able to access the first 8K. But the host will trap us on
+ * any access beyond 4K.
+ *
+ * When emulating a target with a larger page-size than the host's,
+ * we may need to truncate file maps at EOF and add extra anonymous
+ * pages up to the target's page boundary.
+ *
+ * This workaround only works for files that do not change.
+ * If the file is later extended (e.g. ftruncate), the SIGBUS
+ * vanishes and the proper behaviour is that changes within the
+ * anon page should be reflected in the file.
+ *
+ * However, this case is rather common with executable images,
+ * so the workaround is important for even trivial tests, whereas
+ * the mmap of a file being extended is less common.
+ */
+static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot,
+                            int mmap_flags, int page_flags, int fd,
+                            off_t offset, int host_page_size)
+{
+    void *p, *want_p = g2h_untagged(start);
+    off_t fileend_adj = 0; /* bytes of [offset, offset + len) past EOF */
+    int flags = mmap_flags; /* local copy; may gain MAP_ANONYMOUS */
+    abi_ulong last, pass_last;
+
+    if (!(flags & MAP_ANONYMOUS)) {
+        struct stat sb;
+
+        if (fstat(fd, &sb) == -1) {
+            return -1;
+        }
+        if (offset >= sb.st_size) {
+            /*
+             * The entire map is beyond the end of the file.
+             * Transform it to an anonymous mapping.
+             */
+            flags |= MAP_ANONYMOUS;
+            fd = -1;
+            offset = 0;
+        } else if (offset + len > sb.st_size) {
+            /*
+             * A portion of the map is beyond the end of the file.
+             * Truncate the file portion of the allocation.
+             */
+            fileend_adj = offset + len - sb.st_size;
+        }
+    }
+
+    if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
+        if (fileend_adj) {
+            p = mmap(want_p, len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
+        } else {
+            p = mmap(want_p, len, host_prot, flags, fd, offset);
+        }
+        if (p != want_p) {
+            if (p != MAP_FAILED) {
+                munmap(p, len);
+                errno = EEXIST;
+            }
+            return -1;
+        }
+
+        if (fileend_adj) {
+            void *t = mmap(p, len - fileend_adj, host_prot,
+                           (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED,
+                           fd, offset);
+            assert(t != MAP_FAILED); /* unrecoverable: p already remapped */
+        }
+    } else {
+        size_t host_len, part_len;
+
+        /*
+         * Take care to align the host memory to TARGET_PAGE_SIZE.  Perform
+         * a larger anonymous allocation, unmap the unaligned head and the
+         * excess tail, then map the file on top of the aligned portion.
+         */
+        host_len = len + TARGET_PAGE_SIZE - host_page_size;
+        p = mmap(want_p, host_len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
+        if (p == MAP_FAILED) {
+            return -1;
+        }
+
+        part_len = (uintptr_t)p & (TARGET_PAGE_SIZE - 1);
+        if (part_len) {
+            part_len = TARGET_PAGE_SIZE - part_len;
+            munmap(p, part_len);
+            p += part_len;
+            host_len -= part_len;
+        }
+        if (len < host_len) {
+            munmap(p + len, host_len - len);
+        }
+
+        if (!(flags & MAP_ANONYMOUS)) {
+            void *t = mmap(p, len - fileend_adj, host_prot,
+                           flags | MAP_FIXED, fd, offset);
+            assert(t != MAP_FAILED); /* NOTE(review): could unwind here */
+        }
+
+        start = h2g(p);
+    }
+
+    last = start + len - 1;
+    if (fileend_adj) {
+        pass_last = ROUND_UP(last - fileend_adj, host_page_size) - 1;
+    } else {
+        pass_last = last;
+    }
+    return mmap_end(start, last, start, pass_last, mmap_flags, page_flags);
+}
+
 static abi_long target_mmap__locked(abi_ulong start, abi_ulong len,
                                     int target_prot, int flags, int page_flags,
                                     int fd, off_t offset)
@@ -596,37 +718,9 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len,
     if (host_page_size == TARGET_PAGE_SIZE) {
         return mmap_h_eq_g(start, len, host_prot, flags,
                            page_flags, fd, offset);
-    }
-
-    /*
-     * When mapping files into a memory area larger than the file, accesses
-     * to pages beyond the file size will cause a SIGBUS.
-     *
-     * For example, if mmaping a file of 100 bytes on a host with 4K pages
-     * emulating a target with 8K pages, the target expects to be able to
-     * access the first 8K. But the host will trap us on any access beyond
-     * 4K.
-     *
-     * When emulating a target with a larger page-size than the hosts, we
-     * may need to truncate file maps at EOF and add extra anonymous pages
-     * up to the targets page boundary.
-     */
-    if (host_page_size < TARGET_PAGE_SIZE && !(flags & MAP_ANONYMOUS)) {
-        struct stat sb;
-
-        if (fstat(fd, &sb) == -1) {
-            return -1;
-        }
-
-        /* Are we trying to create a map beyond EOF?.  */
-        if (offset + len > sb.st_size) {
-            /*
-             * If so, truncate the file map at eof aligned with
-             * the hosts real pagesize. Additional anonymous maps
-             * will be created beyond EOF.
-             */
-            len = ROUND_UP(sb.st_size - offset, host_page_size);
-        }
+    } else if (host_page_size < TARGET_PAGE_SIZE) {
+        return mmap_h_lt_g(start, len, host_prot, flags,
+                           page_flags, fd, offset, host_page_size);
     }
 
     if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
-- 
2.34.1
Re: [PATCH v3 22/33] linux-user: Split out mmap_h_lt_g
Posted by Ilya Leoshkevich 10 months ago
On Tue, Jan 02, 2024 at 12:57:57PM +1100, Richard Henderson wrote:
> Work much harder to get alignment and mapping beyond the end
> of the file correct.  Both of which are excercised by our
> test-mmap for alpha (8k pages) on any 4k page host.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  linux-user/mmap.c | 156 +++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 125 insertions(+), 31 deletions(-)

[...]

> +        if (fileend_adj) {
> +            void *t = mmap(p, len - fileend_adj, host_prot,
> +                           (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED,
> +                           fd, offset);
> +            assert(t != MAP_FAILED);

Is it possible to recover here? Of course, we are remapping the memory
we've mapped a few lines earlier, but asserting the syscall result
looks a bit odd.

[...]

> +        if (!(flags & MAP_ANONYMOUS)) {
> +            void *t = mmap(p, len - fileend_adj, host_prot,
> +                           flags | MAP_FIXED, fd, offset);
> +            assert(t != MAP_FAILED);

Same here.
Re: [PATCH v3 22/33] linux-user: Split out mmap_h_lt_g
Posted by Richard Henderson 9 months, 2 weeks ago
On 1/29/24 05:26, Ilya Leoshkevich wrote:
> On Tue, Jan 02, 2024 at 12:57:57PM +1100, Richard Henderson wrote:
>> Work much harder to get alignment and mapping beyond the end
>> of the file correct.  Both of which are excercised by our
>> test-mmap for alpha (8k pages) on any 4k page host.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   linux-user/mmap.c | 156 +++++++++++++++++++++++++++++++++++++---------
>>   1 file changed, 125 insertions(+), 31 deletions(-)
> 
> [...]
> 
>> +        if (fileend_adj) {
>> +            void *t = mmap(p, len - fileend_adj, host_prot,
>> +                           (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED,
>> +                           fd, offset);
>> +            assert(t != MAP_FAILED);
> 
> Is it possible to recover here? Of course, we are remapping the memory
> we've mapped a few lines earlier, but asserting the syscall result
> looks a bit odd.

This first one we cannot recover from, because we've already (potentially) overwritten the 
previous memory mapping.

>> +        if (!(flags & MAP_ANONYMOUS)) {
>> +            void *t = mmap(p, len - fileend_adj, host_prot,
>> +                           flags | MAP_FIXED, fd, offset);
>> +            assert(t != MAP_FAILED);
> 
> Same here.

This one we could, because the memory was previously unmapped.


r~
Re: [PATCH v3 22/33] linux-user: Split out mmap_h_lt_g
Posted by Richard Henderson 9 months, 2 weeks ago
On 1/29/24 05:26, Ilya Leoshkevich wrote:
> On Tue, Jan 02, 2024 at 12:57:57PM +1100, Richard Henderson wrote:
>> Work much harder to get alignment and mapping beyond the end
>> of the file correct.  Both of which are excercised by our
>> test-mmap for alpha (8k pages) on any 4k page host.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   linux-user/mmap.c | 156 +++++++++++++++++++++++++++++++++++++---------
>>   1 file changed, 125 insertions(+), 31 deletions(-)
> 
> [...]
> 
>> +        if (fileend_adj) {
>> +            void *t = mmap(p, len - fileend_adj, host_prot,
>> +                           (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED,
>> +                           fd, offset);
>> +            assert(t != MAP_FAILED);
> 
> Is it possible to recover here? Of course, we are remapping the memory
> we've mapped a few lines earlier, but asserting the syscall result
> looks a bit odd.
> 

Can you think of a failure mode?  I couldn't.
That's why I added the assert.

I suppose there's the always present threat of running out of vmas...


r~
Re: [PATCH v3 22/33] linux-user: Split out mmap_h_lt_g
Posted by Ilya Leoshkevich 9 months, 2 weeks ago
On Tue, 2024-02-13 at 09:54 -1000, Richard Henderson wrote:
> On 1/29/24 05:26, Ilya Leoshkevich wrote:
> > On Tue, Jan 02, 2024 at 12:57:57PM +1100, Richard Henderson wrote:
> > > Work much harder to get alignment and mapping beyond the end
> > > of the file correct.  Both of which are excercised by our
> > > test-mmap for alpha (8k pages) on any 4k page host.
> > > 
> > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> > > ---
> > >   linux-user/mmap.c | 156 +++++++++++++++++++++++++++++++++++++--
> > > -------
> > >   1 file changed, 125 insertions(+), 31 deletions(-)
> > 
> > [...]
> > 
> > > +        if (fileend_adj) {
> > > +            void *t = mmap(p, len - fileend_adj, host_prot,
> > > +                           (flags & ~MAP_FIXED_NOREPLACE) |
> > > MAP_FIXED,
> > > +                           fd, offset);
> > > +            assert(t != MAP_FAILED);
> > 
> > Is it possible to recover here? Of course, we are remapping the
> > memory
> > we've mapped a few lines earlier, but asserting the syscall result
> > looks a bit odd.
> > 
> 
> Can you think of a failure mode?  I couldn't.
> That's why I added the assert.
> 
> I suppose there's the always present threat of running out of vmas...

Right, and this should be easy to trigger by using ulimit -v.

> 
> 
> r~
> 
Re: [PATCH v3 22/33] linux-user: Split out mmap_h_lt_g
Posted by Pierrick Bouvier 10 months, 3 weeks ago
On 1/2/24 05:57, Richard Henderson wrote:
> Work much harder to get alignment and mapping beyond the end
> of the file correct.  Both of which are excercised by our
> test-mmap for alpha (8k pages) on any 4k page host.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   linux-user/mmap.c | 156 +++++++++++++++++++++++++++++++++++++---------
>   1 file changed, 125 insertions(+), 31 deletions(-)
> 
> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
> index 00003b8329..8b0a26e50d 100644
> --- a/linux-user/mmap.c
> +++ b/linux-user/mmap.c
> @@ -552,6 +552,128 @@ static abi_long mmap_h_eq_g(abi_ulong start, abi_ulong len,
>       return mmap_end(start, last, start, last, flags, page_flags);
>   }
>   
> +/*
> + * Special case host page size < target page size.
> + *
> + * The two special cases are increased guest alignment, and mapping
> + * past the end of a file.
> + *
> + * When mapping files into a memory area larger than the file,
> + * accesses to pages beyond the file size will cause a SIGBUS.
> + *
> + * For example, if mmaping a file of 100 bytes on a host with 4K
> + * pages emulating a target with 8K pages, the target expects to
> + * be able to access the first 8K. But the host will trap us on
> + * any access beyond 4K.
> + *
> + * When emulating a target with a larger page-size than the hosts,
> + * we may need to truncate file maps at EOF and add extra anonymous
> + * pages up to the targets page boundary.
> + *
> + * This workaround only works for files that do not change.
> + * If the file is later extended (e.g. ftruncate), the SIGBUS
> + * vanishes and the proper behaviour is that changes within the
> + * anon page should be reflected in the file.
> + *
> + * However, this case is rather common with executable images,
> + * so the workaround is important for even trivial tests, whereas
> + * the mmap of of a file being extended is less common.
> + */
> +static abi_long mmap_h_lt_g(abi_ulong start, abi_ulong len, int host_prot,
> +                            int mmap_flags, int page_flags, int fd,
> +                            off_t offset, int host_page_size)
> +{
> +    void *p, *want_p = g2h_untagged(start);
> +    off_t fileend_adj = 0;
> +    int flags = mmap_flags;
> +    abi_ulong last, pass_last;
> +
> +    if (!(flags & MAP_ANONYMOUS)) {
> +        struct stat sb;
> +
> +        if (fstat(fd, &sb) == -1) {
> +            return -1;
> +        }
> +        if (offset >= sb.st_size) {
> +            /*
> +             * The entire map is beyond the end of the file.
> +             * Transform it to an anonymous mapping.
> +             */
> +            flags |= MAP_ANONYMOUS;
> +            fd = -1;
> +            offset = 0;
> +        } else if (offset + len > sb.st_size) {
> +            /*
> +             * A portion of the map is beyond the end of the file.
> +             * Truncate the file portion of the allocation.
> +             */
> +            fileend_adj = offset + len - sb.st_size;
> +        }
> +    }
> +
> +    if (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE)) {
> +        if (fileend_adj) {
> +            p = mmap(want_p, len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
> +        } else {
> +            p = mmap(want_p, len, host_prot, flags, fd, offset);
> +        }
> +        if (p != want_p) {
> +            if (p != MAP_FAILED) {
> +                munmap(p, len);
> +                errno = EEXIST;
> +            }
> +            return -1;
> +        }
> +
> +        if (fileend_adj) {
> +            void *t = mmap(p, len - fileend_adj, host_prot,
> +                           (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED,
> +                           fd, offset);
> +            assert(t != MAP_FAILED);
> +        }
> +    } else {
> +        size_t host_len, part_len;
> +
> +        /*
> +         * Take care to align the host memory.  Perform a larger anonymous
> +         * allocation and extract the aligned portion.  Remap the file on
> +         * top of that.
> +         */
> +        host_len = len + TARGET_PAGE_SIZE - host_page_size;
> +        p = mmap(want_p, host_len, host_prot, flags | MAP_ANONYMOUS, -1, 0);
> +        if (p == MAP_FAILED) {
> +            return -1;
> +        }
> +
> +        part_len = (uintptr_t)p & (TARGET_PAGE_SIZE - 1);
> +        if (part_len) {
> +            part_len = TARGET_PAGE_SIZE - part_len;
> +            munmap(p, part_len);
> +            p += part_len;
> +            host_len -= part_len;
> +        }
> +        if (len < host_len) {
> +            munmap(p + len, host_len - len);
> +        }
> +
> +        if (!(flags & MAP_ANONYMOUS)) {
> +            void *t = mmap(p, len - fileend_adj, host_prot,
> +                           flags | MAP_FIXED, fd, offset);
> +            assert(t != MAP_FAILED);
> +        }
> +
> +        start = h2g(p);
> +    }
> +
> +    last = start + len - 1;
> +    if (fileend_adj) {
> +        pass_last = ROUND_UP(last - fileend_adj, host_page_size) - 1;
> +    } else {
> +        pass_last = last;
> +    }
> +    return mmap_end(start, last, start, pass_last, mmap_flags, page_flags);
> +}
> +
>   static abi_long target_mmap__locked(abi_ulong start, abi_ulong len,
>                                       int target_prot, int flags, int page_flags,
>                                       int fd, off_t offset)
> @@ -596,37 +718,9 @@ static abi_long target_mmap__locked(abi_ulong start, abi_ulong len,
>       if (host_page_size == TARGET_PAGE_SIZE) {
>           return mmap_h_eq_g(start, len, host_prot, flags,
>                              page_flags, fd, offset);
> -    }
> -
> -    /*
> -     * When mapping files into a memory area larger than the file, accesses
> -     * to pages beyond the file size will cause a SIGBUS.
> -     *
> -     * For example, if mmaping a file of 100 bytes on a host with 4K pages
> -     * emulating a target with 8K pages, the target expects to be able to
> -     * access the first 8K. But the host will trap us on any access beyond
> -     * 4K.
> -     *
> -     * When emulating a target with a larger page-size than the hosts, we
> -     * may need to truncate file maps at EOF and add extra anonymous pages
> -     * up to the targets page boundary.
> -     */
> -    if (host_page_size < TARGET_PAGE_SIZE && !(flags & MAP_ANONYMOUS)) {
> -        struct stat sb;
> -
> -        if (fstat(fd, &sb) == -1) {
> -            return -1;
> -        }
> -
> -        /* Are we trying to create a map beyond EOF?.  */
> -        if (offset + len > sb.st_size) {
> -            /*
> -             * If so, truncate the file map at eof aligned with
> -             * the hosts real pagesize. Additional anonymous maps
> -             * will be created beyond EOF.
> -             */
> -            len = ROUND_UP(sb.st_size - offset, host_page_size);
> -        }
> +    } else if (host_page_size < TARGET_PAGE_SIZE) {
> +        return mmap_h_lt_g(start, len, host_prot, flags,
> +                           page_flags, fd, offset, host_page_size);
>       }
>   
>       if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>