[PATCH v1 06/13] util/mmap-alloc: Factor out reserving of a memory region to mmap_reserve()

David Hildenbrand posted 13 patches 5 years, 9 months ago
Maintainers: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, Stefan Weil <sw@weilnetz.de>, Richard Henderson <rth@twiddle.net>, Eduardo Habkost <ehabkost@redhat.com>
There is a newer version of this series
[PATCH v1 06/13] util/mmap-alloc: Factor out reserving of a memory region to mmap_reserve()
Posted by David Hildenbrand 5 years, 9 months ago
We want to reserve a memory region without actually populating memory.
Let's factor that out.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Greg Kurz <groug@kaod.org>
Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 util/mmap-alloc.c | 58 +++++++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 25 deletions(-)

diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 82f02a2cec..43a26f38a8 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -82,6 +82,38 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
     return qemu_real_host_page_size;
 }
 
+/*
+ * Reserve a new memory region of the requested size to be used for mapping
+ * from the given fd (if any).
+ */
+static void *mmap_reserve(size_t size, int fd)
+{
+    int flags = MAP_PRIVATE;
+
+#if defined(__powerpc64__) && defined(__linux__)
+    /*
+     * On ppc64 mappings in the same segment (aka slice) must share the same
+     * page size. Since we will be re-allocating part of this segment
+     * from the supplied fd, we should make sure to use the same page size, to
+     * this end we mmap the supplied fd.  In this case, set MAP_NORESERVE to
+     * avoid allocating backing store memory.
+     * We do this unless we are using the system page size, in which case
+     * anonymous memory is OK.
+     */
+    if (fd == -1 || qemu_fd_getpagesize(fd) == qemu_real_host_page_size) {
+        fd = -1;
+        flags |= MAP_ANONYMOUS;
+    } else {
+        flags |= MAP_NORESERVE;
+    }
+#else
+    fd = -1;
+    flags |= MAP_ANONYMOUS;
+#endif
+
+    return mmap(0, size, PROT_NONE, flags, fd, 0);
+}
+
 static inline size_t mmap_pagesize(int fd)
 {
 #if defined(__powerpc64__) && defined(__linux__)
@@ -101,7 +133,6 @@ void *qemu_ram_mmap(int fd,
     const size_t pagesize = mmap_pagesize(fd);
     int flags;
     int map_sync_flags = 0;
-    int guardfd;
     size_t offset;
     size_t total;
     void *guardptr;
@@ -113,30 +144,7 @@ void *qemu_ram_mmap(int fd,
      */
     total = size + align;
 
-#if defined(__powerpc64__) && defined(__linux__)
-    /* On ppc64 mappings in the same segment (aka slice) must share the same
-     * page size. Since we will be re-allocating part of this segment
-     * from the supplied fd, we should make sure to use the same page size, to
-     * this end we mmap the supplied fd.  In this case, set MAP_NORESERVE to
-     * avoid allocating backing store memory.
-     * We do this unless we are using the system page size, in which case
-     * anonymous memory is OK.
-     */
-    flags = MAP_PRIVATE;
-    if (fd == -1 || pagesize == qemu_real_host_page_size) {
-        guardfd = -1;
-        flags |= MAP_ANONYMOUS;
-    } else {
-        guardfd = fd;
-        flags |= MAP_NORESERVE;
-    }
-#else
-    guardfd = -1;
-    flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#endif
-
-    guardptr = mmap(0, total, PROT_NONE, flags, guardfd, 0);
-
+    guardptr = mmap_reserve(total, fd);
     if (guardptr == MAP_FAILED) {
         return MAP_FAILED;
     }
-- 
2.24.1


Re: [PATCH v1 06/13] util/mmap-alloc: Factor out reserving of a memory region to mmap_reserve()
Posted by Richard Henderson 5 years, 9 months ago
On 2/3/20 6:31 PM, David Hildenbrand wrote:
> We want to reserve a memory region without actually populating memory.
> Let's factor that out.
> 
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Cc: Greg Kurz <groug@kaod.org>
> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
> Cc: Eduardo Habkost <ehabkost@redhat.com>
> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  util/mmap-alloc.c | 58 +++++++++++++++++++++++++++--------------------
>  1 file changed, 33 insertions(+), 25 deletions(-)
> 
> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> index 82f02a2cec..43a26f38a8 100644
> --- a/util/mmap-alloc.c
> +++ b/util/mmap-alloc.c
> @@ -82,6 +82,38 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
>      return qemu_real_host_page_size;
>  }
>  
> +/*
> + * Reserve a new memory region of the requested size to be used for mapping
> + * from the given fd (if any).
> + */
> +static void *mmap_reserve(size_t size, int fd)
> +{
> +    int flags = MAP_PRIVATE;
> +
> +#if defined(__powerpc64__) && defined(__linux__)
> +    /*
> +     * On ppc64 mappings in the same segment (aka slice) must share the same
> +     * page size. Since we will be re-allocating part of this segment
> +     * from the supplied fd, we should make sure to use the same page size, to
> +     * this end we mmap the supplied fd.  In this case, set MAP_NORESERVE to
> +     * avoid allocating backing store memory.
> +     * We do this unless we are using the system page size, in which case
> +     * anonymous memory is OK.
> +     */
> +    if (fd == -1 || qemu_fd_getpagesize(fd) == qemu_real_host_page_size) {
> +        fd = -1;
> +        flags |= MAP_ANONYMOUS;
> +    } else {
> +        flags |= MAP_NORESERVE;
> +    }
> +#else
> +    fd = -1;
> +    flags |= MAP_ANONYMOUS;
> +#endif

Because this is just code movement,
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

But is there a reason not to add MAP_NORESERVE all of the time?
It seems to match intent, in that we're reserving vma but not planning to use
the memory, anonymous or not.


r~

Re: [PATCH v1 06/13] util/mmap-alloc: Factor out reserving of a memory region to mmap_reserve()
Posted by David Hildenbrand 5 years, 9 months ago
On 06.02.20 12:55, Richard Henderson wrote:
> On 2/3/20 6:31 PM, David Hildenbrand wrote:
>> We want to reserve a memory region without actually populating memory.
>> Let's factor that out.
>>
>> Cc: "Michael S. Tsirkin" <mst@redhat.com>
>> Cc: Greg Kurz <groug@kaod.org>
>> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
>> Cc: Eduardo Habkost <ehabkost@redhat.com>
>> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>>  util/mmap-alloc.c | 58 +++++++++++++++++++++++++++--------------------
>>  1 file changed, 33 insertions(+), 25 deletions(-)
>>
>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
>> index 82f02a2cec..43a26f38a8 100644
>> --- a/util/mmap-alloc.c
>> +++ b/util/mmap-alloc.c
>> @@ -82,6 +82,38 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
>>      return qemu_real_host_page_size;
>>  }
>>  
>> +/*
>> + * Reserve a new memory region of the requested size to be used for mapping
>> + * from the given fd (if any).
>> + */
>> +static void *mmap_reserve(size_t size, int fd)
>> +{
>> +    int flags = MAP_PRIVATE;
>> +
>> +#if defined(__powerpc64__) && defined(__linux__)
>> +    /*
>> +     * On ppc64 mappings in the same segment (aka slice) must share the same
>> +     * page size. Since we will be re-allocating part of this segment
>> +     * from the supplied fd, we should make sure to use the same page size, to
>> +     * this end we mmap the supplied fd.  In this case, set MAP_NORESERVE to
>> +     * avoid allocating backing store memory.
>> +     * We do this unless we are using the system page size, in which case
>> +     * anonymous memory is OK.
>> +     */
>> +    if (fd == -1 || qemu_fd_getpagesize(fd) == qemu_real_host_page_size) {
>> +        fd = -1;
>> +        flags |= MAP_ANONYMOUS;
>> +    } else {
>> +        flags |= MAP_NORESERVE;
>> +    }
>> +#else
>> +    fd = -1;
>> +    flags |= MAP_ANONYMOUS;
>> +#endif
> 
> Because this is just code movement,
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> 
> But is there a reason not to add MAP_NORESERVE all of the time?
> It seems to match intent, in that we're reserving vma but not planning to use
> the memory, anonymous or not.

AFAIK, if you mmap something PROT_NONE, it's an implicit MAP_NORESERVE.
I keep setting in conditionally, because I am not sure if any POSIX
system (or older kernel) might choke on MAP_NORESERVE. e.g.,

"In kernels before 2.6, this flag had effect only for private writable
mappings." sounds like it would get ignored. But also sounds like it's
somewhat Linux specific.

-- 
Thanks,

David / dhildenb


Re: [PATCH v1 06/13] util/mmap-alloc: Factor out reserving of a memory region to mmap_reserve()
Posted by Murilo Opsfelder Araújo 5 years, 9 months ago
Hello, David.

On 2/3/20 3:31 PM, David Hildenbrand wrote:
> We want to reserve a memory region without actually populating memory.
> Let's factor that out.
> 
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Cc: Greg Kurz <groug@kaod.org>
> Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
> Cc: Eduardo Habkost <ehabkost@redhat.com>
> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>

Acked-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>

> ---
>   util/mmap-alloc.c | 58 +++++++++++++++++++++++++++--------------------
>   1 file changed, 33 insertions(+), 25 deletions(-)
> 
> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> index 82f02a2cec..43a26f38a8 100644
> --- a/util/mmap-alloc.c
> +++ b/util/mmap-alloc.c
> @@ -82,6 +82,38 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
>       return qemu_real_host_page_size;
>   }
>   
> +/*
> + * Reserve a new memory region of the requested size to be used for mapping
> + * from the given fd (if any).
> + */
> +static void *mmap_reserve(size_t size, int fd)
> +{
> +    int flags = MAP_PRIVATE;
> +
> +#if defined(__powerpc64__) && defined(__linux__)
> +    /*
> +     * On ppc64 mappings in the same segment (aka slice) must share the same
> +     * page size. Since we will be re-allocating part of this segment
> +     * from the supplied fd, we should make sure to use the same page size, to
> +     * this end we mmap the supplied fd.  In this case, set MAP_NORESERVE to
> +     * avoid allocating backing store memory.
> +     * We do this unless we are using the system page size, in which case
> +     * anonymous memory is OK.
> +     */
> +    if (fd == -1 || qemu_fd_getpagesize(fd) == qemu_real_host_page_size) {
> +        fd = -1;
> +        flags |= MAP_ANONYMOUS;
> +    } else {
> +        flags |= MAP_NORESERVE;
> +    }
> +#else
> +    fd = -1;
> +    flags |= MAP_ANONYMOUS;
> +#endif
> +
> +    return mmap(0, size, PROT_NONE, flags, fd, 0);
> +}
> +
>   static inline size_t mmap_pagesize(int fd)
>   {
>   #if defined(__powerpc64__) && defined(__linux__)
> @@ -101,7 +133,6 @@ void *qemu_ram_mmap(int fd,
>       const size_t pagesize = mmap_pagesize(fd);
>       int flags;
>       int map_sync_flags = 0;
> -    int guardfd;
>       size_t offset;
>       size_t total;
>       void *guardptr;
> @@ -113,30 +144,7 @@ void *qemu_ram_mmap(int fd,
>        */
>       total = size + align;
>   
> -#if defined(__powerpc64__) && defined(__linux__)
> -    /* On ppc64 mappings in the same segment (aka slice) must share the same
> -     * page size. Since we will be re-allocating part of this segment
> -     * from the supplied fd, we should make sure to use the same page size, to
> -     * this end we mmap the supplied fd.  In this case, set MAP_NORESERVE to
> -     * avoid allocating backing store memory.
> -     * We do this unless we are using the system page size, in which case
> -     * anonymous memory is OK.
> -     */
> -    flags = MAP_PRIVATE;
> -    if (fd == -1 || pagesize == qemu_real_host_page_size) {
> -        guardfd = -1;
> -        flags |= MAP_ANONYMOUS;
> -    } else {
> -        guardfd = fd;
> -        flags |= MAP_NORESERVE;
> -    }
> -#else
> -    guardfd = -1;
> -    flags = MAP_PRIVATE | MAP_ANONYMOUS;
> -#endif
> -
> -    guardptr = mmap(0, total, PROT_NONE, flags, guardfd, 0);
> -
> +    guardptr = mmap_reserve(total, fd);
>       if (guardptr == MAP_FAILED) {
>           return MAP_FAILED;
>       }
> 

-- 
Murilo