From: Zhang Yi <yi.z.zhang@linux.intel.com>
When a file supporting DAX is used as vNVDIMM backend, mmap it with
MAP_SYNC flag in addition which can ensure file system metadata
synced in each guest writes to the backend file, without other QEMU
actions (e.g., periodic fsync() by QEMU).
Current, We have below different possible use cases:
1. pmem=on is set, shared=on is set, MAP_SYNC supported:
a: backend is a dax supporting file.
- MAP_SYNC will active.
b: backend is not a dax supporting file.
- mmap will trigger a warning. then MAP_SYNC flag will be ignored
2. The rest of cases:
- we will never pass the MAP_SYNC to mmap2
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com>
---
util/mmap-alloc.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 44 insertions(+), 1 deletion(-)
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 97bbeed..2f21efd 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -10,6 +10,13 @@
* later. See the COPYING file in the top-level directory.
*/
+#ifdef CONFIG_LINUX
+#include <linux/mman.h>
+#else /* !CONFIG_LINUX */
+#define MAP_SYNC 0x0
+#define MAP_SHARED_VALIDATE 0x0
+#endif /* CONFIG_LINUX */
+
#include "qemu/osdep.h"
#include "qemu/mmap-alloc.h"
#include "qemu/host-utils.h"
@@ -101,6 +108,8 @@ void *qemu_ram_mmap(int fd,
#else
void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#endif
+ int mmap_flags;
+ int map_sync_flags = 0;
size_t offset;
void *ptr1;
@@ -111,13 +120,47 @@ void *qemu_ram_mmap(int fd,
assert(is_power_of_2(align));
/* Always align to host page size */
assert(align >= getpagesize());
+ mmap_flags = shared ? MAP_SHARED : MAP_PRIVATE;
+ if (shared && is_pmem) {
+ map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
+ mmap_flags |= map_sync_flags;
+ }
offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
MAP_FIXED |
(fd == -1 ? MAP_ANONYMOUS : 0) |
- (shared ? MAP_SHARED : MAP_PRIVATE),
+ mmap_flags,
fd, 0);
+
+
+ if (ptr1 == MAP_FAILED && map_sync_flags) {
+ if (errno == ENOTSUP) {
+ char *proc_link, *file_name;
+ int len;
+ proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
+ file_name = g_malloc0(PATH_MAX);
+ len = readlink(proc_link, file_name, PATH_MAX - 1);
+ if (len < 0) {
+ len = 0;
+ }
+ file_name[len] = '\0';
+ fprintf(stderr, "Warning: requesting persistence across crashes "
+ "for backend file %s failed. Proceeding without "
+ "persistence, data might become corrupted in case of host "
+ "crash.\n", file_name);
+ g_free(proc_link);
+ g_free(file_name);
+ }
+ /* if map failed with MAP_SHARED_VALIDATE | MAP_SYNC,
+ * we will remove these flags to handle compatibility.
+ */
+ ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
+ MAP_FIXED |
+ (fd == -1 ? MAP_ANONYMOUS : 0) |
+ MAP_SHARED,
+ fd, 0);
+ }
if (ptr1 == MAP_FAILED) {
munmap(ptr, total);
return MAP_FAILED;
--
2.7.4
Hi,
I found out that this series missed QEMU 4.0 and I was going to
queue for 4.1, but unfortunately this patch conflicts with:
commit 2044c3e7116eeac0449dcb4a4130cc8f8b9310da
Author: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
Date: Wed Jan 30 21:36:04 2019 -0200
mmap-alloc: unfold qemu_ram_mmap()
Unfold parts of qemu_ram_mmap() for the sake of understanding, moving
declarations to the top, and keeping architecture-specifics in the
ifdef-else blocks. No changes in the function behaviour.
Give ptr and ptr1 meaningful names:
ptr -> guardptr : pointer to the PROT_NONE guard region
ptr1 -> ptr : pointer to the mapped memory returned to caller
Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
I'm queueing patches 1-3 into machine-next[1], so only patches 4
and 5 need to be refreshed and resubmitted.
[1] https://github.com/ehabkost/qemu.git machine-next
On Fri, Feb 08, 2019 at 06:11:11PM +0800, Zhang, Yi wrote:
> From: Zhang Yi <yi.z.zhang@linux.intel.com>
>
> When a file supporting DAX is used as vNVDIMM backend, mmap it with
> MAP_SYNC flag in addition which can ensure file system metadata
> synced in each guest writes to the backend file, without other QEMU
> actions (e.g., periodic fsync() by QEMU).
>
> Current, We have below different possible use cases:
>
> 1. pmem=on is set, shared=on is set, MAP_SYNC supported:
> a: backend is a dax supporting file.
> - MAP_SYNC will active.
> b: backend is not a dax supporting file.
> - mmap will trigger a warning. then MAP_SYNC flag will be ignored
>
> 2. The rest of cases:
> - we will never pass the MAP_SYNC to mmap2
>
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com>
> ---
> util/mmap-alloc.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 44 insertions(+), 1 deletion(-)
>
> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> index 97bbeed..2f21efd 100644
> --- a/util/mmap-alloc.c
> +++ b/util/mmap-alloc.c
> @@ -10,6 +10,13 @@
> * later. See the COPYING file in the top-level directory.
> */
>
> +#ifdef CONFIG_LINUX
> +#include <linux/mman.h>
> +#else /* !CONFIG_LINUX */
> +#define MAP_SYNC 0x0
> +#define MAP_SHARED_VALIDATE 0x0
> +#endif /* CONFIG_LINUX */
> +
> #include "qemu/osdep.h"
> #include "qemu/mmap-alloc.h"
> #include "qemu/host-utils.h"
> @@ -101,6 +108,8 @@ void *qemu_ram_mmap(int fd,
> #else
> void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> #endif
> + int mmap_flags;
> + int map_sync_flags = 0;
> size_t offset;
> void *ptr1;
>
> @@ -111,13 +120,47 @@ void *qemu_ram_mmap(int fd,
> assert(is_power_of_2(align));
> /* Always align to host page size */
> assert(align >= getpagesize());
> + mmap_flags = shared ? MAP_SHARED : MAP_PRIVATE;
> + if (shared && is_pmem) {
> + map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
> + mmap_flags |= map_sync_flags;
> + }
>
> offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
> ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
> MAP_FIXED |
> (fd == -1 ? MAP_ANONYMOUS : 0) |
> - (shared ? MAP_SHARED : MAP_PRIVATE),
> + mmap_flags,
> fd, 0);
> +
> +
> + if (ptr1 == MAP_FAILED && map_sync_flags) {
> + if (errno == ENOTSUP) {
> + char *proc_link, *file_name;
> + int len;
> + proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
> + file_name = g_malloc0(PATH_MAX);
> + len = readlink(proc_link, file_name, PATH_MAX - 1);
> + if (len < 0) {
> + len = 0;
> + }
> + file_name[len] = '\0';
> + fprintf(stderr, "Warning: requesting persistence across crashes "
> + "for backend file %s failed. Proceeding without "
> + "persistence, data might become corrupted in case of host "
> + "crash.\n", file_name);
> + g_free(proc_link);
> + g_free(file_name);
> + }
> + /* if map failed with MAP_SHARED_VALIDATE | MAP_SYNC,
> + * we will remove these flags to handle compatibility.
> + */
> + ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
> + MAP_FIXED |
> + (fd == -1 ? MAP_ANONYMOUS : 0) |
> + MAP_SHARED,
> + fd, 0);
> + }
> if (ptr1 == MAP_FAILED) {
> munmap(ptr, total);
> return MAP_FAILED;
> --
> 2.7.4
>
>
--
Eduardo
On Thu, Apr 18, 2019 at 07:05:16PM -0300, Eduardo Habkost wrote:
> Hi,
>
> I found out that this series missed QEMU 4.0 and I was going to
> queue for 4.1, but unfortunately this patch conflicts with:
>
> commit 2044c3e7116eeac0449dcb4a4130cc8f8b9310da
> Author: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
> Date: Wed Jan 30 21:36:04 2019 -0200
>
> mmap-alloc: unfold qemu_ram_mmap()
[...]
> On Fri, Feb 08, 2019 at 06:11:11PM +0800, Zhang, Yi wrote:
> > From: Zhang Yi <yi.z.zhang@linux.intel.com>
> >
> > When a file supporting DAX is used as vNVDIMM backend, mmap it with
> > MAP_SYNC flag in addition which can ensure file system metadata
> > synced in each guest writes to the backend file, without other QEMU
> > actions (e.g., periodic fsync() by QEMU).
> >
> > Current, We have below different possible use cases:
> >
> > 1. pmem=on is set, shared=on is set, MAP_SYNC supported:
> > a: backend is a dax supporting file.
> > - MAP_SYNC will active.
> > b: backend is not a dax supporting file.
> > - mmap will trigger a warning. then MAP_SYNC flag will be ignored
> >
> > 2. The rest of cases:
> > - we will never pass the MAP_SYNC to mmap2
> >
> > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> > Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com>
Untested patch rebase is below. Can Intel help test it and
submit v14?
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com>
[ehabkost: Rebased patch to latest code on master]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
util/mmap-alloc.c | 41 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 39 insertions(+), 2 deletions(-)
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 9713f4b960..7063a44c7d 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -10,6 +10,13 @@
* later. See the COPYING file in the top-level directory.
*/
+#ifdef CONFIG_LINUX
+#include <linux/mman.h>
+#else /* !CONFIG_LINUX */
+#define MAP_SYNC 0x0
+#define MAP_SHARED_VALIDATE 0x0
+#endif /* CONFIG_LINUX */
+
#include "qemu/osdep.h"
#include "qemu/mmap-alloc.h"
#include "qemu/host-utils.h"
@@ -82,6 +89,7 @@ void *qemu_ram_mmap(int fd,
bool is_pmem)
{
int flags;
+ int map_sync_flags = 0;
int guardfd;
size_t offset;
size_t pagesize;
@@ -132,10 +140,39 @@ void *qemu_ram_mmap(int fd,
flags = MAP_FIXED;
flags |= fd == -1 ? MAP_ANONYMOUS : 0;
flags |= shared ? MAP_SHARED : MAP_PRIVATE;
+ if (shared && is_pmem) {
+ map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
+ }
offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
- ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, flags, fd, 0);
-
+ ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
+ flags | map_sync_flags, fd, 0);
+
+ if (ptr == MAP_FAILED && map_sync_flags) {
+ if (errno == ENOTSUP) {
+ char *proc_link, *file_name;
+ int len;
+ proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
+ file_name = g_malloc0(PATH_MAX);
+ len = readlink(proc_link, file_name, PATH_MAX - 1);
+ if (len < 0) {
+ len = 0;
+ }
+ file_name[len] = '\0';
+ fprintf(stderr, "Warning: requesting persistence across crashes "
+ "for backend file %s failed. Proceeding without "
+ "persistence, data might become corrupted in case of host "
+ "crash.\n", file_name);
+ g_free(proc_link);
+ g_free(file_name);
+ }
+ /*
+ * if map failed with MAP_SHARED_VALIDATE | MAP_SYNC,
+ * we will remove these flags to handle compatibility.
+ */
+ ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
+ flags, fd, 0);
+ }
if (ptr == MAP_FAILED) {
munmap(guardptr, total);
return MAP_FAILED;
--
2.18.0.rc1.1.g3f1ff2140
On Thu, Apr 18, 2019 at 07:33:01PM -0300, Eduardo Habkost wrote: >On Thu, Apr 18, 2019 at 07:05:16PM -0300, Eduardo Habkost wrote: >> Hi, >> >> I found out that this series missed QEMU 4.0 and I was going to >> queue for 4.1, but unfortunately this patch conflicts with: >> >> commit 2044c3e7116eeac0449dcb4a4130cc8f8b9310da >> Author: Murilo Opsfelder Araujo <muriloo@linux.ibm.com> >> Date: Wed Jan 30 21:36:04 2019 -0200 >> >> mmap-alloc: unfold qemu_ram_mmap() >[...] >> On Fri, Feb 08, 2019 at 06:11:11PM +0800, Zhang, Yi wrote: >> > From: Zhang Yi <yi.z.zhang@linux.intel.com> >> > >> > When a file supporting DAX is used as vNVDIMM backend, mmap it with >> > MAP_SYNC flag in addition which can ensure file system metadata >> > synced in each guest writes to the backend file, without other QEMU >> > actions (e.g., periodic fsync() by QEMU). >> > >> > Current, We have below different possible use cases: >> > >> > 1. pmem=on is set, shared=on is set, MAP_SYNC supported: >> > a: backend is a dax supporting file. >> > - MAP_SYNC will active. >> > b: backend is not a dax supporting file. >> > - mmap will trigger a warning. then MAP_SYNC flag will be ignored >> > >> > 2. The rest of cases: >> > - we will never pass the MAP_SYNC to mmap2 >> > >> > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com> >> > Signed-off-by: Zhang Yi <yi.z.zhang@linux.intel.com> > >Untested patch rebase is below. Can Intel help test it and >submit v14? > Thanks Eduardo, the patch looks good to me and I have tested it. -- Wei Yang Help you, Help me
© 2016 - 2025 Red Hat, Inc.