From nobody Mon Apr 29 09:44:45 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1517988991989352.09202465582405; Tue, 6 Feb 2018 23:36:31 -0800 (PST) Received: from localhost ([::1]:58521 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKHP-0006dX-2R for importer@patchew.org; Wed, 07 Feb 2018 02:36:31 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43856) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKFA-0004p3-6v for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:13 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ejKF7-0000Vf-3S for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:12 -0500 Received: from mga09.intel.com ([134.134.136.24]:13680) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1ejKF6-0000Td-MH for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:09 -0500 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Feb 2018 23:34:07 -0800 Received: from hz-desktop.sh.intel.com (HELO localhost) ([10.239.13.35]) by fmsmga002.fm.intel.com with ESMTP; 06 Feb 2018 23:34:05 -0800 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.46,471,1511856000"; d="scan'208";a="17873904" From: Haozhong Zhang To: qemu-devel@nongnu.org Date: Wed, 7 Feb 2018 15:33:24 +0800 Message-Id: 
<20180207073331.14158-2-haozhong.zhang@intel.com> X-Mailer: git-send-email 2.14.1 In-Reply-To: <20180207073331.14158-1-haozhong.zhang@intel.com> References: <20180207073331.14158-1-haozhong.zhang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.24 Subject: [Qemu-devel] [PATCH v2 1/8] memory, exec: switch file ram allocation functions to 'flags' parameters X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Haozhong Zhang , Xiao Guangrong , mst@redhat.com, Juan Quintela , dgilbert@redhat.com, Stefan Hajnoczi , Paolo Bonzini , Igor Mammedov , Dan Williams , Eduardo Habkost Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" As more flag parameters besides the existing 'share' are going to be added to following functions memory_region_init_ram_from_file qemu_ram_alloc_from_fd qemu_ram_alloc_from_file , let's switch them to use the 'flags' parameters so as to ease future flag additions. The existing 'share' flag is converted to the QEMU_RAM_SHARE bit in flags, and other flag bits are ignored by above functions right now. 
Signed-off-by: Haozhong Zhang --- backends/hostmem-file.c | 3 ++- exec.c | 7 ++++--- include/exec/memory.h | 10 ++++++++-- include/exec/ram_addr.h | 25 +++++++++++++++++++++++-- memory.c | 8 +++++--- numa.c | 2 +- 6 files changed, 43 insertions(+), 12 deletions(-) diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index 134b08d63a..30df843d90 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -58,7 +58,8 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Err= or **errp) path =3D object_get_canonical_path(OBJECT(backend)); memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), path, - backend->size, fb->align, backend->share, + backend->size, fb->align, + backend->share ? QEMU_RAM_SHARE : 0, fb->mem_path, errp); g_free(path); } diff --git a/exec.c b/exec.c index 5e56efefeb..16b373a86b 100644 --- a/exec.c +++ b/exec.c @@ -2000,12 +2000,13 @@ static void ram_block_add(RAMBlock *new_block, Erro= r **errp, bool shared) =20 #ifdef __linux__ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, - bool share, int fd, + uint64_t flags, int fd, Error **errp) { RAMBlock *new_block; Error *local_err =3D NULL; int64_t file_size; + bool share =3D flags & QEMU_RAM_SHARE; =20 if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); @@ -2061,7 +2062,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, Mem= oryRegion *mr, =20 =20 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, - bool share, const char *mem_path, + uint64_t flags, const char *mem_path, Error **errp) { int fd; @@ -2073,7 +2074,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, M= emoryRegion *mr, return NULL; } =20 - block =3D qemu_ram_alloc_from_fd(size, mr, share, fd, errp); + block =3D qemu_ram_alloc_from_fd(size, mr, flags, fd, errp); if (!block) { if (created) { unlink(mem_path); diff --git a/include/exec/memory.h b/include/exec/memory.h index 1b02bbd334..d87258b6ae 100644 --- a/include/exec/memory.h +++ 
b/include/exec/memory.h @@ -479,6 +479,9 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, void *host), Error **errp); #ifdef __linux__ + +#define QEMU_RAM_SHARE (1UL << 0) + /** * memory_region_init_ram_from_file: Initialize RAM memory region with a * mmap-ed backend. @@ -490,7 +493,10 @@ void memory_region_init_resizeable_ram(MemoryRegion *m= r, * @size: size of the region. * @align: alignment of the region base address; if 0, the default alignme= nt * (getpagesize()) will be used. - * @share: %true if memory must be mmaped with the MAP_SHARED flag + * @flags: specify properties of this memory region, which can be one or b= it-or + * of following values: + * - QEMU_RAM_SHARE: memory must be mmaped with the MAP_SHARED flag + * Other bits are ignored. * @path: the path in which to allocate the RAM. * @errp: pointer to Error*, to store an error if it happens. * @@ -502,7 +508,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, const char *name, uint64_t size, uint64_t align, - bool share, + uint64_t flags, const char *path, Error **errp); =20 diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index cf2446a176..b8b01d1eb9 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -72,12 +72,33 @@ static inline unsigned long int ramblock_recv_bitmap_of= fset(void *host_addr, =20 long qemu_getrampagesize(void); unsigned long last_ram_page(void); + +/** + * qemu_ram_alloc_from_file, + * qemu_ram_alloc_from_fd: Allocate a ram block from the specified back + * file or device + * + * Parameters: + * @size: the size in bytes of the ram block + * @mr: the memory region where the ram block is + * @flags: specify the properties of the ram block, which can be one + * or bit-or of following values + * - QEMU_RAM_SHARE: mmap the back file or device with MAP_SHARED + * Other bits are ignored. 
+ * @mem_path or @fd: specify the back file or device + * @errp: pointer to Error*, to store an error if it happens + * + * Return: + * On success, return a pointer to the ram block. + * On failure, return NULL. + */ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, - bool share, const char *mem_path, + uint64_t flags, const char *mem_path, Error **errp); RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, - bool share, int fd, + uint64_t flags, int fd, Error **errp); + RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr, Error **errp); RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr, diff --git a/memory.c b/memory.c index 3211fdc15f..229f64b24b 100644 --- a/memory.c +++ b/memory.c @@ -1581,7 +1581,7 @@ void memory_region_init_ram_from_file(MemoryRegion *m= r, const char *name, uint64_t size, uint64_t align, - bool share, + uint64_t flags, const char *path, Error **errp) { @@ -1590,7 +1590,7 @@ void memory_region_init_ram_from_file(MemoryRegion *m= r, mr->terminates =3D true; mr->destructor =3D memory_region_destructor_ram; mr->align =3D align; - mr->ram_block =3D qemu_ram_alloc_from_file(size, mr, share, path, errp= ); + mr->ram_block =3D qemu_ram_alloc_from_file(size, mr, flags, path, errp= ); mr->dirty_log_mask =3D tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; } =20 @@ -1606,7 +1606,9 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, mr->ram =3D true; mr->terminates =3D true; mr->destructor =3D memory_region_destructor_ram; - mr->ram_block =3D qemu_ram_alloc_from_fd(size, mr, share, fd, errp); + mr->ram_block =3D qemu_ram_alloc_from_fd(size, mr, + share ? QEMU_RAM_SHARE : 0, + fd, errp); mr->dirty_log_mask =3D tcg_enabled() ? 
(1 << DIRTY_MEMORY_CODE) : 0; } #endif diff --git a/numa.c b/numa.c index 83675a03f3..fa202a376d 100644 --- a/numa.c +++ b/numa.c @@ -456,7 +456,7 @@ static void allocate_system_memory_nonnuma(MemoryRegion= *mr, Object *owner, if (mem_path) { #ifdef __linux__ Error *err =3D NULL; - memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, fal= se, + memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, 0, mem_path, &err); if (err) { error_report_err(err); --=20 2.14.1 From nobody Mon Apr 29 09:44:45 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1517988993876798.7840459138408; Tue, 6 Feb 2018 23:36:33 -0800 (PST) Received: from localhost ([::1]:58522 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKHO-0006da-SO for importer@patchew.org; Wed, 07 Feb 2018 02:36:30 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43857) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKFA-0004p4-7E for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:14 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ejKF8-0000Wn-7m for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:12 -0500 Received: from mga09.intel.com ([134.134.136.24]:13680) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1ejKF7-0000Td-SE for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:10 -0500 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga102.jf.intel.com 
with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Feb 2018 23:34:09 -0800 Received: from hz-desktop.sh.intel.com (HELO localhost) ([10.239.13.35]) by fmsmga002.fm.intel.com with ESMTP; 06 Feb 2018 23:34:07 -0800 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.46,471,1511856000"; d="scan'208";a="17873910" From: Haozhong Zhang To: qemu-devel@nongnu.org Date: Wed, 7 Feb 2018 15:33:25 +0800 Message-Id: <20180207073331.14158-3-haozhong.zhang@intel.com> X-Mailer: git-send-email 2.14.1 In-Reply-To: <20180207073331.14158-1-haozhong.zhang@intel.com> References: <20180207073331.14158-1-haozhong.zhang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.24 Subject: [Qemu-devel] [PATCH v2 2/8] hostmem-file: add the 'pmem' option X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Haozhong Zhang , Xiao Guangrong , mst@redhat.com, Juan Quintela , dgilbert@redhat.com, Stefan Hajnoczi , Paolo Bonzini , Igor Mammedov , Dan Williams , Eduardo Habkost Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" When QEMU emulates vNVDIMM labels and migrates vNVDIMM devices, it needs to know whether the backend storage is a real persistent memory, in order to decide whether special operations should be performed to ensure the data persistence. This boolean option 'pmem' allows users to specify whether the backend storage of memory-backend-file is a real persistent memory. If 'pmem=3Don', QEMU will set the flag RAM_PMEM in the RAM block of the corresponding memory region. 
Signed-off-by: Haozhong Zhang --- backends/hostmem-file.c | 26 +++++++++++++++++++++++++- docs/nvdimm.txt | 14 ++++++++++++++ exec.c | 16 +++++++++++++++- include/exec/memory.h | 2 ++ include/exec/ram_addr.h | 3 +++ qemu-options.hx | 9 ++++++++- 6 files changed, 67 insertions(+), 3 deletions(-) diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index 30df843d90..5d706d471f 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -34,6 +34,7 @@ struct HostMemoryBackendFile { bool discard_data; char *mem_path; uint64_t align; + bool is_pmem; }; =20 static void @@ -59,7 +60,8 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Err= or **errp) memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), path, backend->size, fb->align, - backend->share ? QEMU_RAM_SHARE : 0, + (backend->share ? QEMU_RAM_SHARE : 0) | + (fb->is_pmem ? QEMU_RAM_PMEM : 0), fb->mem_path, errp); g_free(path); } @@ -131,6 +133,25 @@ static void file_memory_backend_set_align(Object *o, V= isitor *v, error_propagate(errp, local_err); } =20 +static bool file_memory_backend_get_pmem(Object *o, Error **errp) +{ + return MEMORY_BACKEND_FILE(o)->is_pmem; +} + +static void file_memory_backend_set_pmem(Object *o, bool value, Error **er= rp) +{ + HostMemoryBackend *backend =3D MEMORY_BACKEND(o); + HostMemoryBackendFile *fb =3D MEMORY_BACKEND_FILE(o); + + if (host_memory_backend_mr_inited(backend)) { + error_setg(errp, "cannot change property 'pmem' of %s '%s'", + object_get_typename(o), backend->id); + return; + } + + fb->is_pmem =3D value; +} + static void file_backend_unparent(Object *obj) { HostMemoryBackend *backend =3D MEMORY_BACKEND(obj); @@ -162,6 +183,9 @@ file_backend_class_init(ObjectClass *oc, void *data) file_memory_backend_get_align, file_memory_backend_set_align, NULL, NULL, &error_abort); + object_class_property_add_bool(oc, "pmem", + file_memory_backend_get_pmem, file_memory_backend_set_pmem, + &error_abort); } =20 static void 
file_backend_instance_finalize(Object *o) diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt index e903d8bb09..bcb2032672 100644 --- a/docs/nvdimm.txt +++ b/docs/nvdimm.txt @@ -153,3 +153,17 @@ guest NVDIMM region mapping structure. This unarmed f= lag indicates guest software that this vNVDIMM device contains a region that cannot accept persistent writes. In result, for example, the guest Linux NVDIMM driver, marks such vNVDIMM device as read-only. + +If the vNVDIMM backend is on the host persistent memory that can be +accessed in SNIA NVM Programming Model [1] (e.g., Intel NVDIMM), it's +suggested to set the 'pmem' option of memory-backend-file to 'on'. When +'pmem=3Don' and QEMU is built with libpmem [2] support (configured with +--enable-libpmem), QEMU will take necessary operations to guarantee +the persistence of its own writes to the vNVDIMM backend (e.g., in +vNVDIMM label emulation and live migration). + +References +---------- + +[1] SNIA NVM Programming Model: https://www.snia.org/sites/default/files/t= echnical_work/final/NVMProgrammingModel_v1.2.pdf +[2] PMDK: http://pmem.io/pmdk/ diff --git a/exec.c b/exec.c index 16b373a86b..1d83441afe 100644 --- a/exec.c +++ b/exec.c @@ -99,6 +99,9 @@ static MemoryRegion io_mem_unassigned; */ #define RAM_RESIZEABLE (1 << 2) =20 +/* RAM is backed by the persistent memory. */ +#define RAM_PMEM (1 << 3) + #endif =20 #ifdef TARGET_PAGE_BITS_VARY @@ -2007,6 +2010,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, Mem= oryRegion *mr, Error *local_err =3D NULL; int64_t file_size; bool share =3D flags & QEMU_RAM_SHARE; + bool is_pmem =3D flags & QEMU_RAM_PMEM; =20 if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); @@ -2043,7 +2047,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, Mem= oryRegion *mr, new_block->mr =3D mr; new_block->used_length =3D size; new_block->max_length =3D size; - new_block->flags =3D share ? RAM_SHARED : 0; + new_block->flags =3D (share ? RAM_SHARED : 0) | + (is_pmem ? 
RAM_PMEM : 0); new_block->host =3D file_ram_alloc(new_block, size, fd, !file_size, er= rp); if (!new_block->host) { g_free(new_block); @@ -3847,3 +3852,12 @@ void mtree_print_dispatch(fprintf_function mon, void= *f, } =20 #endif + +bool ramblock_is_pmem(RAMBlock *rb) +{ +#if !defined(CONFIG_USER_ONLY) + return rb->flags & RAM_PMEM; +#else + return false; +#endif +} diff --git a/include/exec/memory.h b/include/exec/memory.h index d87258b6ae..018334312a 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -481,6 +481,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, #ifdef __linux__ =20 #define QEMU_RAM_SHARE (1UL << 0) +#define QEMU_RAM_PMEM (1UL << 1) =20 /** * memory_region_init_ram_from_file: Initialize RAM memory region with a @@ -496,6 +497,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, * @flags: specify properties of this memory region, which can be one or b= it-or * of following values: * - QEMU_RAM_SHARE: memory must be mmaped with the MAP_SHARED flag + * - QEMU_RAM_PMEM: the backend @path is persistent memory * Other bits are ignored. * @path: the path in which to allocate the RAM. * @errp: pointer to Error*, to store an error if it happens. diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index b8b01d1eb9..f8d8614e4d 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -70,6 +70,8 @@ static inline unsigned long int ramblock_recv_bitmap_offs= et(void *host_addr, return host_addr_offset >> TARGET_PAGE_BITS; } =20 +bool ramblock_is_pmem(RAMBlock *rb); + long qemu_getrampagesize(void); unsigned long last_ram_page(void); =20 @@ -84,6 +86,7 @@ unsigned long last_ram_page(void); * @flags: specify the properties of the ram block, which can be one * or bit-or of following values * - QEMU_RAM_SHARE: mmap the back file or device with MAP_SHARED + * - QEMU_RAM_PMEM: the backend @mem_path or @fd is persistent me= mory * Other bits are ignored. 
* @mem_path or @fd: specify the back file or device * @errp: pointer to Error*, to store an error if it happens diff --git a/qemu-options.hx b/qemu-options.hx index 61497ce136..12aa842ab9 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3957,7 +3957,7 @@ property must be set. These objects are placed in the =20 @table @option =20 -@item -object memory-backend-file,id=3D@var{id},size=3D@var{size},mem-path= =3D@var{dir},share=3D@var{on|off},discard-data=3D@var{on|off},merge=3D@var{= on|off},dump=3D@var{on|off},prealloc=3D@var{on|off},host-nodes=3D@var{host-= nodes},policy=3D@var{default|preferred|bind|interleave},align=3D@var{align} +@item -object memory-backend-file,id=3D@var{id},size=3D@var{size},mem-path= =3D@var{dir},share=3D@var{on|off},discard-data=3D@var{on|off},merge=3D@var{= on|off},dump=3D@var{on|off},prealloc=3D@var{on|off},host-nodes=3D@var{host-= nodes},policy=3D@var{default|preferred|bind|interleave},align=3D@var{align}= ,pmem=3D@var{on|off} =20 Creates a memory file backend object, which can be used to back the guest RAM with huge pages. @@ -4025,6 +4025,13 @@ requires an alignment different than the default one= used by QEMU, eg the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In such cases, users can specify the required alignment via this option. =20 +The @option{pmem} option specifies whether the backend store specified +by @option{mem-path} is on the persistent memory that can be accessed +in the SNIA NVM programming model (e.g. Intel NVDIMM). +If @option{pmem}=3D@var{on}, QEMU will take necessary operations to +guarantee the persistence of its own writes to @option{mem-path} +(e.g. in vNVDIMM label emulation and live migration). 
+ @item -object memory-backend-ram,id=3D@var{id},merge=3D@var{on|off},dump= =3D@var{on|off},share=3D@var{on|off},prealloc=3D@var{on|off},size=3D@var{si= ze},host-nodes=3D@var{host-nodes},policy=3D@var{default|preferred|bind|inte= rleave} =20 Creates a memory backend object, which can be used to back the guest RAM. --=20 2.14.1 From nobody Mon Apr 29 09:44:45 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1517989160904700.3315897648171; Tue, 6 Feb 2018 23:39:20 -0800 (PST) Received: from localhost ([::1]:58638 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKK8-0001Tb-3i for importer@patchew.org; Wed, 07 Feb 2018 02:39:20 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43879) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKFB-0004qq-Ql for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:14 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ejKFA-0000YE-L1 for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:13 -0500 Received: from mga09.intel.com ([134.134.136.24]:13680) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1ejKFA-0000Td-9L for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:12 -0500 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Feb 2018 23:34:11 -0800 Received: from hz-desktop.sh.intel.com (HELO localhost) ([10.239.13.35]) by 
fmsmga002.fm.intel.com with ESMTP; 06 Feb 2018 23:34:09 -0800 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.46,471,1511856000"; d="scan'208";a="17873923" From: Haozhong Zhang To: qemu-devel@nongnu.org Date: Wed, 7 Feb 2018 15:33:26 +0800 Message-Id: <20180207073331.14158-4-haozhong.zhang@intel.com> X-Mailer: git-send-email 2.14.1 In-Reply-To: <20180207073331.14158-1-haozhong.zhang@intel.com> References: <20180207073331.14158-1-haozhong.zhang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.24 Subject: [Qemu-devel] [PATCH v2 3/8] configure: add libpmem support X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Haozhong Zhang , Xiao Guangrong , mst@redhat.com, Juan Quintela , dgilbert@redhat.com, Stefan Hajnoczi , Paolo Bonzini , Igor Mammedov , Dan Williams , Eduardo Habkost Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Add a pair of configure options --{enable,disable}-libpmem to control whether QEMU is compiled with PMDK libpmem [1]. QEMU may write to the host persistent memory (e.g. in vNVDIMM label emulation and live migration), so it must take the proper operations to ensure the persistence of its own writes. Depending on the CPU models and available instructions, the optimal operation can vary [2]. PMDK libpmem has already implemented those operations on multiple CPU models (x86 and ARM) and the logic to select the optimal ones, so QEMU can just use libpmem rather than re-implement them. 
[1] PMDK (formerly known as NVML), https://github.com/pmem/pmdk/ [2] https://github.com/pmem/pmdk/blob/38bfa652721a37fd94c0130ce0e3f5d8baa3e= d40/src/libpmem/pmem.c#L33 Signed-off-by: Haozhong Zhang --- configure | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/configure b/configure index 302fdc92ff..595967e5df 100755 --- a/configure +++ b/configure @@ -436,6 +436,7 @@ jemalloc=3D"no" replication=3D"yes" vxhs=3D"" libxml2=3D"" +libpmem=3D"" =20 supported_cpu=3D"no" supported_os=3D"no" @@ -1341,6 +1342,10 @@ for opt do ;; --disable-git-update) git_update=3Dno ;; + --enable-libpmem) libpmem=3Dyes + ;; + --disable-libpmem) libpmem=3Dno + ;; *) echo "ERROR: unknown option $opt" echo "Try '$0 --help' for more information" @@ -1592,6 +1597,7 @@ disabled with --disable-FEATURE, default is enabled i= f available: crypto-afalg Linux AF_ALG crypto backend driver vhost-user vhost-user support capstone capstone disassembler support + libpmem libpmem support =20 NOTE: The object files are built at the place where configure is launched EOF @@ -5205,6 +5211,30 @@ if compile_prog "" "" ; then have_utmpx=3Dyes fi =20 +########################################## +# check for libpmem + +if test "$libpmem" !=3D "no"; then + cat > $TMPC < +int main(void) +{ + pmem_is_pmem(0, 0); + return 0; +} +EOF + libpmem_libs=3D"-lpmem" + if compile_prog "" "$libpmem_libs" ; then + libs_softmmu=3D"$libpmem_libs $libs_softmmu" + libpmem=3D"yes" + else + if test "$libpmem" =3D "yes" ; then + feature_not_found "libpmem" "Install nvml or pmdk" + fi + libpmem=3D"no" + fi +fi + ########################################## # End of CC checks # After here, no more $cc or $ld runs @@ -5657,6 +5687,7 @@ echo "avx2 optimization $avx2_opt" echo "replication support $replication" echo "VxHS block device $vxhs" echo "capstone $capstone" +echo "libpmem support $libpmem" =20 if test "$sdl_too_old" =3D "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL 
support" @@ -6374,6 +6405,10 @@ if test "$vxhs" =3D "yes" ; then echo "VXHS_LIBS=3D$vxhs_libs" >> $config_host_mak fi =20 +if test "$libpmem" =3D "yes" ; then + echo "CONFIG_LIBPMEM=3Dy" >> $config_host_mak +fi + if test "$tcg_interpreter" =3D "yes"; then QEMU_INCLUDES=3D"-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES" elif test "$ARCH" =3D "sparc64" ; then --=20 2.14.1 From nobody Mon Apr 29 09:44:45 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1517989305021920.5642840919519; Tue, 6 Feb 2018 23:41:45 -0800 (PST) Received: from localhost ([::1]:58812 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKMS-0004H0-82 for importer@patchew.org; Wed, 07 Feb 2018 02:41:44 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43900) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKFD-0004st-Pb for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:18 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ejKFC-0000Zr-Ru for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:15 -0500 Received: from mga09.intel.com ([134.134.136.24]:13680) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1ejKFC-0000Td-KJ for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:14 -0500 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Feb 2018 23:34:14 -0800 Received: from hz-desktop.sh.intel.com (HELO 
localhost) ([10.239.13.35]) by fmsmga002.fm.intel.com with ESMTP; 06 Feb 2018 23:34:12 -0800 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.46,471,1511856000"; d="scan'208";a="17873975" From: Haozhong Zhang To: qemu-devel@nongnu.org Date: Wed, 7 Feb 2018 15:33:27 +0800 Message-Id: <20180207073331.14158-5-haozhong.zhang@intel.com> X-Mailer: git-send-email 2.14.1 In-Reply-To: <20180207073331.14158-1-haozhong.zhang@intel.com> References: <20180207073331.14158-1-haozhong.zhang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.24 Subject: [Qemu-devel] [PATCH v2 4/8] mem/nvdimm: ensure write persistence to PMEM in label emulation X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Haozhong Zhang , Xiao Guangrong , mst@redhat.com, Juan Quintela , dgilbert@redhat.com, Stefan Hajnoczi , Paolo Bonzini , Igor Mammedov , Dan Williams , Eduardo Habkost Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Guest writes to vNVDIMM labels are intercepted and performed on the backend by QEMU. When the backend is a real persistent memory, QEMU needs to take proper operations to ensure its write persistence on the persistent memory. Otherwise, a host power failure may result in the loss of guest label configurations. 
Signed-off-by: Haozhong Zhang --- hw/mem/nvdimm.c | 9 ++++++++- include/qemu/pmem.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 include/qemu/pmem.h diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 61e677f92f..18861d1a7a 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -23,6 +23,7 @@ */ =20 #include "qemu/osdep.h" +#include "qemu/pmem.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "qapi-visit.h" @@ -156,11 +157,17 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvd= imm, const void *buf, { MemoryRegion *mr; PCDIMMDevice *dimm =3D PC_DIMM(nvdimm); + bool is_pmem =3D object_property_get_bool(OBJECT(dimm->hostmem), + "pmem", NULL); uint64_t backend_offset; =20 nvdimm_validate_rw_label_data(nvdimm, size, offset); =20 - memcpy(nvdimm->label_data + offset, buf, size); + if (!is_pmem) { + memcpy(nvdimm->label_data + offset, buf, size); + } else { + pmem_memcpy_persist(nvdimm->label_data + offset, buf, size); + } =20 mr =3D host_memory_backend_get_memory(dimm->hostmem, &error_abort); backend_offset =3D memory_region_size(mr) - nvdimm->label_size + offse= t; diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h new file mode 100644 index 0000000000..9017596ff0 --- /dev/null +++ b/include/qemu/pmem.h @@ -0,0 +1,31 @@ +/* + * Stub functions for libpmem. + * + * Copyright (c) 2018 Intel Corporation. + * + * Author: Haozhong Zhang + * + * This work is licensed under the terms of the GNU GPL, version 2 or late= r. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef QEMU_PMEM_H +#define QEMU_PMEM_H + +#ifdef CONFIG_LIBPMEM +#include +#else /* !CONFIG_LIBPMEM */ + +#include + +/* Stubs */ + +static inline void * +pmem_memcpy_persist(void *pmemdest, const void *src, size_t len) +{ + return memcpy(pmemdest, src, len); +} + +#endif /* CONFIG_LIBPMEM */ + +#endif /* !QEMU_PMEM_H */ --=20 2.14.1 From nobody Mon Apr 29 09:44:45 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1517989160607616.1534275657099; Tue, 6 Feb 2018 23:39:20 -0800 (PST) Received: from localhost ([::1]:58639 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKK7-0001Uo-Pz for importer@patchew.org; Wed, 07 Feb 2018 02:39:19 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43921) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKFG-0004tt-M7 for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:22 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ejKFF-0000bZ-H4 for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:18 -0500 Received: from mga09.intel.com ([134.134.136.24]:13680) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1ejKFF-0000Td-5R for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:17 -0500 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Feb 2018 23:34:16 -0800 Received: from hz-desktop.sh.intel.com (HELO localhost) 
([10.239.13.35]) by fmsmga002.fm.intel.com with ESMTP; 06 Feb 2018 23:34:14 -0800 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.46,471,1511856000"; d="scan'208";a="17873997" From: Haozhong Zhang To: qemu-devel@nongnu.org Date: Wed, 7 Feb 2018 15:33:28 +0800 Message-Id: <20180207073331.14158-6-haozhong.zhang@intel.com> X-Mailer: git-send-email 2.14.1 In-Reply-To: <20180207073331.14158-1-haozhong.zhang@intel.com> References: <20180207073331.14158-1-haozhong.zhang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.24 Subject: [Qemu-devel] [PATCH v2 5/8] migration/ram: ensure write persistence on loading zero pages to PMEM X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Haozhong Zhang , Xiao Guangrong , mst@redhat.com, Juan Quintela , dgilbert@redhat.com, Stefan Hajnoczi , Paolo Bonzini , Igor Mammedov , Dan Williams , Eduardo Habkost Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" When loading a zero page, check whether it will be loaded to persistent memory. If yes, load it by libpmem function pmem_memset_nodrain(). Combined with a call to pmem_drain() at the end of RAM loading, we can guarantee all those zero pages are persistently loaded. Depending on the host HW/SW configurations, pmem_drain() can be "sfence". Therefore, we do not call pmem_drain() after each pmem_memset_nodrain(), or use pmem_memset_persist() (equivalent to pmem_memset_nodrain() + pmem_drain()), in order to avoid unnecessary overhead. 
Signed-off-by: Haozhong Zhang --- include/qemu/pmem.h | 9 +++++++++ migration/ram.c | 34 +++++++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h index 9017596ff0..861d8ecc21 100644 --- a/include/qemu/pmem.h +++ b/include/qemu/pmem.h @@ -26,6 +26,15 @@ pmem_memcpy_persist(void *pmemdest, const void *src, siz= e_t len) return memcpy(pmemdest, src, len); } =20 +static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len) +{ + return memset(pmemdest, c, len); +} + +static inline void pmem_drain(void) +{ +} + #endif /* CONFIG_LIBPMEM */ =20 #endif /* !QEMU_PMEM_H */ diff --git a/migration/ram.c b/migration/ram.c index cb1950f3eb..5a0e503818 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -49,6 +49,7 @@ #include "qemu/rcu_queue.h" #include "migration/colo.h" #include "migration/block.h" +#include "qemu/pmem.h" =20 /***********************************************************/ /* ram save/restore */ @@ -2467,6 +2468,20 @@ static inline void *host_from_ram_block_offset(RAMBl= ock *block, return block->host + offset; } =20 +static void ram_handle_compressed_common(void *host, uint8_t ch, uint64_t = size, + bool is_pmem) +{ + if (!ch && is_zero_range(host, size)) { + return; + } + + if (!is_pmem) { + memset(host, ch, size); + } else { + pmem_memset_nodrain(host, ch, size); + } +} + /** * ram_handle_compressed: handle the zero page case * @@ -2479,9 +2494,7 @@ static inline void *host_from_ram_block_offset(RAMBlo= ck *block, */ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) { - if (ch !=3D 0 || !is_zero_range(host, size)) { - memset(host, ch, size); - } + return ram_handle_compressed_common(host, ch, size, false); } =20 static void *do_data_decompress(void *opaque) @@ -2823,6 +2836,7 @@ static int ram_load(QEMUFile *f, void *opaque, int ve= rsion_id) bool postcopy_running =3D postcopy_is_running(); /* ADVISE is earlier, it shows the source has the postcopy 
capability = on */ bool postcopy_advised =3D postcopy_is_advised(); + bool need_pmem_drain =3D false; =20 seq_iter++; =20 @@ -2848,6 +2862,8 @@ static int ram_load(QEMUFile *f, void *opaque, int ve= rsion_id) ram_addr_t addr, total_ram_bytes; void *host =3D NULL; uint8_t ch; + RAMBlock *block =3D NULL; + bool is_pmem =3D false; =20 addr =3D qemu_get_be64(f); flags =3D addr & ~TARGET_PAGE_MASK; @@ -2864,7 +2880,7 @@ static int ram_load(QEMUFile *f, void *opaque, int ve= rsion_id) =20 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) { - RAMBlock *block =3D ram_block_from_stream(f, flags); + block =3D ram_block_from_stream(f, flags); =20 host =3D host_from_ram_block_offset(block, addr); if (!host) { @@ -2874,6 +2890,9 @@ static int ram_load(QEMUFile *f, void *opaque, int ve= rsion_id) } ramblock_recv_bitmap_set(block, host); trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host); + + is_pmem =3D ramblock_is_pmem(block); + need_pmem_drain =3D need_pmem_drain || is_pmem; } =20 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { @@ -2927,7 +2946,7 @@ static int ram_load(QEMUFile *f, void *opaque, int ve= rsion_id) =20 case RAM_SAVE_FLAG_ZERO: ch =3D qemu_get_byte(f); - ram_handle_compressed(host, ch, TARGET_PAGE_SIZE); + ram_handle_compressed_common(host, ch, TARGET_PAGE_SIZE, is_pm= em); break; =20 case RAM_SAVE_FLAG_PAGE: @@ -2970,6 +2989,11 @@ static int ram_load(QEMUFile *f, void *opaque, int v= ersion_id) } =20 wait_for_decompress_done(); + + if (need_pmem_drain) { + pmem_drain(); + } + rcu_read_unlock(); trace_ram_load_complete(ret, seq_iter); return ret; --=20 2.14.1 From nobody Mon Apr 29 09:44:45 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain 
of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1517989423831878.2578144954724; Tue, 6 Feb 2018 23:43:43 -0800 (PST) Received: from localhost ([::1]:59074 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKON-0006C8-3O for importer@patchew.org; Wed, 07 Feb 2018 02:43:43 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43934) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKFK-0004y6-23 for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:23 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ejKFH-0000cv-QK for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:22 -0500 Received: from mga09.intel.com ([134.134.136.24]:13680) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1ejKFH-0000Td-H3 for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:19 -0500 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Feb 2018 23:34:19 -0800 Received: from hz-desktop.sh.intel.com (HELO localhost) ([10.239.13.35]) by fmsmga002.fm.intel.com with ESMTP; 06 Feb 2018 23:34:17 -0800 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.46,471,1511856000"; d="scan'208";a="17874007" From: Haozhong Zhang To: qemu-devel@nongnu.org Date: Wed, 7 Feb 2018 15:33:29 +0800 Message-Id: <20180207073331.14158-7-haozhong.zhang@intel.com> X-Mailer: git-send-email 2.14.1 In-Reply-To: <20180207073331.14158-1-haozhong.zhang@intel.com> References: <20180207073331.14158-1-haozhong.zhang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. 
X-Received-From: 134.134.136.24 Subject: [Qemu-devel] [PATCH v2 6/8] migration/ram: ensure write persistence on loading normal pages to PMEM X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Haozhong Zhang , Xiao Guangrong , mst@redhat.com, Juan Quintela , dgilbert@redhat.com, Stefan Hajnoczi , Paolo Bonzini , Igor Mammedov , Dan Williams , Eduardo Habkost Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" When loading a normal page to persistent memory, load its data by libpmem function pmem_memcpy_nodrain() instead of memcpy(). Combined with a call to pmem_drain() at the end of memory loading, we can guarantee all those normal pages are persistently loaded to PMEM. Signed-off-by: Haozhong Zhang --- include/migration/qemu-file-types.h | 1 + include/qemu/pmem.h | 6 ++++++ migration/qemu-file.c | 41 ++++++++++++++++++++++++++++-----= ---- migration/ram.c | 6 +++++- 4 files changed, 43 insertions(+), 11 deletions(-) diff --git a/include/migration/qemu-file-types.h b/include/migration/qemu-f= ile-types.h index bd6d7dd7f9..bb5c547498 100644 --- a/include/migration/qemu-file-types.h +++ b/include/migration/qemu-file-types.h @@ -34,6 +34,7 @@ void qemu_put_be16(QEMUFile *f, unsigned int v); void qemu_put_be32(QEMUFile *f, unsigned int v); void qemu_put_be64(QEMUFile *f, uint64_t v); size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); +size_t qemu_get_buffer_to_pmem(QEMUFile *f, uint8_t *buf, size_t size); =20 int qemu_get_byte(QEMUFile *f); =20 diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h index 861d8ecc21..77ee1fc4eb 100644 --- a/include/qemu/pmem.h +++ b/include/qemu/pmem.h @@ -26,6 +26,12 @@ pmem_memcpy_persist(void *pmemdest, const void *src, siz= e_t len) return 
memcpy(pmemdest, src, len); } =20 +static inline void * +pmem_memcpy_nodrain(void *pmemdest, const void *src, size_t len) +{ + return memcpy(pmemdest, src, len); +} + static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len) { return memset(pmemdest, c, len); diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 2ab2bf362d..7e573010d9 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -26,6 +26,7 @@ #include "qemu-common.h" #include "qemu/error-report.h" #include "qemu/iov.h" +#include "qemu/pmem.h" #include "migration.h" #include "qemu-file.h" #include "trace.h" @@ -471,15 +472,8 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, si= ze_t size, size_t offset) return size; } =20 -/* - * Read 'size' bytes of data from the file into buf. - * 'size' can be larger than the internal buffer. - * - * It will return size bytes unless there was an error, in which case it w= ill - * return as many as it managed to read (assuming blocking fd's which - * all current QEMUFile are) - */ -size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) +static size_t +qemu_get_buffer_common(QEMUFile *f, uint8_t *buf, size_t size, bool is_pme= m) { size_t pending =3D size; size_t done =3D 0; @@ -492,7 +486,11 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size= _t size) if (res =3D=3D 0) { return done; } - memcpy(buf, src, res); + if (!is_pmem) { + memcpy(buf, src, res); + } else { + pmem_memcpy_nodrain(buf, src, res); + } qemu_file_skip(f, res); buf +=3D res; pending -=3D res; @@ -501,6 +499,29 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size= _t size) return done; } =20 +/* + * Read 'size' bytes of data from the file into buf. + * 'size' can be larger than the internal buffer. 
+ * + * It will return size bytes unless there was an error, in which case it w= ill + * return as many as it managed to read (assuming blocking fd's which + * all current QEMUFile are) + */ +size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) +{ + return qemu_get_buffer_common(f, buf, size, false); +} + +/* + * Mostly the same as qemu_get_buffer(), except that + * 1) it's for the case that 'buf' is in the persistent memory, and + * 2) it takes necessary operations to ensure the data persistence in 'buf= '. + */ +size_t qemu_get_buffer_to_pmem(QEMUFile *f, uint8_t *buf, size_t size) +{ + return qemu_get_buffer_common(f, buf, size, true); +} + /* * Read 'size' bytes of data from the file. * 'size' can be larger than the internal buffer. diff --git a/migration/ram.c b/migration/ram.c index 5a0e503818..5a79bbff64 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2950,7 +2950,11 @@ static int ram_load(QEMUFile *f, void *opaque, int v= ersion_id) break; =20 case RAM_SAVE_FLAG_PAGE: - qemu_get_buffer(f, host, TARGET_PAGE_SIZE); + if (!is_pmem) { + qemu_get_buffer(f, host, TARGET_PAGE_SIZE); + } else { + qemu_get_buffer_to_pmem(f, host, TARGET_PAGE_SIZE); + } break; =20 case RAM_SAVE_FLAG_COMPRESS_PAGE: --=20 2.14.1 From nobody Mon Apr 29 09:44:45 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1517989102130339.6707395456672; Tue, 6 Feb 2018 23:38:22 -0800 (PST) Received: from localhost ([::1]:58619 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) 
(envelope-from ) id 1ejKJB-0000HK-Ag for importer@patchew.org; Wed, 07 Feb 2018 02:38:21 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43945) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKFL-0004zP-B8 for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:24 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ejKFK-0000eW-CP for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:23 -0500 Received: from mga09.intel.com ([134.134.136.24]:13680) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1ejKFJ-0000Td-VF for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:22 -0500 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Feb 2018 23:34:21 -0800 Received: from hz-desktop.sh.intel.com (HELO localhost) ([10.239.13.35]) by fmsmga002.fm.intel.com with ESMTP; 06 Feb 2018 23:34:19 -0800 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.46,471,1511856000"; d="scan'208";a="17874021" From: Haozhong Zhang To: qemu-devel@nongnu.org Date: Wed, 7 Feb 2018 15:33:30 +0800 Message-Id: <20180207073331.14158-8-haozhong.zhang@intel.com> X-Mailer: git-send-email 2.14.1 In-Reply-To: <20180207073331.14158-1-haozhong.zhang@intel.com> References: <20180207073331.14158-1-haozhong.zhang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. 
X-Received-From: 134.134.136.24 Subject: [Qemu-devel] [PATCH v2 7/8] migration/ram: ensure write persistence on loading compressed pages to PMEM X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Haozhong Zhang , Xiao Guangrong , mst@redhat.com, Juan Quintela , dgilbert@redhat.com, Stefan Hajnoczi , Paolo Bonzini , Igor Mammedov , Dan Williams , Eduardo Habkost Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" When loading a compressed page to persistent memory, flush CPU cache after the data is decompressed. Combined with a call to pmem_drain() at the end of memory loading, we can guarantee those compressed pages are persistently loaded to PMEM. Signed-off-by: Haozhong Zhang --- include/qemu/pmem.h | 4 ++++ migration/ram.c | 16 +++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h index 77ee1fc4eb..20e3f6e71d 100644 --- a/include/qemu/pmem.h +++ b/include/qemu/pmem.h @@ -37,6 +37,10 @@ static inline void *pmem_memset_nodrain(void *pmemdest, = int c, size_t len) return memset(pmemdest, c, len); } =20 +static inline void pmem_flush(const void *addr, size_t len) +{ +} + static inline void pmem_drain(void) { } diff --git a/migration/ram.c b/migration/ram.c index 5a79bbff64..924d2b9537 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -274,6 +274,7 @@ struct DecompressParam { void *des; uint8_t *compbuf; int len; + bool is_pmem; }; typedef struct DecompressParam DecompressParam; =20 @@ -2502,7 +2503,7 @@ static void *do_data_decompress(void *opaque) DecompressParam *param =3D opaque; unsigned long pagesize; uint8_t *des; - int len; + int len, rc; =20 qemu_mutex_lock(&param->mutex); while (!param->quit) { @@ -2518,8 
+2519,11 @@ static void *do_data_decompress(void *opaque) * not a problem because the dirty page will be retransferred * and uncompress() won't break the data in other pages. */ - uncompress((Bytef *)des, &pagesize, - (const Bytef *)param->compbuf, len); + rc =3D uncompress((Bytef *)des, &pagesize, + (const Bytef *)param->compbuf, len); + if (rc =3D=3D Z_OK && param->is_pmem) { + pmem_flush(des, len); + } =20 qemu_mutex_lock(&decomp_done_lock); param->done =3D true; @@ -2605,7 +2609,8 @@ static void compress_threads_load_cleanup(void) } =20 static void decompress_data_with_multi_threads(QEMUFile *f, - void *host, int len) + void *host, int len, + bool is_pmem) { int idx, thread_count; =20 @@ -2619,6 +2624,7 @@ static void decompress_data_with_multi_threads(QEMUFi= le *f, qemu_get_buffer(f, decomp_param[idx].compbuf, len); decomp_param[idx].des =3D host; decomp_param[idx].len =3D len; + decomp_param[idx].is_pmem =3D is_pmem; qemu_cond_signal(&decomp_param[idx].cond); qemu_mutex_unlock(&decomp_param[idx].mutex); break; @@ -2964,7 +2970,7 @@ static int ram_load(QEMUFile *f, void *opaque, int ve= rsion_id) ret =3D -EINVAL; break; } - decompress_data_with_multi_threads(f, host, len); + decompress_data_with_multi_threads(f, host, len, is_pmem); break; =20 case RAM_SAVE_FLAG_XBZRLE: --=20 2.14.1 From nobody Mon Apr 29 09:44:45 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1517989255437854.8598876902137; Tue, 6 Feb 2018 23:40:55 -0800 (PST) Received: from localhost ([::1]:58773 
helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKLa-0003YY-Ei for importer@patchew.org; Wed, 07 Feb 2018 02:40:50 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43962) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejKFO-00053w-HT for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:27 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ejKFN-0000gY-8P for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:26 -0500 Received: from mga09.intel.com ([134.134.136.24]:13680) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1ejKFM-0000Td-Rr for qemu-devel@nongnu.org; Wed, 07 Feb 2018 02:34:25 -0500 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 06 Feb 2018 23:34:24 -0800 Received: from hz-desktop.sh.intel.com (HELO localhost) ([10.239.13.35]) by fmsmga002.fm.intel.com with ESMTP; 06 Feb 2018 23:34:22 -0800 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.46,471,1511856000"; d="scan'208";a="17874031" From: Haozhong Zhang To: qemu-devel@nongnu.org Date: Wed, 7 Feb 2018 15:33:31 +0800 Message-Id: <20180207073331.14158-9-haozhong.zhang@intel.com> X-Mailer: git-send-email 2.14.1 In-Reply-To: <20180207073331.14158-1-haozhong.zhang@intel.com> References: <20180207073331.14158-1-haozhong.zhang@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. 
X-Received-From: 134.134.136.24 Subject: [Qemu-devel] [PATCH v2 8/8] migration/ram: ensure write persistence on loading xbzrle pages to PMEM X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Haozhong Zhang , Xiao Guangrong , mst@redhat.com, Juan Quintela , dgilbert@redhat.com, Stefan Hajnoczi , Paolo Bonzini , Igor Mammedov , Dan Williams , Eduardo Habkost Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" When loading a xbzrle encoded page to persistent memory, load the data via libpmem function pmem_memcpy_nodrain() instead of memcpy(). Combined with a call to pmem_drain() at the end of memory loading, we can guarantee those xbzrle encoded pages are persistently loaded to PMEM. Signed-off-by: Haozhong Zhang --- migration/ram.c | 15 ++++++++++----- migration/xbzrle.c | 20 ++++++++++++++++++-- migration/xbzrle.h | 1 + 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 924d2b9537..87f977617d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2388,10 +2388,10 @@ static void ram_save_pending(QEMUFile *f, void *opa= que, uint64_t max_size, } } =20 -static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host, bool is_p= mem) { unsigned int xh_len; - int xh_flags; + int xh_flags, rc; uint8_t *loaded_data; =20 /* extract RLE header */ @@ -2413,8 +2413,13 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr,= void *host) qemu_get_buffer_in_place(f, &loaded_data, xh_len); =20 /* decode RLE */ - if (xbzrle_decode_buffer(loaded_data, xh_len, host, - TARGET_PAGE_SIZE) =3D=3D -1) { + if (!is_pmem) { + rc =3D xbzrle_decode_buffer(loaded_data, xh_len, host, TARGET_PAGE= 
_SIZE); + } else { + rc =3D xbzrle_decode_buffer_to_pmem(loaded_data, xh_len, host, + TARGET_PAGE_SIZE); + } + if (rc =3D=3D -1) { error_report("Failed to load XBZRLE page - decode error!"); return -1; } @@ -2974,7 +2979,7 @@ static int ram_load(QEMUFile *f, void *opaque, int ve= rsion_id) break; =20 case RAM_SAVE_FLAG_XBZRLE: - if (load_xbzrle(f, addr, host) < 0) { + if (load_xbzrle(f, addr, host, is_pmem) < 0) { error_report("Failed to decompress XBZRLE page at " RAM_ADDR_FMT, addr); ret =3D -EINVAL; diff --git a/migration/xbzrle.c b/migration/xbzrle.c index 1ba482ded9..499d8e1bfb 100644 --- a/migration/xbzrle.c +++ b/migration/xbzrle.c @@ -12,6 +12,7 @@ */ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/pmem.h" #include "xbzrle.h" =20 /* @@ -126,7 +127,8 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new= _buf, int slen, return d; } =20 -int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) +static int xbzrle_decode_buffer_common(uint8_t *src, int slen, uint8_t *ds= t, + int dlen, bool is_pmem) { int i =3D 0, d =3D 0; int ret; @@ -167,10 +169,24 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint= 8_t *dst, int dlen) return -1; } =20 - memcpy(dst + d, src + i, count); + if (!is_pmem) { + memcpy(dst + d, src + i, count); + } else { + pmem_memcpy_nodrain(dst + d, src + i, count); + } d +=3D count; i +=3D count; } =20 return d; } + +int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen) +{ + return xbzrle_decode_buffer_common(src, slen, dst, dlen, false); +} + +int xbzrle_decode_buffer_to_pmem(uint8_t *src, int slen, uint8_t *dst, int= dlen) +{ + return xbzrle_decode_buffer_common(src, slen, dst, dlen, true); +} diff --git a/migration/xbzrle.h b/migration/xbzrle.h index a0db507b9c..ac5ae32666 100644 --- a/migration/xbzrle.h +++ b/migration/xbzrle.h @@ -18,4 +18,5 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_b= uf, int slen, uint8_t *dst, int dlen); =20 int xbzrle_decode_buffer(uint8_t 
*src, int slen, uint8_t *dst, int dlen); +int xbzrle_decode_buffer_to_pmem(uint8_t *src, int slen, uint8_t *dst, int= dlen); #endif --=20 2.14.1