From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229477124582.2851942500487; Fri, 28 Jul 2017 01:11:17 -0700 (PDT) Received: from localhost ([::1]:46671 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Md-0004YE-Nh for importer@patchew.org; Fri, 28 Jul 2017 04:11:15 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46501) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0IQ-0000TS-9L for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:06:55 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0IP-0000pX-BG for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:06:54 -0400 Received: from mx1.redhat.com ([209.132.183.28]:38130) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0IP-0000pC-51 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:06:53 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 17E68D3D4A; Fri, 28 Jul 2017 08:06:52 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id D5E046031D; Fri, 28 Jul 2017 08:06:48 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 17E68D3D4A Authentication-Results: 
ext-mx01.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx01.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:10 +0800 Message-Id: <1501229198-30588-2-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.25]); Fri, 28 Jul 2017 08:06:52 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 01/29] migration: fix incorrect postcopy recved_bitmap X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" The bitmap setup during postcopy is incorrect when the pages are huge pages. Fix it. 
Signed-off-by: Peter Xu --- migration/postcopy-ram.c | 2 +- migration/ram.c | 8 ++++++++ migration/ram.h | 2 ++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 276ce12..952b73a 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -578,7 +578,7 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *= host_addr, ret =3D ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct); } if (!ret) { - ramblock_recv_bitmap_set(host_addr, rb); + ramblock_recv_bitmap_set_range(rb, host_addr, pagesize / getpagesi= ze()); } return ret; } diff --git a/migration/ram.c b/migration/ram.c index 107ee9d..c93973c 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -168,6 +168,14 @@ void ramblock_recv_bitmap_set(void *host_addr, RAMBloc= k *rb) set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receive= dmap); } =20 +void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, + size_t len) +{ + bitmap_set(rb->receivedmap, + ramblock_recv_bitmap_offset(host_addr, rb), + len); +} + void ramblock_recv_bitmap_clear(void *host_addr, RAMBlock *rb) { clear_bit(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap); diff --git a/migration/ram.h b/migration/ram.h index b711552..84e8623 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -55,6 +55,8 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64= _t size); =20 int ramblock_recv_bitmap_test(void *host_addr, RAMBlock *rb); void ramblock_recv_bitmap_set(void *host_addr, RAMBlock *rb); +void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, + size_t len); void ramblock_recv_bitmap_clear(void *host_addr, RAMBlock *rb); =20 #endif --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; 
helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229331832577.5048528294324; Fri, 28 Jul 2017 01:08:51 -0700 (PDT) Received: from localhost ([::1]:46658 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0KI-0001bW-3i for importer@patchew.org; Fri, 28 Jul 2017 04:08:50 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46532) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0IS-0000Tm-Q3 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:06:57 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0IR-0000qo-UG for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:06:56 -0400 Received: from mx1.redhat.com ([209.132.183.28]:55155) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0IR-0000qP-Nv for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:06:55 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id AAD7D5587B; Fri, 28 Jul 2017 08:06:54 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 8411E600C2; Fri, 28 Jul 2017 08:06:52 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com AAD7D5587B Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:11 
+0800 Message-Id: <1501229198-30588-3-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.30]); Fri, 28 Jul 2017 08:06:54 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 02/29] migration: fix comment disorder in RAMState X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Comments for "migration_dirty_pages" and "bitmap_mutex" are switched. Fix it. Signed-off-by: Peter Xu Reviewed-by: Dr. 
David Alan Gilbert --- migration/ram.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index c93973c..c12358d 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -222,9 +222,9 @@ struct RAMState { uint64_t iterations_prev; /* Iterations since start */ uint64_t iterations; - /* protects modification of the bitmap */ - uint64_t migration_dirty_pages; /* number of dirty bits in the bitmap */ + uint64_t migration_dirty_pages; + /* protects modification of the bitmap */ QemuMutex bitmap_mutex; /* The RAMBlock used in the last src_page_requests */ RAMBlock *last_req_rb; --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229340175731.9627460685349; Fri, 28 Jul 2017 01:09:00 -0700 (PDT) Received: from localhost ([::1]:46659 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0KQ-0001ks-Qu for importer@patchew.org; Fri, 28 Jul 2017 04:08:58 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46551) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0IV-0000VR-9L for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:02 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0IU-0000s2-K9 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:06:59 -0400 Received: from mx1.redhat.com ([209.132.183.28]:53646) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 
1db0IU-0000rn-Ec for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:06:58 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 4B0CFC00AFDB; Fri, 28 Jul 2017 08:06:57 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 21E70600C2; Fri, 28 Jul 2017 08:06:54 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 4B0CFC00AFDB Authentication-Results: ext-mx07.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx07.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:12 +0800 Message-Id: <1501229198-30588-4-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.31]); Fri, 28 Jul 2017 08:06:57 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 03/29] io: fix qio_channel_socket_accept err handling X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . 
David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" When accept failed, we should setup errp with the reason. More importantly, the caller may assume errp be non-NULL when error happens, and not setting the errp may crash QEMU. Signed-off-by: Peter Xu --- io/channel-socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io/channel-socket.c b/io/channel-socket.c index 53386b7..7bc308e 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -344,6 +344,7 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, if (errno =3D=3D EINTR) { goto retry; } + error_setg_errno(errp, errno, "Unable to accept connection"); goto error; } =20 --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229483584831.7804696800393; Fri, 28 Jul 2017 01:11:23 -0700 (PDT) Received: from localhost ([::1]:46672 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Mk-0004cy-50 for importer@patchew.org; Fri, 28 Jul 2017 04:11:22 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46590) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0IY-0000Xw-OH for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:03 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0IX-0000tB-7j for 
qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:02 -0400 Received: from mx1.redhat.com ([209.132.183.28]:55232) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0IX-0000sq-16 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:01 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id E0E47F8021; Fri, 28 Jul 2017 08:06:59 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id B5086600C2; Fri, 28 Jul 2017 08:06:57 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com E0E47F8021 Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:13 +0800 Message-Id: <1501229198-30588-5-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.39]); Fri, 28 Jul 2017 08:07:00 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 04/29] bitmap: introduce bitmap_invert() X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . 
David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" It is used to invert the whole bitmap. Signed-off-by: Peter Xu --- include/qemu/bitmap.h | 10 ++++++++++ util/bitmap.c | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index c318da1..460d899 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -82,6 +82,7 @@ int slow_bitmap_andnot(unsigned long *dst, const unsigned= long *bitmap1, const unsigned long *bitmap2, long bits); int slow_bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, long bits); +void slow_bitmap_invert(unsigned long *bitmap, long nbits); =20 static inline unsigned long *bitmap_try_new(long nbits) { @@ -216,6 +217,15 @@ static inline int bitmap_intersects(const unsigned lon= g *src1, } } =20 +static inline void bitmap_invert(unsigned long *bitmap, long nbits) +{ + if (small_nbits(nbits)) { + *bitmap ^=3D BITMAP_LAST_WORD_MASK(nbits); + } else { + slow_bitmap_invert(bitmap, nbits); + } +} + void bitmap_set(unsigned long *map, long i, long len); void bitmap_set_atomic(unsigned long *map, long i, long len); void bitmap_clear(unsigned long *map, long start, long nr); diff --git a/util/bitmap.c b/util/bitmap.c index efced9a..9b7408c 100644 --- a/util/bitmap.c +++ b/util/bitmap.c @@ -355,3 +355,16 @@ int slow_bitmap_intersects(const unsigned long *bitmap= 1, } return 0; } + +void slow_bitmap_invert(unsigned long *bitmap, long nbits) +{ + long k, lim =3D nbits/BITS_PER_LONG; + + for (k =3D 0; k < lim; k++) { + bitmap[k] ^=3D ULONG_MAX; + } + + if (nbits % BITS_PER_LONG) { + bitmap[k] ^=3D BITMAP_LAST_WORD_MASK(nbits); + } +} --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 
208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229548412810.8311924484393; Fri, 28 Jul 2017 01:12:28 -0700 (PDT) Received: from localhost ([::1]:46675 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Nm-0005jC-SX for importer@patchew.org; Fri, 28 Jul 2017 04:12:26 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46672) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Ih-0000dE-83 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:12 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Id-0000xh-Vp for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:11 -0400 Received: from mx1.redhat.com ([209.132.183.28]:45298) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Id-0000x8-Pz for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:07 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id BD42C207AAF; Fri, 28 Jul 2017 08:07:06 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 57F73600C2; Fri, 28 Jul 2017 08:07:00 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com BD42C207AAF Authentication-Results: ext-mx09.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: 
ext-mx09.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:14 +0800 Message-Id: <1501229198-30588-6-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.38]); Fri, 28 Jul 2017 08:07:06 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 05/29] bitmap: introduce bitmap_count_one() X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Count how many bits are set in the bitmap. Signed-off-by: Peter Xu Reviewed-by: Dr. 
David Alan Gilbert --- include/qemu/bitmap.h | 10 ++++++++++ util/bitmap.c | 15 +++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index 460d899..9c18da0 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -83,6 +83,7 @@ int slow_bitmap_andnot(unsigned long *dst, const unsigned= long *bitmap1, int slow_bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, long bits); void slow_bitmap_invert(unsigned long *bitmap, long nbits); +long slow_bitmap_count_one(const unsigned long *bitmap, long nbits); =20 static inline unsigned long *bitmap_try_new(long nbits) { @@ -226,6 +227,15 @@ static inline void bitmap_invert(unsigned long *bitmap= , long nbits) } } =20 +static inline long bitmap_count_one(const unsigned long *bitmap, long nbit= s) +{ + if (small_nbits(nbits)) { + return (ctpopl(*bitmap & BITMAP_LAST_WORD_MASK(nbits))); + } else { + return slow_bitmap_count_one(bitmap, nbits); + } +} + void bitmap_set(unsigned long *map, long i, long len); void bitmap_set_atomic(unsigned long *map, long i, long len); void bitmap_clear(unsigned long *map, long start, long nr); diff --git a/util/bitmap.c b/util/bitmap.c index 9b7408c..73a1063 100644 --- a/util/bitmap.c +++ b/util/bitmap.c @@ -368,3 +368,18 @@ void slow_bitmap_invert(unsigned long *bitmap, long nb= its) bitmap[k] ^=3D BITMAP_LAST_WORD_MASK(nbits); } } + +long slow_bitmap_count_one(const unsigned long *bitmap, long nbits) +{ + long k, lim =3D nbits/BITS_PER_LONG, result =3D 0; + + for (k =3D 0; k < lim; k++) { + result +=3D ctpopl(bitmap[k]); + } + + if (nbits % BITS_PER_LONG) { + result +=3D ctpopl(bitmap[k] & BITMAP_LAST_WORD_MASK(nbits)); + } + + return result; +} --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; 
envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229694991464.7959255922183; Fri, 28 Jul 2017 01:14:54 -0700 (PDT) Received: from localhost ([::1]:46685 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Q9-0000tg-Gx for importer@patchew.org; Fri, 28 Jul 2017 04:14:53 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46701) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Ik-0000fs-Bo for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:16 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Ij-000123-LW for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:14 -0400 Received: from mx1.redhat.com ([209.132.183.28]:45381) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Ij-00011V-FD for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:13 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 6CCF1C107C7E; Fri, 28 Jul 2017 08:07:12 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 35FA1600C2; Fri, 28 Jul 2017 08:07:06 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 6CCF1C107C7E Authentication-Results: ext-mx07.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx07.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: 
Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:15 +0800 Message-Id: <1501229198-30588-7-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.31]); Fri, 28 Jul 2017 08:07:12 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 06/29] migration: dump str in migrate_set_state trace X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Strings are more readable for debugging. Signed-off-by: Peter Xu Reviewed-by: Dr. 
David Alan Gilbert --- migration/migration.c | 3 ++- migration/trace-events | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 6803187..bdc4445 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -914,8 +914,9 @@ void qmp_migrate_start_postcopy(Error **errp) =20 void migrate_set_state(int *state, int old_state, int new_state) { + assert(new_state < MIGRATION_STATUS__MAX); if (atomic_cmpxchg(state, old_state, new_state) =3D=3D old_state) { - trace_migrate_set_state(new_state); + trace_migrate_set_state(MigrationStatus_lookup[new_state]); migrate_generate_event(new_state); } } diff --git a/migration/trace-events b/migration/trace-events index cb2c4b5..08d00fa 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -80,7 +80,7 @@ ram_save_queue_pages(const char *rbname, size_t start, si= ze_t len) "%s: start: % # migration/migration.c await_return_path_close_on_source_close(void) "" await_return_path_close_on_source_joining(void) "" -migrate_set_state(int new_state) "new state %d" +migrate_set_state(const char *new_state) "new state %s" migrate_fd_cleanup(void) "" migrate_fd_error(const char *error_desc) "error=3D%s" migrate_fd_cancel(void) "" --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229844694755.9351431599426; Fri, 28 Jul 2017 01:17:24 -0700 (PDT) Received: from localhost ([::1]:46698 helo=lists.gnu.org) by lists.gnu.org with esmtp 
(Exim 4.71) (envelope-from ) id 1db0SY-00033h-Gk for importer@patchew.org; Fri, 28 Jul 2017 04:17:22 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46745) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0In-0000i2-MU for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:22 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Im-000141-JA for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:17 -0400 Received: from mx1.redhat.com ([209.132.183.28]:45520) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Im-00013G-AL for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:16 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 49620234A7D; Fri, 28 Jul 2017 08:07:15 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id D8958600C2; Fri, 28 Jul 2017 08:07:12 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 49620234A7D Authentication-Results: ext-mx09.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx09.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:16 +0800 Message-Id: <1501229198-30588-8-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.38]); Fri, 28 Jul 2017 08:07:15 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] 
X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 07/29] migration: better error handling with QEMUFile X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" If postcopy goes down for some reason, we can always see this on dst: qemu-system-x86_64: RP: Received invalid message 0x0000 length 0x0000 However, in most cases that's not the real issue. The problem is that qemu_get_be16() has no way to show whether the returned data is valid or not, and we are _always_ assuming it is valid. That's possibly not wise. The best approach to solve this would be refactoring the QEMUFile interface to allow the APIs to return an error if there is one. However, it needs quite a bit of work and testing. For now, let's explicitly check the validity first before using the data in all places for qemu_get_*(). This patch tries to fix most of the cases I can see. Only with this in place can we make sure we are processing valid data, and also that we capture the channel down events correctly. 
Signed-off-by: Peter Xu --- migration/migration.c | 5 +++++ migration/ram.c | 22 ++++++++++++++++++---- migration/savevm.c | 29 +++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index bdc4445..5b2602e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1543,6 +1543,11 @@ static void *source_return_path_thread(void *opaque) header_type =3D qemu_get_be16(rp); header_len =3D qemu_get_be16(rp); =20 + if (qemu_file_get_error(rp)) { + mark_source_rp_bad(ms); + goto out; + } + if (header_type >=3D MIG_RP_MSG_MAX || header_type =3D=3D MIG_RP_MSG_INVALID) { error_report("RP: Received invalid message 0x%04x length 0x%04= x", diff --git a/migration/ram.c b/migration/ram.c index c12358d..7f4cb0f 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -2416,7 +2416,7 @@ static int ram_load_postcopy(QEMUFile *f) void *last_host =3D NULL; bool all_zero =3D false; =20 - while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { + while (!(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr; void *host =3D NULL; void *page_buffer =3D NULL; @@ -2425,6 +2425,16 @@ static int ram_load_postcopy(QEMUFile *f) uint8_t ch; =20 addr =3D qemu_get_be64(f); + + /* + * If qemu file error, we should stop here, and then "addr" + * may be invalid + */ + if (qemu_file_get_error(f)) { + ret =3D qemu_file_get_error(f); + break; + } + flags =3D addr & ~TARGET_PAGE_MASK; addr &=3D TARGET_PAGE_MASK; =20 @@ -2505,6 +2515,13 @@ static int ram_load_postcopy(QEMUFile *f) error_report("Unknown combination of migration flags: %#x" " (postcopy mode)", flags); ret =3D -EINVAL; + break; + } + + /* Detect for any possible file errors */ + if (qemu_file_get_error(f)) { + ret =3D qemu_file_get_error(f); + break; } =20 if (place_needed) { @@ -2519,9 +2536,6 @@ static int ram_load_postcopy(QEMUFile *f) place_source, block); } } - if (!ret) { - ret =3D qemu_file_get_error(f); - } } =20 return ret; diff --git a/migration/savevm.c 
b/migration/savevm.c index fdd15fa..13ae9d6 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1720,6 +1720,11 @@ static int loadvm_process_command(QEMUFile *f) cmd =3D qemu_get_be16(f); len =3D qemu_get_be16(f); =20 + /* Check validity before continue processing of cmds */ + if (qemu_file_get_error(f)) { + return qemu_file_get_error(f); + } + trace_loadvm_process_command(cmd, len); if (cmd >=3D MIG_CMD_MAX || cmd =3D=3D MIG_CMD_INVALID) { error_report("MIG_CMD 0x%x unknown (len 0x%x)", cmd, len); @@ -1855,6 +1860,11 @@ qemu_loadvm_section_start_full(QEMUFile *f, Migratio= nIncomingState *mis) return -EINVAL; } =20 + /* Check validity before load the vmstate */ + if (qemu_file_get_error(f)) { + return qemu_file_get_error(f); + } + ret =3D vmstate_load(f, se); if (ret < 0) { error_report("error while loading state for instance 0x%x of" @@ -1888,6 +1898,11 @@ qemu_loadvm_section_part_end(QEMUFile *f, MigrationI= ncomingState *mis) return -EINVAL; } =20 + /* Check validity before load the vmstate */ + if (qemu_file_get_error(f)) { + return qemu_file_get_error(f); + } + ret =3D vmstate_load(f, se); if (ret < 0) { error_report("error while loading state section id %d(%s)", @@ -1944,8 +1959,14 @@ static int qemu_loadvm_state_main(QEMUFile *f, Migra= tionIncomingState *mis) uint8_t section_type; int ret =3D 0; =20 - while ((section_type =3D qemu_get_byte(f)) !=3D QEMU_VM_EOF) { - ret =3D 0; + while (true) { + section_type =3D qemu_get_byte(f); + + if (qemu_file_get_error(f)) { + ret =3D qemu_file_get_error(f); + break; + } + trace_qemu_loadvm_state_section(section_type); switch (section_type) { case QEMU_VM_SECTION_START: @@ -1969,6 +1990,10 @@ static int qemu_loadvm_state_main(QEMUFile *f, Migra= tionIncomingState *mis) goto out; } break; + case QEMU_VM_EOF: + /* This is the end of migration */ + goto out; + break; default: error_report("Unknown savevm section type %d", section_type); ret =3D -EINVAL; --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 
Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229621309999.3021492212404; Fri, 28 Jul 2017 01:13:41 -0700 (PDT) Received: from localhost ([::1]:46680 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Oy-00071t-1V for importer@patchew.org; Fri, 28 Jul 2017 04:13:40 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46797) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Iq-0000kf-Vo for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:22 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Ip-00015k-73 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:21 -0400 Received: from mx1.redhat.com ([209.132.183.28]:51472) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Io-000159-VS for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:19 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id E38D6ADA39; Fri, 28 Jul 2017 08:07:17 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id B6C1C600C2; Fri, 28 Jul 2017 08:07:15 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com E38D6ADA39 Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; dmarc=none 
(p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:17 +0800 Message-Id: <1501229198-30588-9-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.28]); Fri, 28 Jul 2017 08:07:18 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 08/29] migration: reuse mis->userfault_quit_fd X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Previously it was only used to make the page fault thread quit. Make it more useful: we can now use it to wake up the page fault thread (for any reason), and it only means "quit" if fault_thread_quit is set. Since its purpose has changed, rename it to userfault_event_fd. Signed-off-by: Peter Xu Reviewed-by: Dr.
David Alan Gilbert --- migration/migration.h | 6 ++++-- migration/postcopy-ram.c | 24 ++++++++++++++++-------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/migration/migration.h b/migration/migration.h index 148c9fa..70e3094 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -35,6 +35,8 @@ struct MigrationIncomingState { bool have_fault_thread; QemuThread fault_thread; QemuSemaphore fault_thread_sem; + /* Set this when we want the fault thread to quit */ + bool fault_thread_quit; =20 bool have_listen_thread; QemuThread listen_thread; @@ -42,8 +44,8 @@ struct MigrationIncomingState { =20 /* For the kernel to send us notifications */ int userfault_fd; - /* To tell the fault_thread to quit */ - int userfault_quit_fd; + /* To notify the fault_thread to wake, e.g., when need to quit */ + int userfault_event_fd; QEMUFile *to_src_file; QemuMutex rp_mutex; /* We send replies from multiple threads */ void *postcopy_tmp_page; diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 952b73a..4278fe7 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -305,7 +305,8 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingStat= e *mis) * currently be at 0, we're going to increment it to 1 */ tmp64 =3D 1; - if (write(mis->userfault_quit_fd, &tmp64, 8) =3D=3D 8) { + atomic_set(&mis->fault_thread_quit, 1); + if (write(mis->userfault_event_fd, &tmp64, 8) =3D=3D 8) { trace_postcopy_ram_incoming_cleanup_join(); qemu_thread_join(&mis->fault_thread); } else { @@ -315,7 +316,7 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingStat= e *mis) } trace_postcopy_ram_incoming_cleanup_closeuf(); close(mis->userfault_fd); - close(mis->userfault_quit_fd); + close(mis->userfault_event_fd); mis->have_fault_thread =3D false; } =20 @@ -438,7 +439,7 @@ static void *postcopy_ram_fault_thread(void *opaque) pfd[0].fd =3D mis->userfault_fd; pfd[0].events =3D POLLIN; pfd[0].revents =3D 0; - pfd[1].fd =3D mis->userfault_quit_fd; + 
pfd[1].fd =3D mis->userfault_event_fd; pfd[1].events =3D POLLIN; /* Waiting for eventfd to go positive */ pfd[1].revents =3D 0; =20 @@ -448,8 +449,15 @@ static void *postcopy_ram_fault_thread(void *opaque) } =20 if (pfd[1].revents) { - trace_postcopy_ram_fault_thread_quit(); - break; + uint64_t tmp64 =3D 0; + + /* Consume the signal */ + read(mis->userfault_event_fd, &tmp64, 8); + + if (atomic_read(&mis->fault_thread_quit)) { + trace_postcopy_ram_fault_thread_quit(); + break; + } } =20 ret =3D read(mis->userfault_fd, &msg, sizeof(msg)); @@ -528,9 +536,9 @@ int postcopy_ram_enable_notify(MigrationIncomingState *= mis) } =20 /* Now an eventfd we use to tell the fault-thread to quit */ - mis->userfault_quit_fd =3D eventfd(0, EFD_CLOEXEC); - if (mis->userfault_quit_fd =3D=3D -1) { - error_report("%s: Opening userfault_quit_fd: %s", __func__, + mis->userfault_event_fd =3D eventfd(0, EFD_CLOEXEC); + if (mis->userfault_event_fd =3D=3D -1) { + error_report("%s: Opening userfault_event_fd: %s", __func__, strerror(errno)); close(mis->userfault_fd); return -1; --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229628813681.9804809066218; Fri, 28 Jul 2017 01:13:48 -0700 (PDT) Received: from localhost ([::1]:46681 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0P4-000779-8j for importer@patchew.org; Fri, 28 Jul 2017 04:13:46 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46834) by lists.gnu.org with 
esmtp (Exim 4.71) (envelope-from ) id 1db0Iu-0000n3-05 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:24 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0It-00018H-2p for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:24 -0400 Received: from mx1.redhat.com ([209.132.183.28]:32774) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Is-00017e-Nw for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:23 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 8859968695; Fri, 28 Jul 2017 08:07:21 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 5852E600C2; Fri, 28 Jul 2017 08:07:18 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 8859968695 Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:18 +0800 Message-Id: <1501229198-30588-10-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.27]); Fri, 28 Jul 2017 08:07:21 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 09/29] migration: provide postcopy_fault_thread_notify() X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: 
list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" A general helper to notify the fault thread. Signed-off-by: Peter Xu Reviewed-by: Dr. David Alan Gilbert --- migration/postcopy-ram.c | 35 ++++++++++++++++++++--------------- migration/postcopy-ram.h | 2 ++ 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 4278fe7..9ce391d 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -287,6 +287,21 @@ int postcopy_ram_incoming_init(MigrationIncomingState = *mis, size_t ram_pages) return 0; } =20 +void postcopy_fault_thread_notify(MigrationIncomingState *mis) +{ + uint64_t tmp64 =3D 1; + + /* + * Tell the fault_thread to exit, it's an eventfd that should + * currently be at 0, we're going to increment it to 1 + */ + if (write(mis->userfault_event_fd, &tmp64, 8) !=3D 8) { + /* Not much we can do here, but may as well report it */ + error_report("%s: incrementing userfault_quit_fd: %s", __func__, + strerror(errno)); + } +} + /* * At the end of a migration where postcopy_ram_incoming_init was called. 
*/ @@ -295,25 +310,15 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingSt= ate *mis) trace_postcopy_ram_incoming_cleanup_entry(); =20 if (mis->have_fault_thread) { - uint64_t tmp64; - if (qemu_ram_foreach_block(cleanup_range, mis)) { return -1; } - /* - * Tell the fault_thread to exit, it's an eventfd that should - * currently be at 0, we're going to increment it to 1 - */ - tmp64 =3D 1; + /* Let the fault thread quit */ atomic_set(&mis->fault_thread_quit, 1); - if (write(mis->userfault_event_fd, &tmp64, 8) =3D=3D 8) { - trace_postcopy_ram_incoming_cleanup_join(); - qemu_thread_join(&mis->fault_thread); - } else { - /* Not much we can do here, but may as well report it */ - error_report("%s: incrementing userfault_quit_fd: %s", __func_= _, - strerror(errno)); - } + postcopy_fault_thread_notify(mis); + trace_postcopy_ram_incoming_cleanup_join(); + qemu_thread_join(&mis->fault_thread); + trace_postcopy_ram_incoming_cleanup_closeuf(); close(mis->userfault_fd); close(mis->userfault_event_fd); diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h index 78a3591..4a7644d 100644 --- a/migration/postcopy-ram.h +++ b/migration/postcopy-ram.h @@ -114,4 +114,6 @@ PostcopyState postcopy_state_get(void); /* Set the state and return the old state */ PostcopyState postcopy_state_set(PostcopyState new_state); =20 +void postcopy_fault_thread_notify(MigrationIncomingState *mis); + #endif --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 
150122936775412.87010479223602; Fri, 28 Jul 2017 01:09:27 -0700 (PDT) Received: from localhost ([::1]:46660 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Kr-0002Co-3h for importer@patchew.org; Fri, 28 Jul 2017 04:09:25 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46882) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0J0-0000sh-76 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:31 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Ix-0001AK-2W for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:30 -0400 Received: from mx1.redhat.com ([209.132.183.28]:51608) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Iw-0001A3-Sl for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:27 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id C9021974BB; Fri, 28 Jul 2017 08:07:25 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id EF844600C2; Fri, 28 Jul 2017 08:07:21 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com C9021974BB Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:19 +0800 Message-Id: <1501229198-30588-11-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 
(mx1.redhat.com [10.5.110.28]); Fri, 28 Jul 2017 08:07:25 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 10/29] migration: new property "x-postcopy-fast" X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" This provides a way to start postcopy ASAP when migration starts. To do this, we need both: -global migration.x-postcopy-ram=3Don \ -global migration.x-postcopy-fast=3Don Signed-off-by: Peter Xu --- migration/migration.c | 9 ++++++++- migration/migration.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 5b2602e..efee87e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1936,6 +1936,11 @@ bool migrate_colo_enabled(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; } =20 +static bool postcopy_should_start(MigrationState *s) +{ + return atomic_read(&s->start_postcopy) || s->start_postcopy_fast; +} + /* * Master migration thread on the source VM. * It drives the migration and pumps the data down the outgoing channel. 
@@ -2013,7 +2018,7 @@ static void *migration_thread(void *opaque) if (migrate_postcopy_ram() && s->state !=3D MIGRATION_STATUS_POSTCOPY_ACTIVE && pend_nonpost <=3D threshold_size && - atomic_read(&s->start_postcopy)) { + postcopy_should_start(s)) { =20 if (!postcopy_start(s, &old_vm_running)) { current_active_state =3D MIGRATION_STATUS_POSTCOPY= _ACTIVE; @@ -2170,6 +2175,8 @@ static Property migration_properties[] =3D { send_configuration, true), DEFINE_PROP_BOOL("send-section-footer", MigrationState, send_section_footer, true), + DEFINE_PROP_BOOL("x-postcopy-fast", MigrationState, + start_postcopy_fast, false), =20 /* Migration parameters */ DEFINE_PROP_INT64("x-compress-level", MigrationState, diff --git a/migration/migration.h b/migration/migration.h index 70e3094..e902bae 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -113,6 +113,8 @@ struct MigrationState =20 /* Flag set once the migration has been asked to enter postcopy */ bool start_postcopy; + /* Set the flag if we want to start postcopy ASAP when migration start= s */ + bool start_postcopy_fast; /* Flag set after postcopy has sent the device state */ bool postcopy_after_devices; =20 --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 150122951579716.89731569895207; Fri, 28 Jul 2017 01:11:55 -0700 (PDT) Received: from localhost ([::1]:46673 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0NG-00057d-AD for importer@patchew.org; 
Fri, 28 Jul 2017 04:11:54 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46907) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0J0-0000tF-UH for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:32 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Iz-0001BV-OO for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:30 -0400 Received: from mx1.redhat.com ([209.132.183.28]:34426) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Iz-0001An-Fr for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:29 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 6BC1544833; Fri, 28 Jul 2017 08:07:28 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 40F1D5B80A; Fri, 28 Jul 2017 08:07:26 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 6BC1544833 Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:20 +0800 Message-Id: <1501229198-30588-12-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.30]); Fri, 28 Jul 2017 08:07:28 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 11/29] migration: 
new postcopy-pause state X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Introducing a new state "postcopy-paused", which can be used to pause a postcopy migration. It is targeted to support network failures during postcopy migration. Now when network down for postcopy, the source side will not fail the migration. Instead we convert the status into this new paused state, and we will try to wait for a rescue in the future. Signed-off-by: Peter Xu --- migration/migration.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++= +--- migration/migration.h | 3 ++ migration/trace-events | 1 + qapi-schema.json | 5 +++- 4 files changed, 82 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index efee87e..0bc70c8 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -470,6 +470,7 @@ static bool migration_is_setup_or_active(int state) switch (state) { case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: + case MIGRATION_STATUS_POSTCOPY_PAUSED: case MIGRATION_STATUS_SETUP: return true; =20 @@ -545,6 +546,7 @@ MigrationInfo *qmp_query_migrate(Error **errp) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_CANCELLING: case MIGRATION_STATUS_POSTCOPY_ACTIVE: + case MIGRATION_STATUS_POSTCOPY_PAUSED: /* TODO add some postcopy stats */ info->has_status =3D true; info->has_total_time =3D true; @@ -991,6 +993,8 @@ static void migrate_fd_cleanup(void *opaque) =20 notifier_list_notify(&migration_state_notifiers, s); block_cleanup_parameters(s); + + 
qemu_sem_destroy(&s->postcopy_pause_sem); } =20 void migrate_fd_error(MigrationState *s, const Error *error) @@ -1134,6 +1138,7 @@ MigrationState *migrate_init(void) s->migration_thread_running =3D false; error_free(s->error); s->error =3D NULL; + qemu_sem_init(&s->postcopy_pause_sem, 0); =20 migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_S= ETUP); =20 @@ -1942,6 +1947,69 @@ static bool postcopy_should_start(MigrationState *s) } =20 /* + * We don't return until we are in a safe state to continue current + * postcopy migration. Returns true to continue the migration, or + * false to terminate current migration. + */ +static bool postcopy_pause(MigrationState *s) +{ + assert(s->state =3D=3D MIGRATION_STATUS_POSTCOPY_ACTIVE); + migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, + MIGRATION_STATUS_POSTCOPY_PAUSED); + + /* Current channel is possibly broken. Release it. */ + assert(s->to_dst_file); + qemu_file_shutdown(s->to_dst_file); + qemu_fclose(s->to_dst_file); + s->to_dst_file =3D NULL; + + /* + * We wait until things fixed up. Then someone will setup the + * status back for us. + */ + while (s->state =3D=3D MIGRATION_STATUS_POSTCOPY_PAUSED) { + qemu_sem_wait(&s->postcopy_pause_sem); + } + + trace_postcopy_pause_continued(); + + return true; +} + +/* Return true if we want to stop the migration, otherwise false. */ +static bool migration_detect_error(MigrationState *s) +{ + int ret; + + /* Try to detect any file errors */ + ret =3D qemu_file_get_error(s->to_dst_file); + + if (!ret) { + /* Everything is fine */ + return false; + } + + if (s->state =3D=3D MIGRATION_STATUS_POSTCOPY_ACTIVE && ret =3D=3D -EI= O) { + /* + * For postcopy, we allow the network to be down for a + * while. After that, it can be continued by a + * recovery phase. + */ + return !postcopy_pause(s); + } else { + /* + * For precopy (or postcopy with error outside IO), we fail + * with no time. 
+ */ + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); + trace_migration_thread_file_err(); + + /* Time to stop the migration, now. */ + return true; + } +} + +/* * Master migration thread on the source VM. * It drives the migration and pumps the data down the outgoing channel. */ @@ -2037,12 +2105,14 @@ static void *migration_thread(void *opaque) } } =20 - if (qemu_file_get_error(s->to_dst_file)) { - migrate_set_state(&s->state, current_active_state, - MIGRATION_STATUS_FAILED); - trace_migration_thread_file_err(); + /* + * Try to detect any kind of failures, and see whether we + * should stop the migration now. + */ + if (migration_detect_error(s)) { break; } + current_time =3D qemu_clock_get_ms(QEMU_CLOCK_REALTIME); if (current_time >=3D initial_time + BUFFER_DELAY) { uint64_t transferred_bytes =3D qemu_ftell(s->to_dst_file) - diff --git a/migration/migration.h b/migration/migration.h index e902bae..24cdaf6 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -151,6 +151,9 @@ struct MigrationState bool send_configuration; /* Whether we send section footer during migration */ bool send_section_footer; + + /* Needed by postcopy-pause state */ + QemuSemaphore postcopy_pause_sem; }; =20 void migrate_set_state(int *state, int old_state, int new_state); diff --git a/migration/trace-events b/migration/trace-events index 08d00fa..2211acc 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -98,6 +98,7 @@ migration_thread_setup_complete(void) "" open_return_path_on_source(void) "" open_return_path_on_source_continue(void) "" postcopy_start(void) "" +postcopy_pause_continued(void) "" postcopy_start_set_run(void) "" source_return_path_thread_bad_end(void) "" source_return_path_thread_end(void) "" diff --git a/qapi-schema.json b/qapi-schema.json index 9c6c3e1..2a36b80 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -667,6 +667,8 @@ # # @postcopy-active: like active, but now in postcopy mode. 
(since 2.5) # +# @postcopy-paused: during postcopy but paused. (since 2.10) +# # @completed: migration is finished. # # @failed: some error occurred during migration process. @@ -679,7 +681,8 @@ ## { 'enum': 'MigrationStatus', 'data': [ 'none', 'setup', 'cancelling', 'cancelled', - 'active', 'postcopy-active', 'completed', 'failed', 'colo' ] } + 'active', 'postcopy-active', 'postcopy-paused', + 'completed', 'failed', 'colo' ] } =20 ## # @MigrationInfo: --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229766680104.62570477228735; Fri, 28 Jul 2017 01:16:06 -0700 (PDT) Received: from localhost ([::1]:46694 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0RJ-00020q-6h for importer@patchew.org; Fri, 28 Jul 2017 04:16:05 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46942) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0J4-0000wR-9Y for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:35 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0J3-0001ET-GU for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:34 -0400 Received: from mx1.redhat.com ([209.132.183.28]:34514) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0J3-0001Ds-8P for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:33 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) 
(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 3C32144BE4; Fri, 28 Jul 2017 08:07:32 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id DA675600C2; Fri, 28 Jul 2017 08:07:28 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 3C32144BE4 Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:21 +0800 Message-Id: <1501229198-30588-13-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.30]); Fri, 28 Jul 2017 08:07:32 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 12/29] migration: allow dst vm pause on postcopy X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" When there is IO error on the incoming channel (e.g., network down), instead of bailing out immediately, we allow the dst vm to switch to the new POSTCOPY_PAUSE state. 
Currently it is still simple - it waits on the new semaphore until someone pokes it for another attempt. Signed-off-by: Peter Xu --- migration/migration.c | 1 + migration/migration.h | 3 +++ migration/savevm.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ migration/trace-events | 2 ++ 4 files changed, 51 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index 0bc70c8..c729c5a 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -146,6 +146,7 @@ MigrationIncomingState *migration_incoming_get_current(= void) memset(&mis_current, 0, sizeof(MigrationIncomingState)); qemu_mutex_init(&mis_current.rp_mutex); qemu_event_init(&mis_current.main_thread_load_event, false); + qemu_sem_init(&mis_current.postcopy_pause_sem_dst, 0); once =3D true; } return &mis_current; diff --git a/migration/migration.h b/migration/migration.h index 24cdaf6..08b90e8 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -60,6 +60,9 @@ struct MigrationIncomingState { /* The coroutine we should enter (back) after failover */ Coroutine *migration_incoming_co; QemuSemaphore colo_incoming_sem; + + /* notify PAUSED postcopy incoming migrations to try to continue */ + QemuSemaphore postcopy_pause_sem_dst; }; =20 MigrationIncomingState *migration_incoming_get_current(void); diff --git a/migration/savevm.c b/migration/savevm.c index 13ae9d6..1f62268 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1954,11 +1954,41 @@ void qemu_loadvm_state_cleanup(void) } } =20 +/* Return true if we should continue the migration, or false.
*/ +static bool postcopy_pause_incoming(MigrationIncomingState *mis) +{ + trace_postcopy_pause_incoming(); + + migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, + MIGRATION_STATUS_POSTCOPY_PAUSED); + + assert(mis->from_src_file); + qemu_file_shutdown(mis->from_src_file); + qemu_fclose(mis->from_src_file); + mis->from_src_file =3D NULL; + + assert(mis->to_src_file); + qemu_mutex_lock(&mis->rp_mutex); + qemu_file_shutdown(mis->to_src_file); + qemu_fclose(mis->to_src_file); + mis->to_src_file =3D NULL; + qemu_mutex_unlock(&mis->rp_mutex); + + while (mis->state =3D=3D MIGRATION_STATUS_POSTCOPY_PAUSED) { + qemu_sem_wait(&mis->postcopy_pause_sem_dst); + } + + trace_postcopy_pause_incoming_continued(); + + return true; +} + static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) { uint8_t section_type; int ret =3D 0; =20 +retry: while (true) { section_type =3D qemu_get_byte(f); =20 @@ -2004,6 +2034,21 @@ static int qemu_loadvm_state_main(QEMUFile *f, Migra= tionIncomingState *mis) out: if (ret < 0) { qemu_file_set_error(f, ret); + + /* + * Detect whether it is: + * + * 1. postcopy running + * 2. network failure (-EIO) + * + * If so, we try to wait for a recovery. 
+ */ + if (mis->state =3D=3D MIGRATION_STATUS_POSTCOPY_ACTIVE && + ret =3D=3D -EIO && postcopy_pause_incoming(mis)) { + /* Reset f to point to the newly created channel */ + f =3D mis->from_src_file; + goto retry; + } } return ret; } diff --git a/migration/trace-events b/migration/trace-events index 2211acc..22a629e 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -99,6 +99,8 @@ open_return_path_on_source(void) "" open_return_path_on_source_continue(void) "" postcopy_start(void) "" postcopy_pause_continued(void) "" +postcopy_pause_incoming(void) "" +postcopy_pause_incoming_continued(void) "" postcopy_start_set_run(void) "" source_return_path_thread_bad_end(void) "" source_return_path_thread_end(void) "" --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501230010861918.8630988291316; Fri, 28 Jul 2017 01:20:10 -0700 (PDT) Received: from localhost ([::1]:46708 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0VF-0006UM-Br for importer@patchew.org; Fri, 28 Jul 2017 04:20:09 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47055) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0JB-000131-2c for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:42 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0J7-0001HY-UM for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:41 -0400 Received: from mx1.redhat.com 
([209.132.183.28]:45984) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0J7-0001H7-MA for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:37 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id ABF7623C475; Fri, 28 Jul 2017 08:07:36 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id A90DC600C2; Fri, 28 Jul 2017 08:07:32 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com ABF7623C475 Authentication-Results: ext-mx09.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx09.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:22 +0800 Message-Id: <1501229198-30588-14-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.38]); Fri, 28 Jul 2017 08:07:36 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 13/29] migration: allow src return path to pause X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . 
David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Let the thread pause for network issues. Signed-off-by: Peter Xu Reviewed-by: Dr. David Alan Gilbert --- migration/migration.c | 35 +++++++++++++++++++++++++++++++++-- migration/migration.h | 1 + migration/trace-events | 2 ++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index c729c5a..d0b9a86 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -996,6 +996,7 @@ static void migrate_fd_cleanup(void *opaque) block_cleanup_parameters(s); =20 qemu_sem_destroy(&s->postcopy_pause_sem); + qemu_sem_destroy(&s->postcopy_pause_rp_sem); } =20 void migrate_fd_error(MigrationState *s, const Error *error) @@ -1140,6 +1141,7 @@ MigrationState *migrate_init(void) error_free(s->error); s->error =3D NULL; qemu_sem_init(&s->postcopy_pause_sem, 0); + qemu_sem_init(&s->postcopy_pause_rp_sem, 0); =20 migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_S= ETUP); =20 @@ -1527,6 +1529,18 @@ static void migrate_handle_rp_req_pages(MigrationSta= te *ms, const char* rbname, } } =20 +/* Return true to retry, false to quit */ +static bool postcopy_pause_return_path_thread(MigrationState *s) +{ + trace_postcopy_pause_return_path(); + + qemu_sem_wait(&s->postcopy_pause_rp_sem); + + trace_postcopy_pause_return_path_continued(); + + return true; +} + /* * Handles messages sent on the return path towards the source VM * @@ -1543,6 +1557,8 @@ static void *source_return_path_thread(void *opaque) int res; =20 trace_source_return_path_thread_entry(); + +retry: while (!ms->rp_state.error && !qemu_file_get_error(rp) && migration_is_setup_or_active(ms->state)) { trace_source_return_path_thread_loop_top(); @@ -1634,13 +1650,28 @@ static void *source_return_path_thread(void 
*opaque) break; } } - if (qemu_file_get_error(rp)) { + +out: + res =3D qemu_file_get_error(rp); + if (res) { + if (res =3D=3D -EIO) { + /* + * Maybe there is something we can do: it looks like a + * network down issue, and we pause for a recovery. + */ + if (postcopy_pause_return_path_thread(ms)) { + /* Reload rp, reset the rest */ + rp =3D ms->rp_state.from_dst_file; + ms->rp_state.error =3D false; + goto retry; + } + } + trace_source_return_path_thread_bad_end(); mark_source_rp_bad(ms); } =20 trace_source_return_path_thread_end(); -out: ms->rp_state.from_dst_file =3D NULL; qemu_fclose(rp); return NULL; diff --git a/migration/migration.h b/migration/migration.h index 08b90e8..7aaab13 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -157,6 +157,7 @@ struct MigrationState =20 /* Needed by postcopy-pause state */ QemuSemaphore postcopy_pause_sem; + QemuSemaphore postcopy_pause_rp_sem; }; =20 void migrate_set_state(int *state, int old_state, int new_state); diff --git a/migration/trace-events b/migration/trace-events index 22a629e..a269eec 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -98,6 +98,8 @@ migration_thread_setup_complete(void) "" open_return_path_on_source(void) "" open_return_path_on_source_continue(void) "" postcopy_start(void) "" +postcopy_pause_return_path(void) "" +postcopy_pause_return_path_continued(void) "" postcopy_pause_continued(void) "" postcopy_pause_incoming(void) "" postcopy_pause_incoming_continued(void) "" --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: 
from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229772770851.6085727047144; Fri, 28 Jul 2017 01:16:12 -0700 (PDT) Received: from localhost ([::1]:46695 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0RP-00025n-7v for importer@patchew.org; Fri, 28 Jul 2017 04:16:11 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47075) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0JD-00015X-Lj for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:45 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0JC-0001Km-Kf for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:43 -0400 Received: from mx1.redhat.com ([209.132.183.28]:39870) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0JC-0001KF-Ak for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:42 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 35FE5628B3; Fri, 28 Jul 2017 08:07:41 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 2020B600C2; Fri, 28 Jul 2017 08:07:36 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 35FE5628B3 Authentication-Results: ext-mx01.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx01.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:23 +0800 Message-Id: <1501229198-30588-15-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 
on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.25]); Fri, 28 Jul 2017 08:07:41 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 14/29] migration: allow send_rq to fail X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Previously we did not allow for failures when sending data from the destination to the source via the return path. However, it is possible for errors to occur along the way. This patch allows migrate_send_rp_message() to return an error when that happens, and further extends this to migrate_send_rp_req_pages(). Signed-off-by: Peter Xu Reviewed-by: Dr. David Alan Gilbert --- migration/migration.c | 38 ++++++++++++++++++++++++++++++-------- migration/migration.h | 2 +- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index d0b9a86..9a0b5b0 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -195,17 +195,35 @@ static void deferred_incoming_migration(Error **errp) * Send a message on the return channel back to the source * of the migration.
*/ -static void migrate_send_rp_message(MigrationIncomingState *mis, - enum mig_rp_message_type message_type, - uint16_t len, void *data) +static int migrate_send_rp_message(MigrationIncomingState *mis, + enum mig_rp_message_type message_type, + uint16_t len, void *data) { + int ret =3D 0; + trace_migrate_send_rp_message((int)message_type, len); qemu_mutex_lock(&mis->rp_mutex); + + /* + * It's possible that the file handle got lost due to network + * failures. + */ + if (!mis->to_src_file) { + ret =3D -EIO; + goto error; + } + qemu_put_be16(mis->to_src_file, (unsigned int)message_type); qemu_put_be16(mis->to_src_file, len); qemu_put_buffer(mis->to_src_file, data, len); qemu_fflush(mis->to_src_file); + + /* It's possible that qemu file got error during sending */ + ret =3D qemu_file_get_error(mis->to_src_file); + +error: qemu_mutex_unlock(&mis->rp_mutex); + return ret; } =20 /* Request a range of pages from the source VM at the given @@ -215,26 +233,30 @@ static void migrate_send_rp_message(MigrationIncoming= State *mis, * Start: Address offset within the RB * Len: Length in bytes required - must be a multiple of pagesize */ -void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rb= name, - ram_addr_t start, size_t len) +int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbn= ame, + ram_addr_t start, size_t len) { uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */ size_t msglen =3D 12; /* start + len */ + int rbname_len; + enum mig_rp_message_type msg_type; =20 *(uint64_t *)bufc =3D cpu_to_be64((uint64_t)start); *(uint32_t *)(bufc + 8) =3D cpu_to_be32((uint32_t)len); =20 if (rbname) { - int rbname_len =3D strlen(rbname); + rbname_len =3D strlen(rbname); assert(rbname_len < 256); =20 bufc[msglen++] =3D rbname_len; memcpy(bufc + msglen, rbname, rbname_len); msglen +=3D rbname_len; - migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES_ID, msglen, bufc= ); + msg_type =3D MIG_RP_MSG_REQ_PAGES_ID; } else { - 
migrate_send_rp_message(mis, MIG_RP_MSG_REQ_PAGES, msglen, bufc); + msg_type =3D MIG_RP_MSG_REQ_PAGES; } + + return migrate_send_rp_message(mis, msg_type, msglen, bufc); } =20 void qemu_start_incoming_migration(const char *uri, Error **errp) diff --git a/migration/migration.h b/migration/migration.h index 7aaab13..047872b 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -201,7 +201,7 @@ void migrate_send_rp_shut(MigrationIncomingState *mis, uint32_t value); void migrate_send_rp_pong(MigrationIncomingState *mis, uint32_t value); -void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rb= name, +int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbn= ame, ram_addr_t start, size_t len); =20 #endif --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229928607988.4446973765133; Fri, 28 Jul 2017 01:18:48 -0700 (PDT) Received: from localhost ([::1]:46703 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Tv-00046B-67 for importer@patchew.org; Fri, 28 Jul 2017 04:18:47 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47110) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0JK-00018L-3y for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:51 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0JG-0001Mf-TR for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:50 -0400 Received: from 
mx1.redhat.com ([209.132.183.28]:52160) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0JG-0001MJ-Kd for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:46 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 952694F085; Fri, 28 Jul 2017 08:07:45 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id A227F5B80A; Fri, 28 Jul 2017 08:07:41 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 952694F085 Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:24 +0800 Message-Id: <1501229198-30588-16-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.28]); Fri, 28 Jul 2017 08:07:45 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 15/29] migration: allow fault thread to pause X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . 
David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Allows the fault thread to stop handling page faults temporarily. When network failure happened (and if we expect a recovery afterwards), we should not allow the fault thread to continue sending things to source, instead, it should halt for a while until the connection is rebuilt. When the dest main thread noticed the failure, it kicks the fault thread to switch to pause state. Signed-off-by: Peter Xu Reviewed-by: Dr. David Alan Gilbert --- migration/migration.c | 1 + migration/migration.h | 1 + migration/postcopy-ram.c | 50 ++++++++++++++++++++++++++++++++++++++++++++= ---- migration/savevm.c | 3 +++ migration/trace-events | 2 ++ 5 files changed, 53 insertions(+), 4 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 9a0b5b0..9d93836 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -147,6 +147,7 @@ MigrationIncomingState *migration_incoming_get_current(= void) qemu_mutex_init(&mis_current.rp_mutex); qemu_event_init(&mis_current.main_thread_load_event, false); qemu_sem_init(&mis_current.postcopy_pause_sem_dst, 0); + qemu_sem_init(&mis_current.postcopy_pause_sem_fault, 0); once =3D true; } return &mis_current; diff --git a/migration/migration.h b/migration/migration.h index 047872b..574fedd 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -63,6 +63,7 @@ struct MigrationIncomingState { =20 /* notify PAUSED postcopy incoming migrations to try to continue */ QemuSemaphore postcopy_pause_sem_dst; + QemuSemaphore postcopy_pause_sem_fault; }; =20 MigrationIncomingState *migration_incoming_get_current(void); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 9ce391d..ba53155 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -418,6 
+418,17 @@ static int ram_block_enable_notify(const char *block_n= ame, void *host_addr, return 0; } =20 +static bool postcopy_pause_fault_thread(MigrationIncomingState *mis) +{ + trace_postcopy_pause_fault_thread(); + + qemu_sem_wait(&mis->postcopy_pause_sem_fault); + + trace_postcopy_pause_fault_thread_continued(); + + return true; +} + /* * Handle faults detected by the USERFAULT markings */ @@ -465,6 +476,22 @@ static void *postcopy_ram_fault_thread(void *opaque) } } =20 + if (!mis->to_src_file) { + /* + * Possibly someone tells us that the return path is + * broken already using the event. We should hold until + * the channel is rebuilt. + */ + if (postcopy_pause_fault_thread(mis)) { + last_rb =3D NULL; + /* Continue to read the userfaultfd */ + } else { + error_report("%s: paused but don't allow to continue", + __func__); + break; + } + } + ret =3D read(mis->userfault_fd, &msg, sizeof(msg)); if (ret !=3D sizeof(msg)) { if (errno =3D=3D EAGAIN) { @@ -504,18 +531,33 @@ static void *postcopy_ram_fault_thread(void *opaque) qemu_ram_get_idstr(rb), rb_offset); =20 +retry: /* * Send the request to the source - we want to request one * of our host page sizes (which is >=3D TPS) */ if (rb !=3D last_rb) { last_rb =3D rb; - migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), - rb_offset, qemu_ram_pagesize(rb)); + ret =3D migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), + rb_offset, qemu_ram_pagesize(r= b)); } else { /* Save some space */ - migrate_send_rp_req_pages(mis, NULL, - rb_offset, qemu_ram_pagesize(rb)); + ret =3D migrate_send_rp_req_pages(mis, NULL, + rb_offset, qemu_ram_pagesize(r= b)); + } + + if (ret) { + /* May be network failure, try to wait for recovery */ + if (ret =3D=3D -EIO && postcopy_pause_fault_thread(mis)) { + /* We got reconnected somehow, try to continue */ + last_rb =3D NULL; + goto retry; + } else { + /* This is a unavoidable fault */ + error_report("%s: migrate_send_rp_req_pages() get %d", + __func__, ret); + break; + } } } 
trace_postcopy_ram_fault_thread_exit(); diff --git a/migration/savevm.c b/migration/savevm.c index 1f62268..386788d 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1974,6 +1974,9 @@ static bool postcopy_pause_incoming(MigrationIncoming= State *mis) mis->to_src_file =3D NULL; qemu_mutex_unlock(&mis->rp_mutex); =20 + /* Notify the fault thread for the invalidated file handle */ + postcopy_fault_thread_notify(mis); + while (mis->state =3D=3D MIGRATION_STATUS_POSTCOPY_PAUSED) { qemu_sem_wait(&mis->postcopy_pause_sem_dst); } diff --git a/migration/trace-events b/migration/trace-events index a269eec..dbb4971 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -100,6 +100,8 @@ open_return_path_on_source_continue(void) "" postcopy_start(void) "" postcopy_pause_return_path(void) "" postcopy_pause_return_path_continued(void) "" +postcopy_pause_fault_thread(void) "" +postcopy_pause_fault_thread_continued(void) "" postcopy_pause_continued(void) "" postcopy_pause_incoming(void) "" postcopy_pause_incoming_continued(void) "" --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501230092847286.6999477367773; Fri, 28 Jul 2017 01:21:32 -0700 (PDT) Received: from localhost ([::1]:46723 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0WZ-0007cj-AW for importer@patchew.org; Fri, 28 Jul 2017 04:21:31 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47134) by lists.gnu.org with esmtp (Exim 
4.71) (envelope-from ) id 1db0JL-0001A2-RP for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:52 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0JK-0001OF-Ql for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:51 -0400 Received: from mx1.redhat.com ([209.132.183.28]:56962) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0JK-0001Nt-HJ for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:50 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 7030BF8019; Fri, 28 Jul 2017 08:07:49 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 0A3F8600C2; Fri, 28 Jul 2017 08:07:45 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 7030BF8019 Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:25 +0800 Message-Id: <1501229198-30588-17-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.39]); Fri, 28 Jul 2017 08:07:49 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 16/29] qmp: hmp: add migrate "resume" option X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: 
List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" It will be used when we want to resume one paused migration. Signed-off-by: Peter Xu Reviewed-by: Dr. David Alan Gilbert --- hmp-commands.hx | 7 ++++--- hmp.c | 4 +++- migration/migration.c | 2 +- qapi-schema.json | 5 ++++- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index 1941e19..7adb029 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -928,13 +928,14 @@ ETEXI =20 { .name =3D "migrate", - .args_type =3D "detach:-d,blk:-b,inc:-i,uri:s", - .params =3D "[-d] [-b] [-i] uri", + .args_type =3D "detach:-d,blk:-b,inc:-i,resume:-r,uri:s", + .params =3D "[-d] [-b] [-i] [-r] uri", .help =3D "migrate to URI (using -d to not wait for completi= on)" "\n\t\t\t -b for migration without shared storage with" " full copy of disk\n\t\t\t -i for migration without " "shared storage with incremental copy of disk " - "(base image shared between src and destination)", + "(base image shared between src and destination)" + "\n\t\t\t -r to resume a paused migration", .cmd =3D hmp_migrate, }, =20 diff --git a/hmp.c b/hmp.c index fd80dce..ebc1563 100644 --- a/hmp.c +++ b/hmp.c @@ -1891,10 +1891,12 @@ void hmp_migrate(Monitor *mon, const QDict *qdict) bool detach =3D qdict_get_try_bool(qdict, "detach", false); bool blk =3D qdict_get_try_bool(qdict, "blk", false); bool inc =3D qdict_get_try_bool(qdict, "inc", false); + bool resume =3D qdict_get_try_bool(qdict, "resume", false); const char *uri =3D qdict_get_str(qdict, "uri"); Error *err =3D NULL; =20 - qmp_migrate(uri, !!blk, blk, !!inc, inc, false, false, &err); + 
qmp_migrate(uri, !!blk, blk, !!inc, inc, + false, false, true, resume, &err); if (err) { error_report_err(err); return; diff --git a/migration/migration.c b/migration/migration.c index 9d93836..36ff8c3 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1238,7 +1238,7 @@ bool migration_is_blocked(Error **errp) =20 void qmp_migrate(const char *uri, bool has_blk, bool blk, bool has_inc, bool inc, bool has_detach, bool detach, - Error **errp) + bool has_resume, bool resume, Error **errp) { Error *local_err =3D NULL; MigrationState *s =3D migrate_get_current(); diff --git a/qapi-schema.json b/qapi-schema.json index 2a36b80..27b7c4c 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3208,6 +3208,8 @@ # @detach: this argument exists only for compatibility reasons and # is ignored by QEMU # +# @resume: resume one paused migration +# # Returns: nothing on success # # Since: 0.14.0 @@ -3229,7 +3231,8 @@ # ## { 'command': 'migrate', - 'data': {'uri': 'str', '*blk': 'bool', '*inc': 'bool', '*detach': 'bool'= } } + 'data': {'uri': 'str', '*blk': 'bool', '*inc': 'bool', + '*detach': 'bool', '*resume': 'bool' } } =20 ## # @migrate-incoming: --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501230173057671.3196634370537; Fri, 28 Jul 2017 01:22:53 -0700 (PDT) Received: from localhost ([::1]:46729 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Xr-0000DV-LZ for importer@patchew.org; Fri, 28 Jul 
2017 04:22:51 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47155) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0JO-0001Cw-Lq for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:55 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0JN-0001PK-Io for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:54 -0400 Received: from mx1.redhat.com ([209.132.183.28]:40036) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0JN-0001P1-8i for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:53 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 3A5E712394; Fri, 28 Jul 2017 08:07:52 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id D9000600C2; Fri, 28 Jul 2017 08:07:49 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 3A5E712394 Authentication-Results: ext-mx01.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx01.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:26 +0800 Message-Id: <1501229198-30588-18-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.25]); Fri, 28 Jul 2017 08:07:52 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 17/29] migration: rebuild 
channel on source X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" This patch detects the "resume" flag of the migrate command, and rebuilds the channels only if the flag is set. Signed-off-by: Peter Xu --- migration/migration.c | 52 ++++++++++++++++++++++++++++++++++++++++-------= ---- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 36ff8c3..64de0ee 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1244,6 +1244,15 @@ void qmp_migrate(const char *uri, bool has_blk, bool= blk, MigrationState *s =3D migrate_get_current(); const char *p; =20 + if (has_resume && resume) { + if (s->state !=3D MIGRATION_STATUS_POSTCOPY_PAUSED) { + error_setg(errp, "Cannot resume if there is no " + "paused migration"); + return; + } + goto do_resume; + } + if (migration_is_setup_or_active(s->state) || s->state =3D=3D MIGRATION_STATUS_CANCELLING || s->state =3D=3D MIGRATION_STATUS_COLO) { @@ -1279,6 +1288,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool = blk, =20 s =3D migrate_init(); =20 +do_resume: if (strstart(uri, "tcp:", &p)) { tcp_start_outgoing_migration(s, p, &local_err); #ifdef CONFIG_RDMA @@ -1700,7 +1710,8 @@ out: return NULL; } =20 -static int open_return_path_on_source(MigrationState *ms) +static int open_return_path_on_source(MigrationState *ms, + bool create_thread) { =20 ms->rp_state.from_dst_file =3D qemu_file_get_return_path(ms->to_dst_fi= le); @@ -1709,6 +1720,12 @@ static int open_return_path_on_source(MigrationState= *ms) } =20
trace_open_return_path_on_source(); + + if (!create_thread) { + /* We're done */ + return 0; + } + qemu_thread_create(&ms->rp_state.rp_thread, "return path", source_return_path_thread, ms, QEMU_THREAD_JOINABLE= ); =20 @@ -2249,15 +2266,24 @@ static void *migration_thread(void *opaque) =20 void migrate_fd_connect(MigrationState *s) { - s->expected_downtime =3D s->parameters.downtime_limit; - s->cleanup_bh =3D qemu_bh_new(migrate_fd_cleanup, s); + int64_t rate_limit; + bool resume =3D s->state =3D=3D MIGRATION_STATUS_POSTCOPY_PAUSED; =20 - qemu_file_set_blocking(s->to_dst_file, true); - qemu_file_set_rate_limit(s->to_dst_file, - s->parameters.max_bandwidth / XFER_LIMIT_RATI= O); + if (resume) { + /* This is a resumed migration */ + rate_limit =3D INT64_MAX; + } else { + /* This is a fresh new migration */ + rate_limit =3D s->parameters.max_bandwidth / XFER_LIMIT_RATIO; + s->expected_downtime =3D s->parameters.downtime_limit; + s->cleanup_bh =3D qemu_bh_new(migrate_fd_cleanup, s); =20 - /* Notify before starting migration thread */ - notifier_list_notify(&migration_state_notifiers, s); + /* Notify before starting migration thread */ + notifier_list_notify(&migration_state_notifiers, s); + } + + qemu_file_set_rate_limit(s->to_dst_file, rate_limit); + qemu_file_set_blocking(s->to_dst_file, true); =20 /* * Open the return path. For postcopy, it is used exclusively. For @@ -2265,15 +2291,19 @@ void migrate_fd_connect(MigrationState *s) * QEMU uses the return path. 
*/ if (migrate_postcopy_ram() || migrate_use_return_path()) { - if (open_return_path_on_source(s)) { + if (open_return_path_on_source(s, !resume)) { error_report("Unable to open return-path for postcopy"); - migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, - MIGRATION_STATUS_FAILED); + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED= ); migrate_fd_cleanup(s); return; } } =20 + if (resume) { + /* TODO: do the resume logic */ + return; + } + qemu_thread_create(&s->thread, "live_migration", migration_thread, s, QEMU_THREAD_JOINABLE); s->migration_thread_running =3D true; --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 150122966101889.72432898368629; Fri, 28 Jul 2017 01:14:21 -0700 (PDT) Received: from localhost ([::1]:46682 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Pa-0008Nk-8V for importer@patchew.org; Fri, 28 Jul 2017 04:14:18 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47223) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0JT-0001Gu-9k for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:00 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0JQ-0001Qt-4h for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:59 -0400 Received: from mx1.redhat.com ([209.132.183.28]:35100) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0JP-0001QS-SC for 
qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:56 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id CF815486D2; Fri, 28 Jul 2017 08:07:54 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id A72C9600C2; Fri, 28 Jul 2017 08:07:52 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com CF815486D2 Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx06.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:27 +0800 Message-Id: <1501229198-30588-19-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.30]); Fri, 28 Jul 2017 08:07:54 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 18/29] migration: new state "postcopy-recover" X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . 
David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Introduce a new migration state, "postcopy-recover". If a migration procedure is paused and the connection is afterwards rebuilt successfully, we'll switch the source VM state from "postcopy-paused" to the new state "postcopy-recover", then we'll do the resume logic in the migration thread (along with the return path thread). This patch only does the state switch on the source side. A follow-up patch will handle the state switching on the destination side using the same status bit. Signed-off-by: Peter Xu --- migration/migration.c | 45 +++++++++++++++++++++++++++++++++++++++++---- qapi-schema.json | 4 +++- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 64de0ee..3aabe11 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -495,6 +495,7 @@ static bool migration_is_setup_or_active(int state) case MIGRATION_STATUS_ACTIVE: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER: case MIGRATION_STATUS_SETUP: return true; =20 @@ -571,6 +572,7 @@ MigrationInfo *qmp_query_migrate(Error **errp) case MIGRATION_STATUS_CANCELLING: case MIGRATION_STATUS_POSTCOPY_ACTIVE: case MIGRATION_STATUS_POSTCOPY_PAUSED: + case MIGRATION_STATUS_POSTCOPY_RECOVER: /* TODO add some postcopy stats */ info->has_status =3D true; info->has_total_time =3D true; @@ -2018,6 +2020,13 @@ static bool postcopy_should_start(MigrationState *s) return atomic_read(&s->start_postcopy) || s->start_postcopy_fast; } =20 +/* Return zero if success, or <0 for error */ +static int postcopy_do_resume(MigrationState *s) +{ + /* TODO: do the resume logic */ + return 0; +} + /* * We don't return until we are in a safe state to continue current
* postcopy migration. Returns true to continue the migration, or @@ -2026,7 +2035,9 @@ static bool postcopy_should_start(MigrationState *s) static bool postcopy_pause(MigrationState *s) { assert(s->state =3D=3D MIGRATION_STATUS_POSTCOPY_ACTIVE); - migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, + +do_pause: + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_POSTCOPY_PAUSED); =20 /* Current channel is possibly broken. Release it. */ @@ -2043,9 +2054,32 @@ static bool postcopy_pause(MigrationState *s) qemu_sem_wait(&s->postcopy_pause_sem); } =20 - trace_postcopy_pause_continued(); + if (s->state =3D=3D MIGRATION_STATUS_POSTCOPY_RECOVER) { + /* We were waken up by a recover procedure. Give it a shot */ =20 - return true; + /* + * Firstly, let's wake up the return path now, with a new + * return path channel. + */ + qemu_sem_post(&s->postcopy_pause_rp_sem); + + /* Do the resume logic */ + if (postcopy_do_resume(s) =3D=3D 0) { + /* Let's continue! */ + trace_postcopy_pause_continued(); + return true; + } else { + /* + * Something wrong happened during the recovery, let's + * pause again. Pause is always better than throwing data + * away. + */ + goto do_pause; + } + } else { + /* This is not right... Time to quit. */ + return false; + } } =20 /* Return true if we want to stop the migration, otherwise false. */ @@ -2300,7 +2334,10 @@ void migrate_fd_connect(MigrationState *s) } =20 if (resume) { - /* TODO: do the resume logic */ + /* Wakeup the main migration thread to do the recovery */ + migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, + MIGRATION_STATUS_POSTCOPY_RECOVER); + qemu_sem_post(&s->postcopy_pause_sem); return; } =20 diff --git a/qapi-schema.json b/qapi-schema.json index 27b7c4c..10f1f60 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -669,6 +669,8 @@ # # @postcopy-paused: during postcopy but paused. (since 2.10) # +# @postcopy-recover: trying to recover from a paused postcopy. 
(since 2.11) +# # @completed: migration is finished. # # @failed: some error occurred during migration process. @@ -682,7 +684,7 @@ { 'enum': 'MigrationStatus', 'data': [ 'none', 'setup', 'cancelling', 'cancelled', 'active', 'postcopy-active', 'postcopy-paused', - 'completed', 'failed', 'colo' ] } + 'postcopy-recover', 'completed', 'failed', 'colo' ] } =20 ## # @MigrationInfo: --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501230245698511.34050312747195; Fri, 28 Jul 2017 01:24:05 -0700 (PDT) Received: from localhost ([::1]:46734 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Z2-0001HN-9C for importer@patchew.org; Fri, 28 Jul 2017 04:24:04 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47228) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0JT-0001HC-II for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:00 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0JS-0001Sd-Nw for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:59 -0400 Received: from mx1.redhat.com ([209.132.183.28]:54788) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0JS-0001S0-Gx for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:07:58 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate 
requested) by mx1.redhat.com (Postfix) with ESMTPS id 746D4C0CD022; Fri, 28 Jul 2017 08:07:57 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 46F68600C2; Fri, 28 Jul 2017 08:07:55 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 746D4C0CD022 Authentication-Results: ext-mx07.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx07.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:28 +0800 Message-Id: <1501229198-30588-20-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.31]); Fri, 28 Jul 2017 08:07:57 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 19/29] migration: let dst listen on port always X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . 
David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Signed-off-by: Peter Xu --- migration/exec.c | 2 +- migration/fd.c | 2 +- migration/socket.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/migration/exec.c b/migration/exec.c index 08b599e..b4412db 100644 --- a/migration/exec.c +++ b/migration/exec.c @@ -49,7 +49,7 @@ static gboolean exec_accept_incoming_migration(QIOChannel= *ioc, { migration_channel_process_incoming(ioc); object_unref(OBJECT(ioc)); - return FALSE; /* unregister */ + return TRUE; /* keep it registered */ } =20 void exec_start_incoming_migration(const char *command, Error **errp) diff --git a/migration/fd.c b/migration/fd.c index 30f5258..865277a 100644 --- a/migration/fd.c +++ b/migration/fd.c @@ -49,7 +49,7 @@ static gboolean fd_accept_incoming_migration(QIOChannel *= ioc, { migration_channel_process_incoming(ioc); object_unref(OBJECT(ioc)); - return FALSE; /* unregister */ + return TRUE; /* keep it registered */ } =20 void fd_start_incoming_migration(const char *infd, Error **errp) diff --git a/migration/socket.c b/migration/socket.c index 757d382..f2c2d01 100644 --- a/migration/socket.c +++ b/migration/socket.c @@ -153,8 +153,8 @@ static gboolean socket_accept_incoming_migration(QIOCha= nnel *ioc, =20 out: /* Close listening socket as its no longer needed */ - qio_channel_close(ioc, NULL); - return FALSE; /* unregister */ + // qio_channel_close(ioc, NULL); + return TRUE; /* keep it registered */ } =20 =20 --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; 
spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501230310185561.1311631654927; Fri, 28 Jul 2017 01:25:10 -0700 (PDT) Received: from localhost ([::1]:46738 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0a4-0002mi-CB for importer@patchew.org; Fri, 28 Jul 2017 04:25:08 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47326) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Jc-0001Mi-A9 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:09 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0JZ-0001X9-13 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:08 -0400 Received: from mx1.redhat.com ([209.132.183.28]:40222) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0JY-0001Wg-RF for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:04 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id C44CA12398; Fri, 28 Jul 2017 08:08:03 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id E003D600C2; Fri, 28 Jul 2017 08:07:57 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com C44CA12398 Authentication-Results: ext-mx01.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx01.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:29 +0800 Message-Id: 
<1501229198-30588-21-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.25]); Fri, 28 Jul 2017 08:08:03 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 20/29] migration: wakeup dst ram-load-thread for recover X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" On the destination side, we cannot wake up all the threads when we got reconnected. The first thing to do is to wake up the main load thread, so that we can continue to receive valid messages from source again and reply when needed. At this point, we switch the destination VM state from postcopy-paused back to postcopy-recover. Now we are finally ready to do the resume logic. Signed-off-by: Peter Xu Reviewed-by: Dr. 
David Alan Gilbert --- migration/migration.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 3aabe11..e498fa4 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -389,10 +389,38 @@ static void process_incoming_migration_co(void *opaqu= e) =20 void migration_fd_process_incoming(QEMUFile *f) { - Coroutine *co =3D qemu_coroutine_create(process_incoming_migration_co,= f); + MigrationIncomingState *mis =3D migration_incoming_get_current(); + Coroutine *co; + + mis->from_src_file =3D f; + + if (mis->state =3D=3D MIGRATION_STATUS_POSTCOPY_PAUSED) { + /* Resumed migration to postcopy state */ + + /* Postcopy has standalone thread to do vm load */ + qemu_file_set_blocking(f, true); + + /* Re-configure the return path */ + mis->to_src_file =3D qemu_file_get_return_path(f); =20 - qemu_file_set_blocking(f, false); - qemu_coroutine_enter(co); + /* Reset the migration status to postcopy-active */ + migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED, + MIGRATION_STATUS_POSTCOPY_RECOVER); + + /* + * Here, we only wake up the main loading thread (while the + * fault thread will still be waiting), so that we can receive + * commands from source now, and answer it if needed. The + * fault thread will be waked up afterwards until we are sure + * that source is ready to reply to page requests. 
+ */ + qemu_sem_post(&mis->postcopy_pause_sem_dst); + } else { + /* New incoming migration */ + qemu_file_set_blocking(f, false); + co =3D qemu_coroutine_create(process_incoming_migration_co, f); + qemu_coroutine_enter(co); + } } =20 /* --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501230395955305.75786213984395; Fri, 28 Jul 2017 01:26:35 -0700 (PDT) Received: from localhost ([::1]:46752 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0bS-0004wO-Lo for importer@patchew.org; Fri, 28 Jul 2017 04:26:34 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47330) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Jc-0001NQ-JC for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:09 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Jb-0001Yv-Li for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:08 -0400 Received: from mx1.redhat.com ([209.132.183.28]:34020) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Jb-0001YC-Cy for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:07 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 620EE2A2D0; Fri, 28 Jul 2017 08:08:06 +0000 (UTC) Received: from pxdev.xzpeter.org.com 
(dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 3C1315B816; Fri, 28 Jul 2017 08:08:04 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 620EE2A2D0 Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:30 +0800 Message-Id: <1501229198-30588-22-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.27]); Fri, 28 Jul 2017 08:08:06 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 21/29] migration: new cmd MIG_CMD_RECV_BITMAP X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Add a new vm command MIG_CMD_RECV_BITMAP to request received bitmap for one ramblock. Signed-off-by: Peter Xu Reviewed-by: Dr. 
David Alan Gilbert --- migration/savevm.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++= ++++ migration/savevm.h | 1 + migration/trace-events | 1 + 3 files changed, 61 insertions(+) diff --git a/migration/savevm.c b/migration/savevm.c index 386788d..0ab13c0 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -78,6 +78,7 @@ enum qemu_vm_cmd { were previously sent during precopy but are dirty. */ MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream= */ + MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */ MIG_CMD_MAX }; =20 @@ -95,6 +96,7 @@ static struct mig_cmd_args { [MIG_CMD_POSTCOPY_RAM_DISCARD] =3D { .len =3D -1, .name =3D "POSTCOPY_RAM_DI= SCARD" }, [MIG_CMD_PACKAGED] =3D { .len =3D 4, .name =3D "PACKAGED" }, + [MIG_CMD_RECV_BITMAP] =3D { .len =3D -1, .name =3D "RECV_BITMAP" = }, [MIG_CMD_MAX] =3D { .len =3D -1, .name =3D "MAX" }, }; =20 @@ -929,6 +931,19 @@ void qemu_savevm_send_postcopy_run(QEMUFile *f) qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL); } =20 +void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name) +{ + size_t len; + char buf[512]; + + trace_savevm_send_recv_bitmap(block_name); + + buf[0] =3D len =3D strlen(block_name); + memcpy(buf + 1, block_name, len); + + qemu_savevm_command_send(f, MIG_CMD_RECV_BITMAP, len + 1, (uint8_t *)b= uf); +} + bool qemu_savevm_state_blocked(Error **errp) { SaveStateEntry *se; @@ -1705,6 +1720,47 @@ static int loadvm_handle_cmd_packaged(MigrationIncom= ingState *mis) } =20 /* + * Handle request that source requests for recved_bitmap on + * destination. 
Payload format: + * + * len (1 byte) + ramblock_name (<255 bytes) + */ +static int loadvm_handle_recv_bitmap(MigrationIncomingState *mis, + uint16_t len) +{ + QEMUFile *file =3D mis->from_src_file; + RAMBlock *rb; + char block_name[256]; + size_t cnt; + + cnt =3D qemu_get_counted_string(file, block_name); + if (!cnt) { + error_report("%s: failed to read block name", __func__); + return -EINVAL; + } + + /* Validate before using the data */ + if (qemu_file_get_error(file)) { + return qemu_file_get_error(file); + } + + if (len !=3D cnt + 1) { + error_report("%s: invalid payload length (%d)", __func__, len); + return -EINVAL; + } + + rb =3D qemu_ram_block_by_name(block_name); + if (!rb) { + error_report("%s: block '%s' not found", __func__, block_name); + return -EINVAL; + } + + /* TODO: send the bitmap back to source */ + + return 0; +} + +/* * Process an incoming 'QEMU_VM_COMMAND' * 0 just a normal return * LOADVM_QUIT All good, but exit the loop @@ -1777,6 +1833,9 @@ static int loadvm_process_command(QEMUFile *f) =20 case MIG_CMD_POSTCOPY_RAM_DISCARD: return loadvm_postcopy_ram_handle_discard(mis, len); + + case MIG_CMD_RECV_BITMAP: + return loadvm_handle_recv_bitmap(mis, len); } =20 return 0; diff --git a/migration/savevm.h b/migration/savevm.h index 295c4a1..8126b1c 100644 --- a/migration/savevm.h +++ b/migration/savevm.h @@ -46,6 +46,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t = *buf, size_t len); void qemu_savevm_send_postcopy_advise(QEMUFile *f); void qemu_savevm_send_postcopy_listen(QEMUFile *f); void qemu_savevm_send_postcopy_run(QEMUFile *f); +void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name); =20 void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, uint16_t len, diff --git a/migration/trace-events b/migration/trace-events index dbb4971..ca7b43f 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -34,6 +34,7 @@ savevm_send_open_return_path(void) "" savevm_send_ping(uint32_t val) "%x" 
savevm_send_postcopy_listen(void) "" savevm_send_postcopy_run(void) "" +savevm_send_recv_bitmap(char *name) "%s" savevm_state_setup(void) "" savevm_state_header(void) "" savevm_state_iterate(void) "" --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501230492445731.61413252474; Fri, 28 Jul 2017 01:28:12 -0700 (PDT) Received: from localhost ([::1]:46756 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0d0-000612-Vf for importer@patchew.org; Fri, 28 Jul 2017 04:28:10 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47380) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Jf-0001Rp-R5 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:13 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Je-0001bu-DX for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:11 -0400 Received: from mx1.redhat.com ([209.132.183.28]:42252) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Je-0001bF-4n for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:10 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 07650C024B1A; Fri, 28 Jul 2017 08:08:09 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com 
[10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id D0CE8600C2; Fri, 28 Jul 2017 08:08:06 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 07650C024B1A Authentication-Results: ext-mx08.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx08.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:31 +0800 Message-Id: <1501229198-30588-23-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.32]); Fri, 28 Jul 2017 08:08:09 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 22/29] migration: new message MIG_RP_MSG_RECV_BITMAP X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Introducing new return path message MIG_RP_MSG_RECV_BITMAP to send received bitmap of ramblock back to source. This is the reply message of MIG_CMD_RECV_BITMAP, it contains not only the header (including the ramblock name), and it was appended with the whole ramblock received bitmap on the destination side. 
When the source receives such a reply message (MIG_RP_MSG_RECV_BITMAP), it parses it and converts it into the dirty bitmap by inverting the bits. Signed-off-by: Peter Xu --- migration/migration.c | 62 ++++++++++++++++++++++++++++++++++++++++++ migration/migration.h | 2 ++ migration/ram.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++= ++++ migration/ram.h | 2 ++ migration/savevm.c | 2 +- migration/trace-events | 2 ++ 6 files changed, 143 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index e498fa4..c2b85ac 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -92,6 +92,7 @@ enum mig_rp_message_type { =20 MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) = */ MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ + MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ =20 MIG_RP_MSG_MAX }; @@ -450,6 +451,39 @@ void migrate_send_rp_pong(MigrationIncomingState *mis, migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf); } =20 +void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, + char *block_name) +{ + char buf[512]; + int len; + int64_t res; + + /* + * First, we send the header part. It contains only the len of + * idstr, and the idstr itself. + */ + len =3D strlen(block_name); + buf[0] =3D len; + memcpy(buf + 1, block_name, len); + + migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf); + + /* + * Next, we dump the received bitmap to the stream. + * + * TODO: currently we are safe since we are the only one that is + * using the to_src_file handle (fault thread is still paused), + * and it's ok even not taking the mutex. However the best way is + * to take the lock before sending the message header, and release + * the lock after sending the bitmap.
+ */ + qemu_mutex_lock(&mis->rp_mutex); + res =3D ramblock_recv_bitmap_send(mis->to_src_file, block_name); + qemu_mutex_unlock(&mis->rp_mutex); + + trace_migrate_send_rp_recv_bitmap(block_name, res); +} + MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) { MigrationCapabilityStatusList *head =3D NULL; @@ -1560,6 +1594,7 @@ static struct rp_cmd_args { [MIG_RP_MSG_PONG] =3D { .len =3D 4, .name =3D "PONG" }, [MIG_RP_MSG_REQ_PAGES] =3D { .len =3D 12, .name =3D "REQ_PAGES" }, [MIG_RP_MSG_REQ_PAGES_ID] =3D { .len =3D -1, .name =3D "REQ_PAGES_ID= " }, + [MIG_RP_MSG_RECV_BITMAP] =3D { .len =3D -1, .name =3D "RECV_BITMAP"= }, [MIG_RP_MSG_MAX] =3D { .len =3D -1, .name =3D "MAX" }, }; =20 @@ -1604,6 +1639,19 @@ static bool postcopy_pause_return_path_thread(Migrat= ionState *s) return true; } =20 +static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_na= me) +{ + RAMBlock *block =3D qemu_ram_block_by_name(block_name); + + if (!block) { + error_report("%s: invalid block name '%s'", __func__, block_name); + return -EINVAL; + } + + /* Fetch the received bitmap and refresh the dirty bitmap */ + return ram_dirty_bitmap_reload(s, block); +} + /* * Handles messages sent on the return path towards the source VM * @@ -1709,6 +1757,20 @@ retry: migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len); break; =20 + case MIG_RP_MSG_RECV_BITMAP: + if (header_len < 1) { + error_report("%s: missing block name", __func__); + mark_source_rp_bad(ms); + goto out; + } + /* Format: len (1B) + idstr (<255B). This ends the idstr. 
*/ + buf[buf[0] + 1] =3D '\0'; + if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) { + mark_source_rp_bad(ms); + goto out; + } + break; + default: break; } diff --git a/migration/migration.h b/migration/migration.h index 574fedd..4d38308 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -204,5 +204,7 @@ void migrate_send_rp_pong(MigrationIncomingState *mis, uint32_t value); int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbn= ame, ram_addr_t start, size_t len); +void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, + char *block_name); =20 #endif diff --git a/migration/ram.c b/migration/ram.c index 7f4cb0f..d543483 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -182,6 +182,32 @@ void ramblock_recv_bitmap_clear(void *host_addr, RAMBl= ock *rb) } =20 /* + * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes). + * + * Returns >0 if success with sent bytes, or <0 if error. + */ +int64_t ramblock_recv_bitmap_send(QEMUFile *file, char *block_name) +{ + RAMBlock *block =3D qemu_ram_block_by_name(block_name); + uint64_t size; + + /* We should have made sure that the block exists */ + assert(block); + + /* Size of the bitmap, in bytes */ + size =3D (block->max_length >> TARGET_PAGE_BITS) / 8; + qemu_put_be64(file, size); + qemu_put_buffer(file, (const uint8_t *)block->receivedmap, size); + qemu_fflush(file); + + if (qemu_file_get_error(file)) { + return qemu_file_get_error(file); + } + + return sizeof(size) + size; +} + +/* * An outstanding page request, on the source, having been received * and queued */ @@ -2705,6 +2731,54 @@ static int ram_load(QEMUFile *f, void *opaque, int v= ersion_id) return ret; } =20 +/* + * Read the received bitmap, revert it as the initial dirty bitmap. + * This is only used when the postcopy migration is paused but wants + * to resume from a middle point. 
+ */ +int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) +{ + QEMUFile *file =3D s->rp_state.from_dst_file; + uint64_t local_size =3D (block->max_length >> TARGET_PAGE_BITS) / 8; + uint64_t size; + + if (s->state !=3D MIGRATION_STATUS_POSTCOPY_RECOVER) { + error_report("%s: incorrect state %s", __func__, + MigrationStatus_lookup[s->state]); + return -EINVAL; + } + + size =3D qemu_get_be64(file); + + /* The size of the bitmap should match with our ramblock */ + if (size !=3D local_size) { + error_report("%s: ramblock '%s' bitmap size mismatch " + "(0x%lx !=3D 0x%lx)", __func__, block->idstr, + size, local_size); + return -EINVAL; + } + + /* + * We are still during migration (though paused). The dirty bitmap + * won't change. We can directly modify it. + */ + size =3D qemu_get_buffer(file, (uint8_t *)block->bmap, local_size); + + if (qemu_file_get_error(file)) { + return qemu_file_get_error(file); + } + + /* + * What we received is "received bitmap". Revert it as the initial + * dirty bitmap for this ramblock. 
+ */ + bitmap_invert(block->bmap, block->max_length >> TARGET_PAGE_BITS); + + trace_ram_dirty_bitmap_reload(block->idstr); + + return 0; +} + static SaveVMHandlers savevm_ram_handlers =3D { .save_setup =3D ram_save_setup, .save_live_iterate =3D ram_save_iterate, diff --git a/migration/ram.h b/migration/ram.h index 84e8623..86eb973 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -58,5 +58,7 @@ void ramblock_recv_bitmap_set(void *host_addr, RAMBlock *= rb); void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t len); void ramblock_recv_bitmap_clear(void *host_addr, RAMBlock *rb); +int64_t ramblock_recv_bitmap_send(QEMUFile *file, char *block_name); +int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb); =20 #endif diff --git a/migration/savevm.c b/migration/savevm.c index 0ab13c0..def9213 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1755,7 +1755,7 @@ static int loadvm_handle_recv_bitmap(MigrationIncomin= gState *mis, return -EINVAL; } =20 - /* TODO: send the bitmap back to source */ + migrate_send_rp_recv_bitmap(mis, block_name); =20 return 0; } diff --git a/migration/trace-events b/migration/trace-events index ca7b43f..ed69551 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -77,6 +77,7 @@ ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRI= x64 " %x" ram_postcopy_send_discard_bitmap(void) "" ram_save_page(const char *rbname, uint64_t offset, void *host) "%s: offset= : %" PRIx64 " host: %p" ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: st= art: %zx len: %zx" +ram_dirty_bitmap_reload(char *str) "%s" =20 # migration/migration.c await_return_path_close_on_source_close(void) "" @@ -88,6 +89,7 @@ migrate_fd_cancel(void) "" migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) = "in %s at %zx len %zx" migrate_pending(uint64_t size, uint64_t max, uint64_t post, uint64_t nonpo= st) "pending size %" PRIu64 " max %" PRIu64 " (post=3D%" PRIu64 " nonpost= =3D%" 
PRIu64 ")" migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d" +migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%= "PRIi64 migration_completion_file_err(void) "" migration_completion_postcopy_end(void) "" migration_completion_postcopy_end_after_complete(void) "" --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229808285605.855106376384; Fri, 28 Jul 2017 01:16:48 -0700 (PDT) Received: from localhost ([::1]:46696 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Ry-0002aJ-Tr for importer@patchew.org; Fri, 28 Jul 2017 04:16:47 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47400) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Jh-0001Ts-Ui for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:17 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Jg-0001dR-Tb for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:13 -0400 Received: from mx1.redhat.com ([209.132.183.28]:42282) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Jg-0001cz-KX for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:12 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 99105C0FED2C; Fri, 28 Jul 
2017 08:08:11 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 6EF4F600C2; Fri, 28 Jul 2017 08:08:09 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 99105C0FED2C Authentication-Results: ext-mx08.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx08.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:32 +0800 Message-Id: <1501229198-30588-24-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.32]); Fri, 28 Jul 2017 08:08:11 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 23/29] migration: new cmd MIG_CMD_POSTCOPY_RESUME X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Introducing this new command to be sent when the source VM is ready to resume the paused migration. What the destination does here is basically release the fault thread to continue servicing page faults.
Signed-off-by: Peter Xu --- migration/savevm.c | 27 +++++++++++++++++++++++++++ migration/savevm.h | 1 + migration/trace-events | 1 + 3 files changed, 29 insertions(+) diff --git a/migration/savevm.c b/migration/savevm.c index def9213..2e330bc 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -77,6 +77,7 @@ enum qemu_vm_cmd { MIG_CMD_POSTCOPY_RAM_DISCARD, /* A list of pages to discard that were previously sent during precopy but are dirty. */ + MIG_CMD_POSTCOPY_RESUME, /* resume postcopy on dest */ MIG_CMD_PACKAGED, /* Send a wrapped stream within this stream= */ MIG_CMD_RECV_BITMAP, /* Request for recved bitmap on dst */ MIG_CMD_MAX @@ -95,6 +96,7 @@ static struct mig_cmd_args { [MIG_CMD_POSTCOPY_RUN] =3D { .len =3D 0, .name =3D "POSTCOPY_RUN"= }, [MIG_CMD_POSTCOPY_RAM_DISCARD] =3D { .len =3D -1, .name =3D "POSTCOPY_RAM_DI= SCARD" }, + [MIG_CMD_POSTCOPY_RESUME] =3D { .len =3D 0, .name =3D "POSTCOPY_RESU= ME" }, [MIG_CMD_PACKAGED] =3D { .len =3D 4, .name =3D "PACKAGED" }, [MIG_CMD_RECV_BITMAP] =3D { .len =3D -1, .name =3D "RECV_BITMAP" = }, [MIG_CMD_MAX] =3D { .len =3D -1, .name =3D "MAX" }, @@ -931,6 +933,12 @@ void qemu_savevm_send_postcopy_run(QEMUFile *f) qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RUN, 0, NULL); } =20 +void qemu_savevm_send_postcopy_resume(QEMUFile *f) +{ + trace_savevm_send_postcopy_resume(); + qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_RESUME, 0, NULL); +} + void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name) { size_t len; @@ -1671,6 +1679,22 @@ static int loadvm_postcopy_handle_run(MigrationIncom= ingState *mis) return LOADVM_QUIT; } =20 +static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis) +{ + /* + * This means source VM is ready to resume the postcopy migration. + * It's time to switch state and release the fault thread to + * continue service page faults. 
+ */ + migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER, + MIGRATION_STATUS_POSTCOPY_ACTIVE); + qemu_sem_post(&mis->postcopy_pause_sem_fault); + + /* TODO: Tell source that "we are ready" */ + + return 0; +} + /** * Immediately following this command is a blob of data containing an embe= dded * chunk of migration stream; read it and load it. @@ -1834,6 +1858,9 @@ static int loadvm_process_command(QEMUFile *f) case MIG_CMD_POSTCOPY_RAM_DISCARD: return loadvm_postcopy_ram_handle_discard(mis, len); =20 + case MIG_CMD_POSTCOPY_RESUME: + return loadvm_postcopy_handle_resume(mis); + case MIG_CMD_RECV_BITMAP: return loadvm_handle_recv_bitmap(mis, len); } diff --git a/migration/savevm.h b/migration/savevm.h index 8126b1c..a5f3879 100644 --- a/migration/savevm.h +++ b/migration/savevm.h @@ -46,6 +46,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t = *buf, size_t len); void qemu_savevm_send_postcopy_advise(QEMUFile *f); void qemu_savevm_send_postcopy_listen(QEMUFile *f); void qemu_savevm_send_postcopy_run(QEMUFile *f); +void qemu_savevm_send_postcopy_resume(QEMUFile *f); void qemu_savevm_send_recv_bitmap(QEMUFile *f, char *block_name); =20 void qemu_savevm_send_postcopy_ram_discard(QEMUFile *f, const char *name, diff --git a/migration/trace-events b/migration/trace-events index ed69551..04dd9d8 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -34,6 +34,7 @@ savevm_send_open_return_path(void) "" savevm_send_ping(uint32_t val) "%x" savevm_send_postcopy_listen(void) "" savevm_send_postcopy_run(void) "" +savevm_send_postcopy_resume(void) "" savevm_send_recv_bitmap(char *name) "%s" savevm_state_setup(void) "" savevm_state_header(void) "" --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; 
Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 150123042707221.122630429535775; Fri, 28 Jul 2017 01:27:07 -0700 (PDT) Received: from localhost ([::1]:46753 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0bx-0005HX-Lj for importer@patchew.org; Fri, 28 Jul 2017 04:27:05 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47441) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Jl-0001XW-RM for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:18 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Jk-0001fb-QR for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:17 -0400 Received: from mx1.redhat.com ([209.132.183.28]:48098) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Jk-0001et-IX for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:16 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 803291B9831; Fri, 28 Jul 2017 08:08:15 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 10DF3600C2; Fri, 28 Jul 2017 08:08:11 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 803291B9831 Authentication-Results: ext-mx02.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx02.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:33 +0800 Message-Id: 
<1501229198-30588-25-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.26]); Fri, 28 Jul 2017 08:08:15 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 24/29] migration: new message MIG_RP_MSG_RESUME_ACK X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Creating new message to reply for MIG_CMD_POSTCOPY_RESUME. One uint32_t is used as payload to let the source know whether destination is ready to continue the migration. 
Signed-off-by: Peter Xu --- migration/migration.c | 37 +++++++++++++++++++++++++++++++++++++ migration/migration.h | 1 + migration/savevm.c | 3 ++- migration/trace-events | 1 + 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index c2b85ac..62f91ce 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -93,6 +93,7 @@ enum mig_rp_message_type { MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) = */ MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ + MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */ =20 MIG_RP_MSG_MAX }; @@ -484,6 +485,14 @@ void migrate_send_rp_recv_bitmap(MigrationIncomingStat= e *mis, trace_migrate_send_rp_recv_bitmap(block_name, res); } =20 +void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t valu= e) +{ + uint32_t buf; + + buf =3D cpu_to_be32(value); + migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); +} + MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) { MigrationCapabilityStatusList *head =3D NULL; @@ -1595,6 +1604,7 @@ static struct rp_cmd_args { [MIG_RP_MSG_REQ_PAGES] =3D { .len =3D 12, .name =3D "REQ_PAGES" }, [MIG_RP_MSG_REQ_PAGES_ID] =3D { .len =3D -1, .name =3D "REQ_PAGES_ID= " }, [MIG_RP_MSG_RECV_BITMAP] =3D { .len =3D -1, .name =3D "RECV_BITMAP"= }, + [MIG_RP_MSG_RESUME_ACK] =3D { .len =3D 4, .name =3D "RESUME_ACK" = }, [MIG_RP_MSG_MAX] =3D { .len =3D -1, .name =3D "MAX" }, }; =20 @@ -1652,6 +1662,25 @@ static int migrate_handle_rp_recv_bitmap(MigrationSt= ate *s, char *block_name) return ram_dirty_bitmap_reload(s, block); } =20 +static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) +{ + trace_source_return_path_thread_resume_ack(value); + + /* + * Currently value will always be one. It can be used in the + * future to notify source that destination cannot continue. 
+ */ + assert(value =3D=3D 1); + + /* Now both sides are active. */ + migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER, + MIGRATION_STATUS_POSTCOPY_ACTIVE); + + /* TODO: notify send thread that time to continue send pages */ + + return 0; +} + /* * Handles messages sent on the return path towards the source VM * @@ -1771,6 +1800,14 @@ retry: } break; =20 + case MIG_RP_MSG_RESUME_ACK: + tmp32 =3D ldl_be_p(buf); + if (migrate_handle_rp_resume_ack(ms, tmp32)) { + mark_source_rp_bad(ms); + goto out; + } + break; + default: break; } diff --git a/migration/migration.h b/migration/migration.h index 4d38308..2a3f905 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -206,5 +206,6 @@ int migrate_send_rp_req_pages(MigrationIncomingState *m= is, const char* rbname, ram_addr_t start, size_t len); void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, char *block_name); +void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t valu= e); =20 #endif diff --git a/migration/savevm.c b/migration/savevm.c index 2e330bc..02a67ac 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1690,7 +1690,8 @@ static int loadvm_postcopy_handle_resume(MigrationInc= omingState *mis) MIGRATION_STATUS_POSTCOPY_ACTIVE); qemu_sem_post(&mis->postcopy_pause_sem_fault); =20 - /* TODO: Tell source that "we are ready" */ + /* Tell source that "we are ready" */ + migrate_send_rp_resume_ack(mis, 1); =20 return 0; } diff --git a/migration/trace-events b/migration/trace-events index 04dd9d8..0b43fec 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -116,6 +116,7 @@ source_return_path_thread_entry(void) "" source_return_path_thread_loop_top(void) "" source_return_path_thread_pong(uint32_t val) "%x" source_return_path_thread_shut(uint32_t val) "%x" +source_return_path_thread_resume_ack(uint32_t v) "%"PRIu32 migrate_global_state_post_load(const char *state) "loaded state: %s" migrate_global_state_pre_save(const char *state) "saved state: 
%s" migration_thread_low_pending(uint64_t pending) "%" PRIu64 --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501229999575158.74070774208542; Fri, 28 Jul 2017 01:19:59 -0700 (PDT) Received: from localhost ([::1]:46706 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0V4-0006Ls-5m for importer@patchew.org; Fri, 28 Jul 2017 04:19:58 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47467) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Jo-0001a7-IK for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:21 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Jn-0001h2-Jg for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:20 -0400 Received: from mx1.redhat.com ([209.132.183.28]:52914) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Jn-0001gX-AZ for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:19 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 48C68974BB; Fri, 28 Jul 2017 08:08:18 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id E8F82600C2; Fri, 28 Jul 2017 08:08:15 +0000 (UTC) DMARC-Filter: OpenDMARC Filter 
v1.3.2 mx1.redhat.com 48C68974BB Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:34 +0800 Message-Id: <1501229198-30588-26-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.28]); Fri, 28 Jul 2017 08:08:18 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 25/29] migration: introduce SaveVMHandlers.resume_prepare X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" This is a hook function to be called when a postcopy migration wants to resume from a failure. Each module should provide its own recovery logic before we switch to the postcopy-active state.
Signed-off-by: Peter Xu --- include/migration/register.h | 2 ++ migration/migration.c | 20 +++++++++++++++++++- migration/savevm.c | 25 +++++++++++++++++++++++++ migration/savevm.h | 1 + migration/trace-events | 1 + 5 files changed, 48 insertions(+), 1 deletion(-) diff --git a/include/migration/register.h b/include/migration/register.h index a0f1edd..b669362 100644 --- a/include/migration/register.h +++ b/include/migration/register.h @@ -41,6 +41,8 @@ typedef struct SaveVMHandlers { LoadStateHandler *load_state; int (*load_setup)(QEMUFile *f, void *opaque); int (*load_cleanup)(void *opaque); + /* Called when postcopy migration wants to resume from failure */ + int (*resume_prepare)(MigrationState *s, void *opaque); } SaveVMHandlers; =20 int register_savevm_live(DeviceState *dev, diff --git a/migration/migration.c b/migration/migration.c index 62f91ce..6cb0ad3 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2150,7 +2150,25 @@ static bool postcopy_should_start(MigrationState *s) /* Return zero if success, or <0 for error */ static int postcopy_do_resume(MigrationState *s) { - /* TODO: do the resume logic */ + int ret; + + /* + * Call all the resume_prepare() hooks, so that modules can be + * ready for the migration resume. 
+ */ + ret =3D qemu_savevm_state_resume_prepare(s); + if (ret) { + error_report("%s: resume_prepare() failure detected: %d", + __func__, ret); + return ret; + } + + /* + * TODO: handshake with dest using MIG_CMD_RESUME, + * MIG_RP_MSG_RESUME_ACK, then switch source state to + * "postcopy-active" + */ + return 0; } =20 diff --git a/migration/savevm.c b/migration/savevm.c index 02a67ac..08a4712 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1004,6 +1004,31 @@ void qemu_savevm_state_setup(QEMUFile *f) } } =20 +int qemu_savevm_state_resume_prepare(MigrationState *s) +{ + SaveStateEntry *se; + int ret; + + trace_savevm_state_resume_prepare(); + + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { + if (!se->ops || !se->ops->resume_prepare) { + continue; + } + if (se->ops && se->ops->is_active) { + if (!se->ops->is_active(se->opaque)) { + continue; + } + } + ret =3D se->ops->resume_prepare(s, se->opaque); + if (ret < 0) { + return ret; + } + } + + return 0; +} + /* * this function has three return values: * negative: there was one error, and we have -errno. 
diff --git a/migration/savevm.h b/migration/savevm.h index a5f3879..3193f04 100644 --- a/migration/savevm.h +++ b/migration/savevm.h @@ -31,6 +31,7 @@ =20 bool qemu_savevm_state_blocked(Error **errp); void qemu_savevm_state_setup(QEMUFile *f); +int qemu_savevm_state_resume_prepare(MigrationState *s); void qemu_savevm_state_header(QEMUFile *f); int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy); void qemu_savevm_state_cleanup(void); diff --git a/migration/trace-events b/migration/trace-events index 0b43fec..0fb2d1e 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -37,6 +37,7 @@ savevm_send_postcopy_run(void) "" savevm_send_postcopy_resume(void) "" savevm_send_recv_bitmap(char *name) "%s" savevm_state_setup(void) "" +savevm_state_resume_prepare(void) "" savevm_state_header(void) "" savevm_state_iterate(void) "" savevm_state_cleanup(void) "" --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501230527584895.8635381626066; Fri, 28 Jul 2017 01:28:47 -0700 (PDT) Received: from localhost ([::1]:46758 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0dY-0006Mb-QL for importer@patchew.org; Fri, 28 Jul 2017 04:28:44 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47556) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Jw-0001gP-BS for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:29 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) 
(envelope-from ) id 1db0Jt-0001kf-5C for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:28 -0400 Received: from mx1.redhat.com ([209.132.183.28]:40534) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Js-0001k7-S8 for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:25 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id D6AADD3D6D; Fri, 28 Jul 2017 08:08:23 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id B0E096031D; Fri, 28 Jul 2017 08:08:18 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com D6AADD3D6D Authentication-Results: ext-mx01.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx01.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:35 +0800 Message-Id: <1501229198-30588-27-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.25]); Fri, 28 Jul 2017 08:08:24 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 26/29] migration: synchronize dirty bitmap for resume X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . 
David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" This patch implements the first part of the core RAM resume logic for postcopy. ram_resume_prepare() is provided for the work. When the migration is interrupted by network failure, the dirty bitmap on the source side will be meaningless, because even if the dirty bit is cleared, it is still possible that the sent page was lost on the way to the destination. Here, instead of continuing the migration with the old dirty bitmap on the source, we ask the destination side to send back its received bitmap, then invert it to be our initial dirty bitmap. The source side send thread will issue the MIG_CMD_RECV_BITMAP requests, once per ramblock, to ask for the received bitmap. On the destination side, MIG_RP_MSG_RECV_BITMAP will be issued, along with the requested bitmap. Data will be received on the return-path thread of the source, and the main migration thread will be notified when all the ramblock bitmaps are synchronized. One issue to be solved here is how to synchronize the source send thread and the return-path thread. A semaphore cannot really work here since we cannot guarantee the order of wait/post (it's possible that the reply is very fast, even before the send thread starts to wait). So a condition variable is used to make sure the ordering is always correct.
Signed-off-by: Peter Xu --- migration/migration.c | 4 +++ migration/migration.h | 4 +++ migration/ram.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++= ++++ migration/trace-events | 1 + 4 files changed, 77 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index 6cb0ad3..93fbc96 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1093,6 +1093,8 @@ static void migrate_fd_cleanup(void *opaque) =20 qemu_sem_destroy(&s->postcopy_pause_sem); qemu_sem_destroy(&s->postcopy_pause_rp_sem); + qemu_mutex_destroy(&s->resume_lock); + qemu_cond_destroy(&s->resume_cond); } =20 void migrate_fd_error(MigrationState *s, const Error *error) @@ -1238,6 +1240,8 @@ MigrationState *migrate_init(void) s->error =3D NULL; qemu_sem_init(&s->postcopy_pause_sem, 0); qemu_sem_init(&s->postcopy_pause_rp_sem, 0); + qemu_mutex_init(&s->resume_lock); + qemu_cond_init(&s->resume_cond); =20 migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_S= ETUP); =20 diff --git a/migration/migration.h b/migration/migration.h index 2a3f905..c270f4c 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -159,6 +159,10 @@ struct MigrationState /* Needed by postcopy-pause state */ QemuSemaphore postcopy_pause_sem; QemuSemaphore postcopy_pause_rp_sem; + + /* Used to sync-up between main send thread and rp-thread */ + QemuMutex resume_lock; + QemuCond resume_cond; }; =20 void migrate_set_state(int *state, int old_state, int new_state); diff --git a/migration/ram.c b/migration/ram.c index d543483..c695b13 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -46,6 +46,7 @@ #include "exec/ram_addr.h" #include "qemu/rcu_queue.h" #include "migration/colo.h" +#include "savevm.h" =20 /***********************************************************/ /* ram save/restore */ @@ -256,6 +257,8 @@ struct RAMState { RAMBlock *last_req_rb; /* Queue of outstanding page requests from the destination */ QemuMutex src_page_req_mutex; + /* Ramblock counts to sync dirty 
bitmap. Only used for recovery */ + int ramblock_to_sync; QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests; }; typedef struct RAMState RAMState; @@ -2731,6 +2734,57 @@ static int ram_load(QEMUFile *f, void *opaque, int v= ersion_id) return ret; } =20 +/* Sync all the dirty bitmap with destination VM. */ +static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs) +{ + RAMBlock *block; + QEMUFile *file =3D s->to_dst_file; + int ramblock_count =3D 0; + + trace_ram_dirty_bitmap_sync("start"); + + /* + * We need to take the resume lock to make sure that the send + * thread (current thread) and the rp-thread will do their work in + * order. + */ + qemu_mutex_lock(&s->resume_lock); + + /* Request for receive-bitmap for each block */ + RAMBLOCK_FOREACH(block) { + ramblock_count++; + qemu_savevm_send_recv_bitmap(file, block->idstr); + } + + /* Init the ramblock count to total */ + atomic_set(&rs->ramblock_to_sync, ramblock_count); + + trace_ram_dirty_bitmap_sync("wait-bitmap"); + + /* Wait until all the ramblocks' dirty bitmap synced */ + while (rs->ramblock_to_sync) { + qemu_cond_wait(&s->resume_cond, &s->resume_lock); + } + + trace_ram_dirty_bitmap_sync("completed"); + + qemu_mutex_unlock(&s->resume_lock); + + return 0; +} + +static void ram_dirty_bitmap_reload_notify(MigrationState *s) +{ + qemu_mutex_lock(&s->resume_lock); + atomic_dec(&ram_state->ramblock_to_sync); + if (ram_state->ramblock_to_sync =3D=3D 0) { + /* Make sure the other thread gets the latest */ + trace_ram_dirty_bitmap_sync("notify-send"); + qemu_cond_signal(&s->resume_cond); + } + qemu_mutex_unlock(&s->resume_lock); +} + /* * Read the received bitmap, revert it as the initial dirty bitmap. * This is only used when the postcopy migration is paused but wants @@ -2776,9 +2830,22 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBl= ock *block) =20 trace_ram_dirty_bitmap_reload(block->idstr); =20 + /* + * We succeeded to sync bitmap for current ramblock. 
If this is + * the last one to sync, we need to notify the main send thread. + */ + ram_dirty_bitmap_reload_notify(s); + return 0; } =20 +static int ram_resume_prepare(MigrationState *s, void *opaque) +{ + RAMState *rs =3D *(RAMState **)opaque; + + return ram_dirty_bitmap_sync_all(s, rs); +} + static SaveVMHandlers savevm_ram_handlers =3D { .save_setup =3D ram_save_setup, .save_live_iterate =3D ram_save_iterate, @@ -2789,6 +2856,7 @@ static SaveVMHandlers savevm_ram_handlers =3D { .save_cleanup =3D ram_save_cleanup, .load_setup =3D ram_load_setup, .load_cleanup =3D ram_load_cleanup, + .resume_prepare =3D ram_resume_prepare, }; =20 void ram_mig_init(void) diff --git a/migration/trace-events b/migration/trace-events index 0fb2d1e..15ff1bf 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -80,6 +80,7 @@ ram_postcopy_send_discard_bitmap(void) "" ram_save_page(const char *rbname, uint64_t offset, void *host) "%s: offset= : %" PRIx64 " host: %p" ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: st= art: %zx len: %zx" ram_dirty_bitmap_reload(char *str) "%s" +ram_dirty_bitmap_sync(const char *str) "%s" =20 # migration/migration.c await_return_path_close_on_source_close(void) "" --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501230608476754.991695615294; Fri, 28 Jul 2017 01:30:08 -0700 (PDT) Received: from localhost ([::1]:46763 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from 
) id 1db0et-0007b6-21 for importer@patchew.org; Fri, 28 Jul 2017 04:30:07 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47590) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Jy-0001gm-1m for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:30 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0Jx-0001nZ-7W for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:30 -0400 Received: from mx1.redhat.com ([209.132.183.28]:55072) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0Jx-0001mn-1D for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:29 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 0686D3372F1; Fri, 28 Jul 2017 08:08:28 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 4E49D600C2; Fri, 28 Jul 2017 08:08:24 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 0686D3372F1 Authentication-Results: ext-mx05.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx05.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:36 +0800 Message-Id: <1501229198-30588-28-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.29]); Fri, 28 Jul 2017 08:08:28 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 
209.132.183.28 Subject: [Qemu-devel] [RFC 27/29] migration: setup ramstate for resume X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" After we updated the dirty bitmaps of ramblocks, we also need to update the critical fields in RAMState to make sure it is ready for a resume. Signed-off-by: Peter Xu --- migration/ram.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index c695b13..427bf6e 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1947,6 +1947,31 @@ static int ram_state_init(RAMState **rsp) return 0; } =20 +static void ram_state_resume_prepare(RAMState *rs) +{ + RAMBlock *block; + long pages =3D 0; + + /* + * Postcopy is not using xbzrle/compression, so no need for that. + * Also, since source are already halted, we don't need to care + * about dirty page logging as well. + */ + + RAMBLOCK_FOREACH(block) { + pages +=3D bitmap_count_one(block->bmap, + block->max_length >> TARGET_PAGE_BITS); + } + + /* This may not be aligned with current bitmaps. Recalculate. */ + rs->migration_dirty_pages =3D pages; + + rs->last_seen_block =3D NULL; + rs->last_sent_block =3D NULL; + rs->last_page =3D 0; + rs->last_version =3D ram_list.version; +} + /* * Each of ram_save_setup, ram_save_iterate and ram_save_complete has * long-running RCU critical section. 
When rcu-reclaims in the code @@ -2842,8 +2867,16 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBl= ock *block) static int ram_resume_prepare(MigrationState *s, void *opaque) { RAMState *rs =3D *(RAMState **)opaque; + int ret; =20 - return ram_dirty_bitmap_sync_all(s, rs); + ret =3D ram_dirty_bitmap_sync_all(s, rs); + if (ret) { + return ret; + } + + ram_state_resume_prepare(rs); + + return 0; } =20 static SaveVMHandlers savevm_ram_handlers =3D { --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 150122997126260.357770198278104; Fri, 28 Jul 2017 01:19:31 -0700 (PDT) Received: from localhost ([::1]:46705 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0Ub-0005Nu-O9 for importer@patchew.org; Fri, 28 Jul 2017 04:19:29 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47667) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0K2-0001l0-NO for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:38 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0K1-0001rD-PO for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:34 -0400 Received: from mx1.redhat.com ([209.132.183.28]:57990) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0K1-0001qi-GB for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:33 -0400 Received: from smtp.corp.redhat.com (int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) 
(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 6B9E0F8028; Fri, 28 Jul 2017 08:08:32 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id 73DB9600C2; Fri, 28 Jul 2017 08:08:28 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 6B9E0F8028 Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx10.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:37 +0800 Message-Id: <1501229198-30588-29-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.39]); Fri, 28 Jul 2017 08:08:32 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 28/29] migration: final handshake for the resume X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Finish the last step to do the final handshake for the recovery. First source sends one MIG_CMD_RESUME to dst, telling that source is ready to resume. 
Then, dest replies with MIG_RP_MSG_RESUME_ACK to source, telling that dest is ready to resume (after switch to postcopy-active state). When source received the RESUME_ACK, it switches its state to postcopy-active, and finally the recovery is completed. Signed-off-by: Peter Xu --- migration/migration.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 93fbc96..ecebe30 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1666,6 +1666,13 @@ static int migrate_handle_rp_recv_bitmap(MigrationSt= ate *s, char *block_name) return ram_dirty_bitmap_reload(s, block); } =20 +static void postcopy_resume_handshake_ack(MigrationState *s) +{ + qemu_mutex_lock(&s->resume_lock); + qemu_cond_signal(&s->resume_cond); + qemu_mutex_unlock(&s->resume_lock); +} + static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) { trace_source_return_path_thread_resume_ack(value); @@ -1680,7 +1687,8 @@ static int migrate_handle_rp_resume_ack(MigrationStat= e *s, uint32_t value) migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER, MIGRATION_STATUS_POSTCOPY_ACTIVE); =20 - /* TODO: notify send thread that time to continue send pages */ + /* Notify send thread that time to continue send pages */ + postcopy_resume_handshake_ack(s); =20 return 0; } @@ -2151,6 +2159,25 @@ static bool postcopy_should_start(MigrationState *s) return atomic_read(&s->start_postcopy) || s->start_postcopy_fast; } =20 +static int postcopy_resume_handshake(MigrationState *s) +{ + qemu_mutex_lock(&s->resume_lock); + + qemu_savevm_send_postcopy_resume(s->to_dst_file); + + while (s->state =3D=3D MIGRATION_STATUS_POSTCOPY_RECOVER) { + qemu_cond_wait(&s->resume_cond, &s->resume_lock); + } + + qemu_mutex_unlock(&s->resume_lock); + + if (s->state =3D=3D MIGRATION_STATUS_POSTCOPY_ACTIVE) { + return 0; + } + + return -1; +} + /* Return zero if success, or <0 for error */ static int 
postcopy_do_resume(MigrationState *s) { @@ -2168,10 +2195,14 @@ static int postcopy_do_resume(MigrationState *s) } =20 /* - * TODO: handshake with dest using MIG_CMD_RESUME, - * MIG_RP_MSG_RESUME_ACK, then switch source state to - * "postcopy-active" + * Last handshake with destination on the resume (destination will + * switch to postcopy-active afterwards) */ + ret =3D postcopy_resume_handshake(s); + if (ret) { + error_report("%s: handshake failed: %d", __func__, ret); + return ret; + } =20 return 0; } --=20 2.7.4 From nobody Thu May 2 08:44:07 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1501230706736671.337688486053; Fri, 28 Jul 2017 01:31:46 -0700 (PDT) Received: from localhost ([::1]:46774 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0gT-0000dC-A1 for importer@patchew.org; Fri, 28 Jul 2017 04:31:45 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47726) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1db0K6-0001r2-DV for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:39 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1db0K5-0001tJ-9A for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:38 -0400 Received: from mx1.redhat.com ([209.132.183.28]:55520) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1db0K5-0001si-0Q for qemu-devel@nongnu.org; Fri, 28 Jul 2017 04:08:37 -0400 Received: from smtp.corp.redhat.com 
(int-mx01.intmail.prod.int.phx2.redhat.com [10.5.11.11]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 02C56C107C93; Fri, 28 Jul 2017 08:08:36 +0000 (UTC) Received: from pxdev.xzpeter.org.com (dhcp-15-224.nay.redhat.com [10.66.15.224]) by smtp.corp.redhat.com (Postfix) with ESMTP id D656A600C2; Fri, 28 Jul 2017 08:08:32 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 02C56C107C93 Authentication-Results: ext-mx07.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx07.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=peterx@redhat.com From: Peter Xu To: qemu-devel@nongnu.org Date: Fri, 28 Jul 2017 16:06:38 +0800 Message-Id: <1501229198-30588-30-git-send-email-peterx@redhat.com> In-Reply-To: <1501229198-30588-1-git-send-email-peterx@redhat.com> References: <1501229198-30588-1-git-send-email-peterx@redhat.com> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.11 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.31]); Fri, 28 Jul 2017 08:08:36 +0000 (UTC) X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 209.132.183.28 Subject: [Qemu-devel] [RFC 29/29] migration: reset migrate thread vars when resumed X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Laurent Vivier , Andrea Arcangeli , Juan Quintela , Alexey Perevalov , peterx@redhat.com, "Dr . David Alan Gilbert" Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Firstly, MigThrError enumeration is introduced to describe the error in migration_detect_error() better. 
This gives the migration_thread() a chance to know whether a recovery has happened. Then, if a recovery is detected, migration_thread() will reset its local variables to prepare for that. Signed-off-by: Peter Xu --- migration/migration.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index ecebe30..439bc22 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2159,6 +2159,15 @@ static bool postcopy_should_start(MigrationState *s) return atomic_read(&s->start_postcopy) || s->start_postcopy_fast; } =20 +typedef enum MigThrError { + /* No error detected */ + MIG_THR_ERR_NONE =3D 0, + /* Detected error, but resumed successfully */ + MIG_THR_ERR_RECOVERED =3D 1, + /* Detected fatal error, need to exit */ + MIG_THR_ERR_FATAL =3D 2, +} MigThrError; + static int postcopy_resume_handshake(MigrationState *s) { qemu_mutex_lock(&s->resume_lock); @@ -2209,10 +2218,10 @@ static int postcopy_do_resume(MigrationState *s) =20 /* * We don't return until we are in a safe state to continue current - * postcopy migration. Returns true to continue the migration, or - * false to terminate current migration. + * postcopy migration. Returns MIG_THR_ERR_RECOVERED if recovered, or + * MIG_THR_ERR_FATAL if unrecovery failure happened. */ -static bool postcopy_pause(MigrationState *s) +static MigThrError postcopy_pause(MigrationState *s) { assert(s->state =3D=3D MIGRATION_STATUS_POSTCOPY_ACTIVE); =20 @@ -2247,7 +2256,7 @@ do_pause: if (postcopy_do_resume(s) =3D=3D 0) { /* Let's continue! */ trace_postcopy_pause_continued(); - return true; + return MIG_THR_ERR_RECOVERED; } else { /* * Something wrong happened during the recovery, let's @@ -2258,12 +2267,11 @@ do_pause: } } else { /* This is not right... Time to quit. */ - return false; + return MIG_THR_ERR_FATAL; } } =20 -/* Return true if we want to stop the migration, otherwise false. 
*/ -static bool migration_detect_error(MigrationState *s) +static MigThrError migration_detect_error(MigrationState *s) { int ret; =20 @@ -2272,7 +2280,7 @@ static bool migration_detect_error(MigrationState *s) =20 if (!ret) { /* Everything is fine */ - return false; + return MIG_THR_ERR_NONE; } =20 if (s->state =3D=3D MIGRATION_STATUS_POSTCOPY_ACTIVE && ret =3D=3D -EI= O) { @@ -2281,7 +2289,7 @@ static bool migration_detect_error(MigrationState *s) * while. After that, it can be continued by a * recovery phase. */ - return !postcopy_pause(s); + return postcopy_pause(s); } else { /* * For precopy (or postcopy with error outside IO), we fail @@ -2291,7 +2299,7 @@ static bool migration_detect_error(MigrationState *s) trace_migration_thread_file_err(); =20 /* Time to stop the migration, now. */ - return true; + return MIG_THR_ERR_FATAL; } } =20 @@ -2319,6 +2327,7 @@ static void *migration_thread(void *opaque) /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */ enum MigrationStatus current_active_state =3D MIGRATION_STATUS_ACTIVE; bool enable_colo =3D migrate_colo_enabled(); + MigThrError thr_error; =20 rcu_register_thread(); =20 @@ -2395,8 +2404,17 @@ static void *migration_thread(void *opaque) * Try to detect any kind of failures, and see whether we * should stop the migration now. */ - if (migration_detect_error(s)) { + thr_error =3D migration_detect_error(s); + if (thr_error =3D=3D MIG_THR_ERR_FATAL) { + /* Stop migration */ break; + } else if (thr_error =3D=3D MIG_THR_ERR_RECOVERED) { + /* + * Just recovered from a e.g. network failure, reset all + * the local variables. + */ + initial_time =3D qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + initial_bytes =3D 0; } =20 current_time =3D qemu_clock_get_ms(QEMU_CLOCK_REALTIME); --=20 2.7.4