From nobody Mon Apr 29 16:25:58 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=redhat.com Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1524124491142635.2235726396625; Thu, 19 Apr 2018 00:54:51 -0700 (PDT) Received: from localhost ([::1]:57336 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1f94P0-00061E-Rx for importer@patchew.org; Thu, 19 Apr 2018 03:54:46 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43217) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1f94N9-0004o6-NF for qemu-devel@nongnu.org; Thu, 19 Apr 2018 03:52:52 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1f94N8-0002pz-Tv for qemu-devel@nongnu.org; Thu, 19 Apr 2018 03:52:51 -0400 Received: from mx3-rdu2.redhat.com ([66.187.233.73]:54122 helo=mx1.redhat.com) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1f94N6-0002o1-EO; Thu, 19 Apr 2018 03:52:48 -0400 Received: from smtp.corp.redhat.com (int-mx06.intmail.prod.int.rdu2.redhat.com [10.11.54.6]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 9DB6A406E8C3; Thu, 19 Apr 2018 07:52:47 +0000 (UTC) Received: from localhost (ovpn-116-54.ams2.redhat.com [10.36.116.54]) by smtp.corp.redhat.com (Postfix) with ESMTP id ABF1521568B2; Thu, 19 Apr 2018 07:52:46 +0000 (UTC) From: Stefan Hajnoczi To: Date: Thu, 19 Apr 2018 15:52:31 +0800 Message-Id: <20180419075232.31407-2-stefanha@redhat.com> In-Reply-To: <20180419075232.31407-1-stefanha@redhat.com> References: <20180419075232.31407-1-stefanha@redhat.com> X-Scanned-By: MIMEDefang 2.78 on 10.11.54.6 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.11.55.7]); Thu, 19 Apr 2018 07:52:47 +0000 (UTC) X-Greylist: inspected by milter-greylist-4.5.16 (mx1.redhat.com [10.11.55.7]); Thu, 19 Apr 2018 07:52:47 +0000 (UTC) for IP:'10.11.54.6' DOMAIN:'int-mx06.intmail.prod.int.rdu2.redhat.com' HELO:'smtp.corp.redhat.com' FROM:'stefanha@redhat.com' RCPT:'' X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 66.187.233.73 Subject: [Qemu-devel] [RFC 1/2] block/file-posix: implement bdrv_co_invalidate_cache() on Linux X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Sergio Lopez , qemu-block@nongnu.org, "Dr. David Alan Gilbert" , Max Reitz , Stefan Hajnoczi Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" On Linux posix_fadvise(POSIX_FADV_DONTNEED) invalidates pages*. Use this to drop page cache on the destination host during shared storage migration. This way the destination host will read the latest copy of the data and will not use stale data from the page cache. The flow is as follows: 1. Source host writes out all dirty pages and inactivates drives. 2. QEMU_VM_EOF is sent on migration stream. 3. Destination host invalidates caches before accessing drives. This patch enables live migration even with -drive cache.direct=3Doff. * Terms and conditions may apply, please see patch for details. Signed-off-by: Stefan Hajnoczi --- block/file-posix.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c index 3794c0007a..df4f52919f 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2236,6 +2236,42 @@ static int coroutine_fn raw_co_block_status(BlockDri= verState *bs, return ret | BDRV_BLOCK_OFFSET_VALID; } =20 +static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs, + Error **errp) +{ + BDRVRawState *s =3D bs->opaque; + int ret; + + ret =3D fd_open(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "The file descriptor is not open"); + return; + } + + if (s->open_flags & O_DIRECT) { + return; /* No host kernel page cache */ + } + +#if defined(__linux__) + /* This sets the scene for the next syscall... */ + ret =3D bdrv_co_flush(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "flush failed"); + return; + } + + /* Linux does not invalidate pages that are dirty, locked, or mmapped = by a + * process. These limitations are okay because we just fsynced the fi= le, + * we don't use mmap, and the file should not be in use by other proce= sses. + */ + ret =3D posix_fadvise(s->fd, 0, 0, POSIX_FADV_DONTNEED); + if (ret !=3D 0) { /* the return value is a positive errno */ + error_setg_errno(errp, ret, "fadvise failed"); + return; + } +#endif /* __linux__ */ +} + static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs, int64_t offset, int bytes, BlockCompletionFunc *cb, void *opaque) @@ -2328,6 +2364,7 @@ BlockDriver bdrv_file =3D { .bdrv_co_create_opts =3D raw_co_create_opts, .bdrv_has_zero_init =3D bdrv_has_zero_init_1, .bdrv_co_block_status =3D raw_co_block_status, + .bdrv_co_invalidate_cache =3D raw_co_invalidate_cache, .bdrv_co_pwrite_zeroes =3D raw_co_pwrite_zeroes, =20 .bdrv_co_preadv =3D raw_co_preadv, @@ -2805,6 +2842,7 @@ static BlockDriver bdrv_host_device =3D { .bdrv_reopen_abort =3D raw_reopen_abort, .bdrv_co_create_opts =3D hdev_co_create_opts, .create_opts =3D &raw_create_opts, + .bdrv_co_invalidate_cache =3D raw_co_invalidate_cache, .bdrv_co_pwrite_zeroes =3D hdev_co_pwrite_zeroes, =20 .bdrv_co_preadv =3D raw_co_preadv, @@ -2927,6 +2965,7 @@ static BlockDriver bdrv_host_cdrom =3D { .bdrv_reopen_abort =3D raw_reopen_abort, .bdrv_co_create_opts =3D hdev_co_create_opts, .create_opts =3D &raw_create_opts, + .bdrv_co_invalidate_cache =3D raw_co_invalidate_cache, =20 =20 .bdrv_co_preadv =3D raw_co_preadv, --=20 2.14.3 From nobody Mon Apr 29 16:25:58 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=redhat.com Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1524124492743154.2821974183953; Thu, 19 Apr 2018 00:54:52 -0700 (PDT) Received: from localhost ([::1]:57338 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1f94P6-000674-0v for importer@patchew.org; Thu, 19 Apr 2018 03:54:52 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43297) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1f94NJ-0004wI-DP for qemu-devel@nongnu.org; Thu, 19 Apr 2018 03:53:02 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1f94NG-0002x0-9d for qemu-devel@nongnu.org; Thu, 19 Apr 2018 03:53:01 -0400 Received: from mx3-rdu2.redhat.com ([66.187.233.73]:38518 helo=mx1.redhat.com) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1f94NA-0002qG-9A; Thu, 19 Apr 2018 03:52:52 -0400 Received: from smtp.corp.redhat.com (int-mx06.intmail.prod.int.rdu2.redhat.com [10.11.54.6]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 4F6F24023150; Thu, 19 Apr 2018 07:52:51 +0000 (UTC) Received: from localhost (ovpn-116-54.ams2.redhat.com [10.36.116.54]) by smtp.corp.redhat.com (Postfix) with ESMTP id 122242156617; Thu, 19 Apr 2018 07:52:49 +0000 (UTC) From: Stefan Hajnoczi To: Date: Thu, 19 Apr 2018 15:52:32 +0800 Message-Id: <20180419075232.31407-3-stefanha@redhat.com> In-Reply-To: <20180419075232.31407-1-stefanha@redhat.com> References: <20180419075232.31407-1-stefanha@redhat.com> X-Scanned-By: MIMEDefang 2.78 on 10.11.54.6 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.11.55.6]); Thu, 19 Apr 2018 07:52:51 +0000 (UTC) X-Greylist: inspected by milter-greylist-4.5.16 (mx1.redhat.com [10.11.55.6]); Thu, 19 Apr 2018 07:52:51 +0000 (UTC) for IP:'10.11.54.6' DOMAIN:'int-mx06.intmail.prod.int.rdu2.redhat.com' HELO:'smtp.corp.redhat.com' FROM:'stefanha@redhat.com' RCPT:'' X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 66.187.233.73 Subject: [Qemu-devel] [RFC 2/2] block/file-posix: verify page cache is not used X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Sergio Lopez , qemu-block@nongnu.org, "Dr. David Alan Gilbert" , Max Reitz , Stefan Hajnoczi Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" This commit is for debugging only. Do not merge it. mincore(2) checks whether pages are resident. Use it to verify that page cache has been dropped. You can trigger a verification failure by mmapping the image file from another process and loading a byte from a page so that it becomes resident. bdrv_co_invalidate_cache() will fail while the process is alive. Signed-off-by: Stefan Hajnoczi --- block/file-posix.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++= ++++ 1 file changed, 71 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c index df4f52919f..d3105269c6 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2236,6 +2236,75 @@ static int coroutine_fn raw_co_block_status(BlockDri= verState *bs, return ret | BDRV_BLOCK_OFFSET_VALID; } =20 +static bool is_mincore(void *addr, size_t length) +{ + size_t vec_len =3D DIV_ROUND_UP(length, sysconf(_SC_PAGESIZE)); + unsigned char *vec; + size_t i; + int ret; + bool incore =3D false; + + vec =3D g_malloc(vec_len); + ret =3D mincore(addr, length, vec); + if (ret < 0) { + incore =3D true; + goto out; + } + + for (i =3D 0; i < vec_len; i++) { + if (vec[i] & 0x1) { + incore =3D true; + break; + } + } + +out: + g_free(vec); + return incore; +} + +static void check_not_in_page_cache(BlockDriverState *bs, Error **errp) +{ + const size_t WINDOW_SIZE =3D 128 * 1024 * 1024; + BDRVRawState *s =3D bs->opaque; + void *window =3D NULL; + size_t length =3D 0; + off_t end; + off_t offset; + + end =3D raw_getlength(bs); + + for (offset =3D 0; offset < end; offset +=3D WINDOW_SIZE) { + void *new_window; + size_t new_length =3D MIN(end - offset, WINDOW_SIZE); + + if (new_length !=3D length) { + munmap(window, length); + window =3D NULL; + length =3D 0; + } + + new_window =3D mmap(window, new_length, PROT_NONE, MAP_PRIVATE, + s->fd, offset); + if (new_window =3D=3D MAP_FAILED) { + error_setg_errno(errp, errno, "mmap failed"); + break; + } + + window =3D new_window; + length =3D new_length; + + if (is_mincore(window, length)) { + error_setg(errp, "page cache still in use!"); + break; + } + } + + if (window) { + munmap(window, length); + } +} + static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs, Error **errp) { @@ -2270,6 +2339,8 @@ static void coroutine_fn raw_co_invalidate_cache(Bloc= kDriverState *bs, return; } #endif /* __linux__ */ + + check_not_in_page_cache(bs, errp); } =20 static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs, --=20 2.14.3