From nobody Wed Dec 17 05:37:36 2025 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=redhat.com Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1532964239379862.2008961895654; Mon, 30 Jul 2018 08:23:59 -0700 (PDT) Received: from localhost ([::1]:53223 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fkA1e-00059W-8L for importer@patchew.org; Mon, 30 Jul 2018 11:23:58 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:44125) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fk9oV-0003NW-BL for qemu-devel@nongnu.org; Mon, 30 Jul 2018 11:10:24 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fk9oR-0008Kq-1d for qemu-devel@nongnu.org; Mon, 30 Jul 2018 11:10:23 -0400 Received: from mx3-rdu2.redhat.com ([66.187.233.73]:33852 helo=mx1.redhat.com) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fk9oO-0008JC-4B; Mon, 30 Jul 2018 11:10:16 -0400 Received: from smtp.corp.redhat.com (int-mx03.intmail.prod.int.rdu2.redhat.com [10.11.54.3]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id B5EA94022909; Mon, 30 Jul 2018 15:10:15 +0000 (UTC) Received: from localhost.localdomain.com (ovpn-117-86.ams2.redhat.com [10.36.117.86]) by smtp.corp.redhat.com (Postfix) with ESMTP id DD673112D189; Mon, 30 Jul 2018 15:10:14 +0000 (UTC) From: Kevin Wolf To: qemu-block@nongnu.org Date: Mon, 30 Jul 2018 17:09:55 +0200 Message-Id: <20180730150958.14607-11-kwolf@redhat.com> In-Reply-To: <20180730150958.14607-1-kwolf@redhat.com> References: <20180730150958.14607-1-kwolf@redhat.com> X-Scanned-By: MIMEDefang 2.78 on 10.11.54.3 X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.11.55.6]); Mon, 30 Jul 2018 15:10:15 +0000 (UTC) X-Greylist: inspected by milter-greylist-4.5.16 (mx1.redhat.com [10.11.55.6]); Mon, 30 Jul 2018 15:10:15 +0000 (UTC) for IP:'10.11.54.3' DOMAIN:'int-mx03.intmail.prod.int.rdu2.redhat.com' HELO:'smtp.corp.redhat.com' FROM:'kwolf@redhat.com' RCPT:'' X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] [fuzzy] X-Received-From: 66.187.233.73 Subject: [Qemu-devel] [PULL 10/13] file-posix: Fix write_zeroes with unmap on block devices X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: kwolf@redhat.com, peter.maydell@linaro.org, qemu-devel@nongnu.org Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" The BLKDISCARD ioctl doesn't guarantee that the discarded blocks read as all-zero afterwards, so don't try to abuse it for zero writing. We try to only use this if BLKDISCARDZEROES tells us that it is safe, but this is unreliable on older kernels and a constant 0 in newer kernels. In other words, this code path is never actually used with newer kernels, so we don't even try to unmap while writing zeros. This patch removes the abuse of discard for writing zeroes from file-posix and instead adds a new function that uses interfaces that are actually meant to deallocate and zero out at the same time. Only if those fail, it falls back to zeroing out without unmap. We never fall back to a discard operation any more that may or may not result in zeros. Signed-off-by: Kevin Wolf --- block/file-posix.c | 59 ++++++++++++++++++++++++++++++++++++++++----------= ---- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 928b863ced..fe83cbf0eb 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -648,7 +648,7 @@ static int raw_open_common(BlockDriverState *bs, QDict = *options, } #endif =20 - bs->supported_zero_flags =3D s->discard_zeroes ? BDRV_REQ_MAY_UNMAP : = 0; + bs->supported_zero_flags =3D BDRV_REQ_MAY_UNMAP; ret =3D 0; fail: if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { @@ -1487,6 +1487,35 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIO= Data *aiocb) return -ENOTSUP; } =20 +static ssize_t handle_aiocb_write_zeroes_unmap(RawPosixAIOData *aiocb) +{ + BDRVRawState *s G_GNUC_UNUSED =3D aiocb->bs->opaque; + int ret; + + /* First try to write zeros and unmap at the same time */ + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + ret =3D do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + aiocb->aio_offset, aiocb->aio_nbytes); + if (ret !=3D -ENOTSUP) { + return ret; + } +#endif + +#ifdef CONFIG_XFS + if (s->is_xfs) { + /* xfs_discard() guarantees that the discarded area reads as all-z= ero + * afterwards, so we can use it here. */ + return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes); + } +#endif + + /* If we couldn't manage to unmap while guaranteed that the area reads= as + * all-zero afterwards, just write zeroes without unmapping */ + ret =3D handle_aiocb_write_zeroes(aiocb); + return ret; +} + #ifndef HAVE_COPY_FILE_RANGE static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd, off_t *out_off, size_t len, unsigned int flag= s) @@ -1732,6 +1761,9 @@ static int aio_worker(void *arg) case QEMU_AIO_WRITE_ZEROES: ret =3D handle_aiocb_write_zeroes(aiocb); break; + case QEMU_AIO_WRITE_ZEROES | QEMU_AIO_DISCARD: + ret =3D handle_aiocb_write_zeroes_unmap(aiocb); + break; case QEMU_AIO_COPY_RANGE: ret =3D handle_aiocb_copy_range(aiocb); break; @@ -2556,15 +2588,13 @@ static int coroutine_fn raw_co_pwrite_zeroes( int bytes, BdrvRequestFlags flags) { BDRVRawState *s =3D bs->opaque; + int operation =3D QEMU_AIO_WRITE_ZEROES; =20 - if (!(flags & BDRV_REQ_MAY_UNMAP)) { - return paio_submit_co(bs, s->fd, offset, NULL, bytes, - QEMU_AIO_WRITE_ZEROES); - } else if (s->discard_zeroes) { - return paio_submit_co(bs, s->fd, offset, NULL, bytes, - QEMU_AIO_DISCARD); + if (flags & BDRV_REQ_MAY_UNMAP) { + operation |=3D QEMU_AIO_DISCARD; } - return -ENOTSUP; + + return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation); } =20 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) @@ -3057,20 +3087,19 @@ static coroutine_fn int hdev_co_pwrite_zeroes(Block= DriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags) { BDRVRawState *s =3D bs->opaque; + int operation =3D QEMU_AIO_WRITE_ZEROES | QEMU_AIO_BLKDEV; int rc; =20 rc =3D fd_open(bs); if (rc < 0) { return rc; } - if (!(flags & BDRV_REQ_MAY_UNMAP)) { - return paio_submit_co(bs, s->fd, offset, NULL, bytes, - QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV); - } else if (s->discard_zeroes) { - return paio_submit_co(bs, s->fd, offset, NULL, bytes, - QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); + + if (flags & BDRV_REQ_MAY_UNMAP) { + operation |=3D QEMU_AIO_DISCARD; } - return -ENOTSUP; + + return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation); } =20 static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts= *opts, --=20 2.13.6