From nobody Thu May 16 18:16:18 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=virtuozzo.com ARC-Seal: i=1; a=rsa-sha256; t=1586776506; cv=none; d=zohomail.com; s=zohoarc; b=Hy9Xy1VNpo2iiRJBoGZ6KSwmzsUOUYw1odoRLC+EWDkN5igmLAwlyVYp/Rq3W4dFYhPH/AOm8vgIPJqUeX/6xZlEBnSyWw39/VeeYhjQe3T6B3SmD+jFwPmtQwBpAEqRn+4K5gwBsRcps1BufCfJ9qnUNbvUyKS0xZaF8ZvLx6A= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1586776506; h=Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:Message-ID:References:Sender:Subject:To; bh=WsvPLWPbg0SyTLHaaHCAlxZiwjUFCbFWk/sXYatcQvA=; b=KtU2k5iZ0jUdhp/zEUjNXN5vBxNhchgJ7pkzmzsRD91yjsCmktBJLAHWgaWiJvA3ZPj+Bz8HAmt29y8vCkzEFcEKPUt2YcWwFTH2HWN2IJB5dmrXB5qZ3PdpMe0z95nau8HH85af23S8i3LZPJTQtYxDDq70qz7ehkgIfXEFKAw= ARC-Authentication-Results: i=1; mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail header.from= (p=none dis=none) header.from= Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1586776506258263.2164164042863; Mon, 13 Apr 2020 04:15:06 -0700 (PDT) Received: from localhost ([::1]:43086 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1jNx3Q-0006fn-Vs for importer@patchew.org; Mon, 13 Apr 2020 07:15:05 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58861) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) 
id 1jNx0u-0002jw-RY for qemu-devel@nongnu.org; Mon, 13 Apr 2020 07:12:30 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1jNx0s-0000if-Q4 for qemu-devel@nongnu.org; Mon, 13 Apr 2020 07:12:28 -0400 Received: from relay.sw.ru ([185.231.240.75]:40326) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1jNx0s-0000fy-EE for qemu-devel@nongnu.org; Mon, 13 Apr 2020 07:12:26 -0400 Received: from vgpu0.qa.sw.ru ([10.94.1.107]) by relay.sw.ru with esmtp (Exim 4.92.3) (envelope-from ) id 1jNx0h-0002VQ-IR; Mon, 13 Apr 2020 14:12:15 +0300 From: Denis Plotnikov To: qemu-devel@nongnu.org Subject: [RFC patch v1 1/3] qemu-file: introduce current buffer Date: Mon, 13 Apr 2020 14:12:12 +0300 Message-Id: <1586776334-641239-2-git-send-email-dplotnikov@virtuozzo.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1586776334-641239-1-git-send-email-dplotnikov@virtuozzo.com> References: <1586776334-641239-1-git-send-email-dplotnikov@virtuozzo.com> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 185.231.240.75 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: den@openvz.org, dgilbert@redhat.com, quintela@redhat.com Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" To approach async writing in further commits, the buffer allocated in the QEMUFile struct is replaced with a link to the current buffer. We're going to use many buffers to write the qemu file stream to the underlying storage asynchronously. The current buffer points to the buffer that is currently being filled with data. This patch doesn't add any features to qemu-file and doesn't change any qemu-file behavior. 
Signed-off-by: Denis Plotnikov Reviewed-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy --- include/qemu/typedefs.h | 1 + migration/qemu-file.c | 156 +++++++++++++++++++++++++++++---------------= ---- 2 files changed, 95 insertions(+), 62 deletions(-) diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 375770a..88dce54 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -97,6 +97,7 @@ typedef struct QDict QDict; typedef struct QEMUBH QEMUBH; typedef struct QemuConsole QemuConsole; typedef struct QEMUFile QEMUFile; +typedef struct QEMUFileBuffer QEMUFileBuffer; typedef struct QemuLockable QemuLockable; typedef struct QemuMutex QemuMutex; typedef struct QemuOpt QemuOpt; diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 1c3a358..285c6ef 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -33,6 +33,17 @@ #define IO_BUF_SIZE 32768 #define MAX_IOV_SIZE MIN(IOV_MAX, 64) =20 +QEMU_BUILD_BUG_ON(!QEMU_IS_ALIGNED(IO_BUF_SIZE, 512)); + +struct QEMUFileBuffer { + int buf_index; + int buf_size; /* 0 when writing */ + uint8_t *buf; + unsigned long *may_free; + struct iovec *iov; + unsigned int iovcnt; +}; + struct QEMUFile { const QEMUFileOps *ops; const QEMUFileHooks *hooks; @@ -43,18 +54,12 @@ struct QEMUFile { =20 int64_t pos; /* start of buffer when writing, end of buffer when reading */ - int buf_index; - int buf_size; /* 0 when writing */ - uint8_t buf[IO_BUF_SIZE]; - - DECLARE_BITMAP(may_free, MAX_IOV_SIZE); - struct iovec iov[MAX_IOV_SIZE]; - unsigned int iovcnt; - int last_error; Error *last_error_obj; /* has the file has been shutdown */ bool shutdown; + /* currently used buffer */ + QEMUFileBuffer *current_buf; }; =20 /* @@ -109,6 +114,12 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileO= ps *ops) =20 f->opaque =3D opaque; f->ops =3D ops; + + f->current_buf =3D g_new0(QEMUFileBuffer, 1); + f->current_buf->buf =3D g_malloc(IO_BUF_SIZE); + f->current_buf->iov =3D g_new0(struct iovec, 
MAX_IOV_SIZE); + f->current_buf->may_free =3D bitmap_new(MAX_IOV_SIZE); + return f; } =20 @@ -177,35 +188,37 @@ static void qemu_iovec_release_ram(QEMUFile *f) { struct iovec iov; unsigned long idx; + QEMUFileBuffer *fb =3D f->current_buf; =20 /* Find and release all the contiguous memory ranges marked as may_fre= e. */ - idx =3D find_next_bit(f->may_free, f->iovcnt, 0); - if (idx >=3D f->iovcnt) { + idx =3D find_next_bit(fb->may_free, fb->iovcnt, 0); + if (idx >=3D fb->iovcnt) { return; } - iov =3D f->iov[idx]; + iov =3D fb->iov[idx]; =20 /* The madvise() in the loop is called for iov within a continuous ran= ge and * then reinitialize the iov. And in the end, madvise() is called for = the * last iov. */ - while ((idx =3D find_next_bit(f->may_free, f->iovcnt, idx + 1)) < f->i= ovcnt) { + while ((idx =3D find_next_bit(fb->may_free, + fb->iovcnt, idx + 1)) < fb->iovcnt) { /* check for adjacent buffer and coalesce them */ - if (iov.iov_base + iov.iov_len =3D=3D f->iov[idx].iov_base) { - iov.iov_len +=3D f->iov[idx].iov_len; + if (iov.iov_base + iov.iov_len =3D=3D fb->iov[idx].iov_base) { + iov.iov_len +=3D fb->iov[idx].iov_len; continue; } if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < = 0) { error_report("migrate: madvise DONTNEED failed %p %zd: %s", iov.iov_base, iov.iov_len, strerror(errno)); } - iov =3D f->iov[idx]; + iov =3D fb->iov[idx]; } if (qemu_madvise(iov.iov_base, iov.iov_len, QEMU_MADV_DONTNEED) < 0) { error_report("migrate: madvise DONTNEED failed %p %zd: %s", iov.iov_base, iov.iov_len, strerror(errno)); } - memset(f->may_free, 0, sizeof(f->may_free)); + bitmap_zero(fb->may_free, MAX_IOV_SIZE); } =20 /** @@ -219,6 +232,7 @@ void qemu_fflush(QEMUFile *f) ssize_t ret =3D 0; ssize_t expect =3D 0; Error *local_error =3D NULL; + QEMUFileBuffer *fb =3D f->current_buf; =20 if (!qemu_file_is_writable(f)) { return; @@ -227,9 +241,9 @@ void qemu_fflush(QEMUFile *f) if (f->shutdown) { return; } - if (f->iovcnt > 0) { - expect =3D 
iov_size(f->iov, f->iovcnt); - ret =3D f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos, + if (fb->iovcnt > 0) { + expect =3D iov_size(fb->iov, fb->iovcnt); + ret =3D f->ops->writev_buffer(f->opaque, fb->iov, fb->iovcnt, f->p= os, &local_error); =20 qemu_iovec_release_ram(f); @@ -244,8 +258,8 @@ void qemu_fflush(QEMUFile *f) if (ret !=3D expect) { qemu_file_set_error_obj(f, ret < 0 ? ret : -EIO, local_error); } - f->buf_index =3D 0; - f->iovcnt =3D 0; + fb->buf_index =3D 0; + fb->iovcnt =3D 0; } =20 void ram_control_before_iterate(QEMUFile *f, uint64_t flags) @@ -331,24 +345,25 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) int len; int pending; Error *local_error =3D NULL; + QEMUFileBuffer *fb =3D f->current_buf; =20 assert(!qemu_file_is_writable(f)); =20 - pending =3D f->buf_size - f->buf_index; + pending =3D fb->buf_size - fb->buf_index; if (pending > 0) { - memmove(f->buf, f->buf + f->buf_index, pending); + memmove(fb->buf, fb->buf + fb->buf_index, pending); } - f->buf_index =3D 0; - f->buf_size =3D pending; + fb->buf_index =3D 0; + fb->buf_size =3D pending; =20 if (f->shutdown) { return 0; } =20 - len =3D f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, + len =3D f->ops->get_buffer(f->opaque, fb->buf + pending, f->pos, IO_BUF_SIZE - pending, &local_error); if (len > 0) { - f->buf_size +=3D len; + fb->buf_size +=3D len; f->pos +=3D len; } else if (len =3D=3D 0) { qemu_file_set_error_obj(f, -EIO, local_error); @@ -393,6 +408,10 @@ int qemu_fclose(QEMUFile *f) ret =3D f->last_error; } error_free(f->last_error_obj); + g_free(f->current_buf->buf); + g_free(f->current_buf->iov); + g_free(f->current_buf->may_free); + g_free(f->current_buf); g_free(f); trace_qemu_file_fclose(); return ret; @@ -409,21 +428,22 @@ int qemu_fclose(QEMUFile *f) static int add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size, bool may_free) { + QEMUFileBuffer *fb =3D f->current_buf; /* check for adjacent buffer and coalesce them */ - if (f->iovcnt > 0 && buf =3D=3D 
f->iov[f->iovcnt - 1].iov_base + - f->iov[f->iovcnt - 1].iov_len && - may_free =3D=3D test_bit(f->iovcnt - 1, f->may_free)) + if (fb->iovcnt > 0 && buf =3D=3D fb->iov[fb->iovcnt - 1].iov_base + + fb->iov[fb->iovcnt - 1].iov_len && + may_free =3D=3D test_bit(fb->iovcnt - 1, fb->may_free)) { - f->iov[f->iovcnt - 1].iov_len +=3D size; + fb->iov[fb->iovcnt - 1].iov_len +=3D size; } else { if (may_free) { - set_bit(f->iovcnt, f->may_free); + set_bit(fb->iovcnt, fb->may_free); } - f->iov[f->iovcnt].iov_base =3D (uint8_t *)buf; - f->iov[f->iovcnt++].iov_len =3D size; + fb->iov[fb->iovcnt].iov_base =3D (uint8_t *)buf; + fb->iov[fb->iovcnt++].iov_len =3D size; } =20 - if (f->iovcnt >=3D MAX_IOV_SIZE) { + if (fb->iovcnt >=3D MAX_IOV_SIZE) { qemu_fflush(f); return 1; } @@ -433,9 +453,10 @@ static int add_to_iovec(QEMUFile *f, const uint8_t *bu= f, size_t size, =20 static void add_buf_to_iovec(QEMUFile *f, size_t len) { - if (!add_to_iovec(f, f->buf + f->buf_index, len, false)) { - f->buf_index +=3D len; - if (f->buf_index =3D=3D IO_BUF_SIZE) { + QEMUFileBuffer *fb =3D f->current_buf; + if (!add_to_iovec(f, fb->buf + fb->buf_index, len, false)) { + fb->buf_index +=3D len; + if (fb->buf_index =3D=3D IO_BUF_SIZE) { qemu_fflush(f); } } @@ -455,17 +476,18 @@ void qemu_put_buffer_async(QEMUFile *f, const uint8_t= *buf, size_t size, void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size) { size_t l; + QEMUFileBuffer *fb =3D f->current_buf; =20 if (f->last_error) { return; } =20 while (size > 0) { - l =3D IO_BUF_SIZE - f->buf_index; + l =3D IO_BUF_SIZE - fb->buf_index; if (l > size) { l =3D size; } - memcpy(f->buf + f->buf_index, buf, l); + memcpy(fb->buf + fb->buf_index, buf, l); f->bytes_xfer +=3D l; add_buf_to_iovec(f, l); if (qemu_file_get_error(f)) { @@ -478,19 +500,23 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf,= size_t size) =20 void qemu_put_byte(QEMUFile *f, int v) { + QEMUFileBuffer *fb =3D f->current_buf; + if (f->last_error) { return; } =20 - 
f->buf[f->buf_index] =3D v; + fb->buf[fb->buf_index] =3D v; f->bytes_xfer++; add_buf_to_iovec(f, 1); } =20 void qemu_file_skip(QEMUFile *f, int size) { - if (f->buf_index + size <=3D f->buf_size) { - f->buf_index +=3D size; + QEMUFileBuffer *fb =3D f->current_buf; + + if (fb->buf_index + size <=3D fb->buf_size) { + fb->buf_index +=3D size; } } =20 @@ -506,15 +532,16 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, s= ize_t size, size_t offset) { ssize_t pending; size_t index; + QEMUFileBuffer *fb =3D f->current_buf; =20 assert(!qemu_file_is_writable(f)); assert(offset < IO_BUF_SIZE); assert(size <=3D IO_BUF_SIZE - offset); =20 /* The 1st byte to read from */ - index =3D f->buf_index + offset; + index =3D fb->buf_index + offset; /* The number of available bytes starting at index */ - pending =3D f->buf_size - index; + pending =3D fb->buf_size - index; =20 /* * qemu_fill_buffer might return just a few bytes, even when there isn= 't @@ -527,8 +554,8 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, siz= e_t size, size_t offset) break; } =20 - index =3D f->buf_index + offset; - pending =3D f->buf_size - index; + index =3D fb->buf_index + offset; + pending =3D fb->buf_size - index; } =20 if (pending <=3D 0) { @@ -538,7 +565,7 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, siz= e_t size, size_t offset) size =3D pending; } =20 - *buf =3D f->buf + index; + *buf =3D fb->buf + index; return size; } =20 @@ -615,19 +642,21 @@ size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t = **buf, size_t size) */ int qemu_peek_byte(QEMUFile *f, int offset) { - int index =3D f->buf_index + offset; + QEMUFileBuffer *fb =3D f->current_buf; + + int index =3D fb->buf_index + offset; =20 assert(!qemu_file_is_writable(f)); assert(offset < IO_BUF_SIZE); =20 - if (index >=3D f->buf_size) { + if (index >=3D fb->buf_size) { qemu_fill_buffer(f); - index =3D f->buf_index + offset; - if (index >=3D f->buf_size) { + index =3D fb->buf_index + offset; + if (index >=3D fb->buf_size) { 
return 0; } } - return f->buf[index]; + return fb->buf[index]; } =20 int qemu_get_byte(QEMUFile *f) @@ -643,9 +672,10 @@ int64_t qemu_ftell_fast(QEMUFile *f) { int64_t ret =3D f->pos; int i; + QEMUFileBuffer *fb =3D f->current_buf; =20 - for (i =3D 0; i < f->iovcnt; i++) { - ret +=3D f->iov[i].iov_len; + for (i =3D 0; i < fb->iovcnt; i++) { + ret +=3D fb->iov[i].iov_len; } =20 return ret; @@ -770,13 +800,14 @@ static int qemu_compress_data(z_stream *stream, uint8= _t *dest, size_t dest_len, ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, const uint8_t *p, size_t size) { - ssize_t blen =3D IO_BUF_SIZE - f->buf_index - sizeof(int32_t); + QEMUFileBuffer *fb =3D f->current_buf; + ssize_t blen =3D IO_BUF_SIZE - fb->buf_index - sizeof(int32_t); =20 if (blen < compressBound(size)) { return -1; } =20 - blen =3D qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int= 32_t), + blen =3D qemu_compress_data(stream, fb->buf + fb->buf_index + sizeof(i= nt32_t), blen, p, size); if (blen < 0) { return -1; @@ -794,12 +825,13 @@ ssize_t qemu_put_compression_data(QEMUFile *f, z_stre= am *stream, int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src) { int len =3D 0; + QEMUFileBuffer *fb_src =3D f_src->current_buf; =20 - if (f_src->buf_index > 0) { - len =3D f_src->buf_index; - qemu_put_buffer(f_des, f_src->buf, f_src->buf_index); - f_src->buf_index =3D 0; - f_src->iovcnt =3D 0; + if (fb_src->buf_index > 0) { + len =3D fb_src->buf_index; + qemu_put_buffer(f_des, fb_src->buf, fb_src->buf_index); + fb_src->buf_index =3D 0; + fb_src->iovcnt =3D 0; } return len; } --=20 1.8.3.1 From nobody Thu May 16 18:16:18 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as 
permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=virtuozzo.com ARC-Seal: i=1; a=rsa-sha256; t=1586776557; cv=none; d=zohomail.com; s=zohoarc; b=ND7LbfRg0BllbamDu3BGlBihfPG3cSw83EpxQa4lVf0vSquVRqHVSM2N38QZUdNAHsmsMq4vFR2nGs4OwCvNyfNIjINqH8mRCV7DroEQRvv7KLMzsaSBWORSuhjsNUSv9Q36ImZD/SbEXwmvxrjh3YEgp+dSBG4tuRQRRoF483w= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1586776557; h=Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:Message-ID:References:Sender:Subject:To; bh=lfGKJrf6+DpuNr3dGQ866En2/ns+G/6SAPM2EobBsCQ=; b=Hu0cgoLrnXrk40VMgJvwYJTKh6zToGLeU+dOjNuapgSZQNJ8W0jOzX4rZ5O8ypD6jxyjPAtxpvsWDWCbiT5LmS3y2QpxMmoC7diYLXuyMltVgpKoZ85W87k3aWIJVFPw4i1BxMhAjYzF2g7dHQf3vxxuEfDUTgvDYFQy69JbOGI= ARC-Authentication-Results: i=1; mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail header.from= (p=none dis=none) header.from= Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1586776557817387.4246238552937; Mon, 13 Apr 2020 04:15:57 -0700 (PDT) Received: from localhost ([::1]:43106 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1jNx4E-0007O4-5U for importer@patchew.org; Mon, 13 Apr 2020 07:15:54 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58863) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1jNx0u-0002k3-Ts for qemu-devel@nongnu.org; Mon, 13 Apr 2020 07:12:31 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1jNx0s-0000iL-Jd for qemu-devel@nongnu.org; Mon, 13 Apr 2020 07:12:28 -0400 Received: from relay.sw.ru ([185.231.240.75]:40332) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from 
) id 1jNx0s-0000fx-7n for qemu-devel@nongnu.org; Mon, 13 Apr 2020 07:12:26 -0400 Received: from vgpu0.qa.sw.ru ([10.94.1.107]) by relay.sw.ru with esmtp (Exim 4.92.3) (envelope-from ) id 1jNx0h-0002VQ-PK; Mon, 13 Apr 2020 14:12:15 +0300 From: Denis Plotnikov To: qemu-devel@nongnu.org Subject: [RFC patch v1 2/3] qemu-file: add buffered mode Date: Mon, 13 Apr 2020 14:12:13 +0300 Message-Id: <1586776334-641239-3-git-send-email-dplotnikov@virtuozzo.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1586776334-641239-1-git-send-email-dplotnikov@virtuozzo.com> References: <1586776334-641239-1-git-send-email-dplotnikov@virtuozzo.com> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 185.231.240.75 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: den@openvz.org, dgilbert@redhat.com, quintela@redhat.com Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" The patch adds the ability for qemu-file to write data asynchronously to improve write performance. Before, only synchronous writing was supported. Enabling of the asynchronous mode is managed by the new "enable_buffered" callback. 
Signed-off-by: Denis Plotnikov --- include/qemu/typedefs.h | 1 + migration/qemu-file.c | 351 ++++++++++++++++++++++++++++++++++++++++++++= +--- migration/qemu-file.h | 9 ++ 3 files changed, 339 insertions(+), 22 deletions(-) diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 88dce54..9b388c8 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -98,6 +98,7 @@ typedef struct QEMUBH QEMUBH; typedef struct QemuConsole QemuConsole; typedef struct QEMUFile QEMUFile; typedef struct QEMUFileBuffer QEMUFileBuffer; +typedef struct QEMUFileAioTask QEMUFileAioTask; typedef struct QemuLockable QemuLockable; typedef struct QemuMutex QemuMutex; typedef struct QemuOpt QemuOpt; diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 285c6ef..f42f949 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -29,19 +29,25 @@ #include "qemu-file.h" #include "trace.h" #include "qapi/error.h" +#include "block/aio_task.h" =20 -#define IO_BUF_SIZE 32768 +#define IO_BUF_SIZE (1024 * 1024) #define MAX_IOV_SIZE MIN(IOV_MAX, 64) +#define IO_BUF_NUM 2 +#define IO_BUF_ALIGNMENT 512 =20 -QEMU_BUILD_BUG_ON(!QEMU_IS_ALIGNED(IO_BUF_SIZE, 512)); +QEMU_BUILD_BUG_ON(!QEMU_IS_ALIGNED(IO_BUF_SIZE, IO_BUF_ALIGNMENT)); +QEMU_BUILD_BUG_ON(IO_BUF_SIZE > INT_MAX); +QEMU_BUILD_BUG_ON(IO_BUF_NUM <=3D 0); =20 struct QEMUFileBuffer { int buf_index; - int buf_size; /* 0 when writing */ + int buf_size; /* 0 when non-buffered writing */ uint8_t *buf; unsigned long *may_free; struct iovec *iov; unsigned int iovcnt; + QLIST_ENTRY(QEMUFileBuffer) link; }; =20 struct QEMUFile { @@ -60,6 +66,22 @@ struct QEMUFile { bool shutdown; /* currently used buffer */ QEMUFileBuffer *current_buf; + /* + * with buffered_mode enabled all the data copied to 512 byte + * aligned buffer, including iov data. Then the buffer is passed + * to writev_buffer callback. 
+ */ + bool buffered_mode; + /* for async buffer writing */ + AioTaskPool *pool; + /* the list of free buffers, currently used on is NOT there */ + QLIST_HEAD(, QEMUFileBuffer) free_buffers; +}; + +struct QEMUFileAioTask { + AioTask task; + QEMUFile *f; + QEMUFileBuffer *fb; }; =20 /* @@ -115,10 +137,42 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFile= Ops *ops) f->opaque =3D opaque; f->ops =3D ops; =20 - f->current_buf =3D g_new0(QEMUFileBuffer, 1); - f->current_buf->buf =3D g_malloc(IO_BUF_SIZE); - f->current_buf->iov =3D g_new0(struct iovec, MAX_IOV_SIZE); - f->current_buf->may_free =3D bitmap_new(MAX_IOV_SIZE); + if (f->ops->enable_buffered) { + f->buffered_mode =3D f->ops->enable_buffered(f->opaque); + } + + if (f->buffered_mode && qemu_file_is_writable(f)) { + int i; + /* + * in buffered_mode we don't use internal io vectors + * and may_free bitmap, because we copy the data to be + * written right away to the buffer + */ + f->pool =3D aio_task_pool_new(IO_BUF_NUM); + + /* allocate io buffers */ + for (i =3D 0; i < IO_BUF_NUM; i++) { + QEMUFileBuffer *fb =3D g_new0(QEMUFileBuffer, 1); + + fb->buf =3D qemu_memalign(IO_BUF_ALIGNMENT, IO_BUF_SIZE); + fb->buf_size =3D IO_BUF_SIZE; + + /* + * put the first buffer to the current buf and the rest + * to the list of free buffers + */ + if (i =3D=3D 0) { + f->current_buf =3D fb; + } else { + QLIST_INSERT_HEAD(&f->free_buffers, fb, link); + } + } + } else { + f->current_buf =3D g_new0(QEMUFileBuffer, 1); + f->current_buf->buf =3D g_malloc(IO_BUF_SIZE); + f->current_buf->iov =3D g_new0(struct iovec, MAX_IOV_SIZE); + f->current_buf->may_free =3D bitmap_new(MAX_IOV_SIZE); + } =20 return f; } @@ -190,6 +244,8 @@ static void qemu_iovec_release_ram(QEMUFile *f) unsigned long idx; QEMUFileBuffer *fb =3D f->current_buf; =20 + assert(!f->buffered_mode); + /* Find and release all the contiguous memory ranges marked as may_fre= e. 
*/ idx =3D find_next_bit(fb->may_free, fb->iovcnt, 0); if (idx >=3D fb->iovcnt) { @@ -221,6 +277,147 @@ static void qemu_iovec_release_ram(QEMUFile *f) bitmap_zero(fb->may_free, MAX_IOV_SIZE); } =20 +static void advance_buf_ptr(QEMUFile *f, size_t size) +{ + QEMUFileBuffer *fb =3D f->current_buf; + /* must not advance to 0 */ + assert(size); + /* must not overflow buf_index (int) */ + assert(fb->buf_index + size <=3D INT_MAX); + /* must not exceed buf_size */ + assert(fb->buf_index + size <=3D fb->buf_size); + + fb->buf_index +=3D size; +} + +static size_t get_buf_free_size(QEMUFile *f) +{ + QEMUFileBuffer *fb =3D f->current_buf; + /* buf_index can't be greated than buf_size */ + assert(fb->buf_size >=3D fb->buf_index); + return fb->buf_size - fb->buf_index; +} + +static size_t get_buf_used_size(QEMUFile *f) +{ + QEMUFileBuffer *fb =3D f->current_buf; + return fb->buf_index; +} + +static uint8_t *get_buf_ptr(QEMUFile *f) +{ + QEMUFileBuffer *fb =3D f->current_buf; + /* protects from out of bound reading */ + assert(fb->buf_index <=3D IO_BUF_SIZE); + return fb->buf + fb->buf_index; +} + +static bool buf_is_full(QEMUFile *f) +{ + return get_buf_free_size(f) =3D=3D 0; +} + +static void reset_buf(QEMUFile *f) +{ + QEMUFileBuffer *fb =3D f->current_buf; + fb->buf_index =3D 0; +} + +static int write_task_fn(AioTask *task) +{ + int ret; + Error *local_error =3D NULL; + QEMUFileAioTask *t =3D (QEMUFileAioTask *) task; + QEMUFile *f =3D t->f; + QEMUFileBuffer *fb =3D t->fb; + uint64_t pos =3D f->pos; + struct iovec v =3D (struct iovec) { + .iov_base =3D fb->buf, + .iov_len =3D fb->buf_index, + }; + + assert(f->buffered_mode); + + /* + * Increment file position. + * This needs to be here before calling writev_buffer, because + * writev_buffer is asynchronous and there could be more than one + * writev_buffer started simultaniously. Each writev_buffer should + * use its own file pos to write to. 
writev_buffer may write less + * than buf_index bytes but we treat this situation as an error. + * If error appeared, further file using is meaningless. + * We expect that, the most of the time the full buffer is written, + * (when buf_size =3D=3D buf_index). The only case when the non-full + * buffer is written (buf_size !=3D buf_index) is file close, + * when we need to flush the rest of the buffer content. + */ + f->pos +=3D fb->buf_index; + + ret =3D f->ops->writev_buffer(f->opaque, &v, 1, pos, &local_error); + + /* return the just written buffer to the free list */ + QLIST_INSERT_HEAD(&f->free_buffers, fb, link); + + /* check that we have written everything */ + if (ret !=3D fb->buf_index) { + qemu_file_set_error_obj(f, ret < 0 ? ret : -EIO, local_error); + } + + /* + * always return 0 - don't use task error handling, relay on + * qemu file error handling + */ + return 0; +} + +static void qemu_file_switch_current_buf(QEMUFile *f) +{ + /* + * if the list is empty, wait until some task returns a buffer + * to the list of free buffers. + */ + if (QLIST_EMPTY(&f->free_buffers)) { + aio_task_pool_wait_slot(f->pool); + } + + /* + * sanity check that the list isn't empty + * if the free list was empty, we waited for a task complition, + * and the pompleted task must return a buffer to a list of free buffe= rs + */ + assert(!QLIST_EMPTY(&f->free_buffers)); + + /* set the current buffer for using from the free list */ + f->current_buf =3D QLIST_FIRST(&f->free_buffers); + reset_buf(f); + + QLIST_REMOVE(f->current_buf, link); +} + +/** + * Asynchronously flushes QEMUFile buffer + * + * This will flush all pending data. If data was only partially flushed, it + * will set an error state. The function may return before the data actual= ly + * written. 
+ */ +static void flush_buffer(QEMUFile *f) +{ + QEMUFileAioTask *t =3D g_new(QEMUFileAioTask, 1); + + *t =3D (QEMUFileAioTask) { + .task.func =3D &write_task_fn, + .f =3D f, + .fb =3D f->current_buf, + }; + + /* aio_task_pool should free t for us */ + aio_task_pool_start_task(f->pool, (AioTask *) t); + + /* if no errors this will switch the buffer */ + qemu_file_switch_current_buf(f); +} + /** * Flushes QEMUFile buffer * @@ -241,7 +438,13 @@ void qemu_fflush(QEMUFile *f) if (f->shutdown) { return; } + + if (f->buffered_mode) { + return; + } + if (fb->iovcnt > 0) { + /* this is non-buffered mode */ expect =3D iov_size(fb->iov, fb->iovcnt); ret =3D f->ops->writev_buffer(f->opaque, fb->iov, fb->iovcnt, f->p= os, &local_error); @@ -378,6 +581,7 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) =20 void qemu_update_position(QEMUFile *f, size_t size) { + assert(!f->buffered_mode); f->pos +=3D size; } =20 @@ -392,7 +596,18 @@ void qemu_update_position(QEMUFile *f, size_t size) int qemu_fclose(QEMUFile *f) { int ret; - qemu_fflush(f); + + if (qemu_file_is_writable(f) && f->buffered_mode) { + ret =3D qemu_file_get_error(f); + if (!ret) { + flush_buffer(f); + } + /* wait until all tasks are done */ + aio_task_pool_wait_all(f->pool); + } else { + qemu_fflush(f); + } + ret =3D qemu_file_get_error(f); =20 if (f->ops->close) { @@ -408,16 +623,77 @@ int qemu_fclose(QEMUFile *f) ret =3D f->last_error; } error_free(f->last_error_obj); - g_free(f->current_buf->buf); - g_free(f->current_buf->iov); - g_free(f->current_buf->may_free); - g_free(f->current_buf); + + if (f->buffered_mode) { + QEMUFileBuffer *fb, *next; + /* + * put the current back to the free buffers list + * to destroy all the buffers in one loop + */ + QLIST_INSERT_HEAD(&f->free_buffers, f->current_buf, link); + + /* destroy all the buffers */ + QLIST_FOREACH_SAFE(fb, &f->free_buffers, link, next) { + QLIST_REMOVE(fb, link); + /* looks like qemu_vfree pairs with qemu_memalign */ + qemu_vfree(fb->buf); + g_free(fb); + } 
+ g_free(f->pool); + } else { + g_free(f->current_buf->buf); + g_free(f->current_buf->iov); + g_free(f->current_buf->may_free); + g_free(f->current_buf); + } + g_free(f); trace_qemu_file_fclose(); return ret; } =20 /* + * Copy an external buffer to the intenal current buffer. + */ +static void copy_buf(QEMUFile *f, const uint8_t *buf, size_t size, + bool may_free) +{ + size_t data_size =3D size; + const uint8_t *src_ptr =3D buf; + + assert(f->buffered_mode); + assert(size <=3D INT_MAX); + + while (data_size > 0) { + size_t chunk_size; + + if (buf_is_full(f)) { + flush_buffer(f); + if (qemu_file_get_error(f)) { + return; + } + } + + chunk_size =3D MIN(get_buf_free_size(f), data_size); + + memcpy(get_buf_ptr(f), src_ptr, chunk_size); + + advance_buf_ptr(f, chunk_size); + + src_ptr +=3D chunk_size; + data_size -=3D chunk_size; + f->bytes_xfer +=3D chunk_size; + } + + if (may_free) { + if (qemu_madvise((void *) buf, size, QEMU_MADV_DONTNEED) < 0) { + error_report("migrate: madvise DONTNEED failed %p %zd: %s", + buf, size, strerror(errno)); + } + } +} + +/* * Add buf to iovec. Do flush if iovec is full. 
* * Return values: @@ -454,6 +730,9 @@ static int add_to_iovec(QEMUFile *f, const uint8_t *buf= , size_t size, static void add_buf_to_iovec(QEMUFile *f, size_t len) { QEMUFileBuffer *fb =3D f->current_buf; + + assert(!f->buffered_mode); + if (!add_to_iovec(f, fb->buf + fb->buf_index, len, false)) { fb->buf_index +=3D len; if (fb->buf_index =3D=3D IO_BUF_SIZE) { @@ -469,8 +748,12 @@ void qemu_put_buffer_async(QEMUFile *f, const uint8_t = *buf, size_t size, return; } =20 - f->bytes_xfer +=3D size; - add_to_iovec(f, buf, size, may_free); + if (f->buffered_mode) { + copy_buf(f, buf, size, may_free); + } else { + f->bytes_xfer +=3D size; + add_to_iovec(f, buf, size, may_free); + } } =20 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size) @@ -482,6 +765,11 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, = size_t size) return; } =20 + if (f->buffered_mode) { + copy_buf(f, buf, size, false); + return; + } + while (size > 0) { l =3D IO_BUF_SIZE - fb->buf_index; if (l > size) { @@ -506,15 +794,21 @@ void qemu_put_byte(QEMUFile *f, int v) return; } =20 - fb->buf[fb->buf_index] =3D v; - f->bytes_xfer++; - add_buf_to_iovec(f, 1); + if (f->buffered_mode) { + copy_buf(f, (const uint8_t *) &v, 1, false); + } else { + fb->buf[fb->buf_index] =3D v; + add_buf_to_iovec(f, 1); + f->bytes_xfer++; + } } =20 void qemu_file_skip(QEMUFile *f, int size) { QEMUFileBuffer *fb =3D f->current_buf; =20 + assert(!f->buffered_mode); + if (fb->buf_index + size <=3D fb->buf_size) { fb->buf_index +=3D size; } @@ -672,10 +966,14 @@ int64_t qemu_ftell_fast(QEMUFile *f) { int64_t ret =3D f->pos; int i; - QEMUFileBuffer *fb =3D f->current_buf; =20 - for (i =3D 0; i < fb->iovcnt; i++) { - ret +=3D fb->iov[i].iov_len; + if (f->buffered_mode) { + ret +=3D get_buf_used_size(f); + } else { + QEMUFileBuffer *fb =3D f->current_buf; + for (i =3D 0; i < fb->iovcnt; i++) { + ret +=3D fb->iov[i].iov_len; + } } =20 return ret; @@ -683,8 +981,12 @@ int64_t qemu_ftell_fast(QEMUFile *f) =20 
int64_t qemu_ftell(QEMUFile *f) { - qemu_fflush(f); - return f->pos; + if (f->buffered_mode) { + return qemu_ftell_fast(f); + } else { + qemu_fflush(f); + return f->pos; + } } =20 int qemu_file_rate_limit(QEMUFile *f) @@ -803,6 +1105,8 @@ ssize_t qemu_put_compression_data(QEMUFile *f, z_strea= m *stream, QEMUFileBuffer *fb =3D f->current_buf; ssize_t blen =3D IO_BUF_SIZE - fb->buf_index - sizeof(int32_t); =20 + assert(!f->buffered_mode); + if (blen < compressBound(size)) { return -1; } @@ -827,6 +1131,9 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_sr= c) int len =3D 0; QEMUFileBuffer *fb_src =3D f_src->current_buf; =20 + assert(!f_des->buffered_mode); + assert(!f_src->buffered_mode); + if (fb_src->buf_index > 0) { len =3D fb_src->buf_index; qemu_put_buffer(f_des, fb_src->buf, fb_src->buf_index); diff --git a/migration/qemu-file.h b/migration/qemu-file.h index a9b6d6c..08655d2 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -103,6 +103,14 @@ typedef QEMUFile *(QEMURetPathFunc)(void *opaque); typedef int (QEMUFileShutdownFunc)(void *opaque, bool rd, bool wr, Error **errp); =20 +/* + * Enables or disables the buffered mode + * Existing blocking reads/writes must be woken + * Returns true if the buffered mode has to be enabled, + * false if it has to be disabled. 
+ */ +typedef bool (QEMUFileEnableBufferedFunc)(void *opaque); + typedef struct QEMUFileOps { QEMUFileGetBufferFunc *get_buffer; QEMUFileCloseFunc *close; @@ -110,6 +118,7 @@ typedef struct QEMUFileOps { QEMUFileWritevBufferFunc *writev_buffer; QEMURetPathFunc *get_return_path; QEMUFileShutdownFunc *shut_down; + QEMUFileEnableBufferedFunc *enable_buffered; } QEMUFileOps; =20 typedef struct QEMUFileHooks { --=20 1.8.3.1 From nobody Thu May 16 18:16:18 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=virtuozzo.com ARC-Seal: i=1; a=rsa-sha256; t=1586776404; cv=none; d=zohomail.com; s=zohoarc; b=GsxD4LvoLlhnTPiqYId4MgJek8NRGzIvifP0D0XWR0wqxkfIUvRYW7WvxW5NR+yiP2fjt/IEQ0BDP7J/5N5B9UvTuvjKivIB3HNjYrBk2G71KG9VmPPwyEhRRw6Tmfj87XF9rtSnCw18gUy0wdADSfNMJeZryRsxHLwj4TAHDCQ= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1586776404; h=Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:Message-ID:References:Sender:Subject:To; bh=Dq/0h3xKtJnaVCrv4Jg4u0ns29oCLF0YSfLdPsTWuls=; b=Nv4EgaGFAnc+of1fU/v5JxkeINFhXFn3POW+htKTI1gOrLxKf6iMCbEEjK5TeHgLLu4uiPyLz8UOxEoPTYHb/LdNFUswVH3PJRQeAtu7ixn+t2EXjULCXJYFhmponrfsazRbsaWCeDb5d5zHXokReQfsaYMs+ZrXwezIlprL0kI= ARC-Authentication-Results: i=1; mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail header.from= (p=none dis=none) header.from= Return-Path: Received: from lists.gnu.org (lists.gnu.org 
[209.51.188.17]) by mx.zohomail.com with SMTPS id 15867764043761003.5684086151135; Mon, 13 Apr 2020 04:13:24 -0700 (PDT) Received: from localhost ([::1]:43064 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1jNx1n-0004NQ-7n for importer@patchew.org; Mon, 13 Apr 2020 07:13:23 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58841) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1jNx0u-0002iU-24 for qemu-devel@nongnu.org; Mon, 13 Apr 2020 07:12:29 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1jNx0s-0000hw-D4 for qemu-devel@nongnu.org; Mon, 13 Apr 2020 07:12:27 -0400 Received: from relay.sw.ru ([185.231.240.75]:40330) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1jNx0s-0000g0-5u for qemu-devel@nongnu.org; Mon, 13 Apr 2020 07:12:26 -0400 Received: from vgpu0.qa.sw.ru ([10.94.1.107]) by relay.sw.ru with esmtp (Exim 4.92.3) (envelope-from ) id 1jNx0h-0002VQ-TU; Mon, 13 Apr 2020 14:12:16 +0300 From: Denis Plotnikov To: qemu-devel@nongnu.org Subject: [RFC patch v1 3/3] migration/savevm: use qemu-file buffered mode for non-cached bdrv Date: Mon, 13 Apr 2020 14:12:14 +0300 Message-Id: <1586776334-641239-4-git-send-email-dplotnikov@virtuozzo.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1586776334-641239-1-git-send-email-dplotnikov@virtuozzo.com> References: <1586776334-641239-1-git-send-email-dplotnikov@virtuozzo.com> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 185.231.240.75 X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: den@openvz.org, dgilbert@redhat.com, quintela@redhat.com Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; 
charset="utf-8" This makes internal snapshots of HDD placed qcow2 images opened with O_DIRECT flag 4 times faster. The test: creates a 500M internal snapshot for a qcow2 image placed on HDD Result times: with the patch: ~6 sec without patch: ~24 sec This happens because the internal snapshot saving produces a lot of pwrites, because of flushing the internal buffers with non-aligned io vectors and direct calls to qemu_fflush. To fix it, we introduce an internal pointer and size aligned buffer. Most of the time the buffer is flushed only when it's full regardless of direct calls to qemu_fflush. When the buffer is full, it is written asynchronously. This gives us a couple of advantages leading to performance improvement: 1. because of pointer and size aligned buffers we can use asynchronous OS write syscall, like io_submit 2. when some buffer is being written, another buffer is filled with data. Signed-off-by: Denis Plotnikov --- migration/savevm.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index c00a680..db0cac9 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -63,6 +63,7 @@ #include "migration/colo.h" #include "qemu/bitmap.h" #include "net/announce.h" +#include "block/block_int.h" =20 const unsigned int postcopy_ram_discard_version =3D 0; =20 @@ -153,6 +154,12 @@ static int bdrv_fclose(void *opaque, Error **errp) return bdrv_flush(opaque); } =20 +static bool qemu_file_is_buffered(void *opaque) +{ + BlockDriverState *bs =3D (BlockDriverState *) opaque; + return !!(bs->open_flags & BDRV_O_NOCACHE); +} + static const QEMUFileOps bdrv_read_ops =3D { .get_buffer =3D block_get_buffer, .close =3D bdrv_fclose @@ -160,7 +167,8 @@ static const QEMUFileOps bdrv_read_ops =3D { =20 static const QEMUFileOps bdrv_write_ops =3D { .writev_buffer =3D block_writev_buffer, - .close =3D bdrv_fclose + .close =3D bdrv_fclose, + .enable_buffered =3D qemu_file_is_buffered }; =20
static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable) @@ -2624,7 +2632,7 @@ int qemu_load_device_state(QEMUFile *f) return 0; } =20 -int save_snapshot(const char *name, Error **errp) +static int coroutine_fn save_snapshot_fn(const char *name, Error **errp) { BlockDriverState *bs, *bs1; QEMUSnapshotInfo sn1, *sn =3D &sn1, old_sn1, *old_sn =3D &old_sn1; @@ -2747,6 +2755,32 @@ int save_snapshot(const char *name, Error **errp) return ret; } =20 + typedef struct SaveVMParams { + const char *name; + Error **errp; + int ret; + } SaveVMParams; + +static void coroutine_fn save_snapshot_entry(void *opaque) +{ + SaveVMParams *p =3D (SaveVMParams *) opaque; + p->ret =3D save_snapshot_fn(p->name, p->errp); +} + +int save_snapshot(const char *name, Error **errp) +{ + SaveVMParams p =3D (SaveVMParams) { + .name =3D name, + .errp =3D errp, + .ret =3D -EINPROGRESS, + }; + + Coroutine *co =3D qemu_coroutine_create(save_snapshot_entry, &p); + aio_co_enter(qemu_get_aio_context(), co); + AIO_WAIT_WHILE(qemu_get_aio_context(), p.ret =3D=3D -EINPROGRESS); + return p.ret; +} + void qmp_xen_save_devices_state(const char *filename, bool has_live, bool = live, Error **errp) { --=20 1.8.3.1