From nobody Mon Feb 9 23:15:02 2026 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=virtuozzo.com Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1528392447503250.75815248013532; Thu, 7 Jun 2018 10:27:27 -0700 (PDT) Received: from localhost ([::1]:59449 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fQyh4-0002JV-KM for importer@patchew.org; Thu, 07 Jun 2018 13:27:26 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:34213) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1fQyUs-0008B0-BN for qemu-devel@nongnu.org; Thu, 07 Jun 2018 13:14:53 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1fQyUo-00085T-VN for qemu-devel@nongnu.org; Thu, 07 Jun 2018 13:14:50 -0400 Received: from relay.sw.ru ([195.214.232.25]:52028) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1fQyUo-0007zu-Ch; Thu, 07 Jun 2018 13:14:46 -0400 Received: from msk-vpn.virtuozzo.com ([195.214.232.6] helo=kvm.sw.ru) by relay.sw.ru with esmtp (Exim 4.90_1) (envelope-from ) id 1fQyUl-0007DI-8Q; Thu, 07 Jun 2018 20:14:43 +0300 From: Vladimir Sementsov-Ogievskiy To: qemu-devel@nongnu.org, qemu-block@nongnu.org Date: Thu, 7 Jun 2018 20:14:40 +0300 Message-Id: <20180607171441.64079-11-vsementsov@virtuozzo.com> X-Mailer: git-send-email 2.11.1 In-Reply-To: <20180607171441.64079-1-vsementsov@virtuozzo.com> References: <20180607171441.64079-1-vsementsov@virtuozzo.com> 
X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 195.214.232.25 Subject: [Qemu-devel] [PATCH v2 10/11] block/nbd-client: nbd reconnect X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: kwolf@redhat.com, vsementsov@virtuozzo.com, armbru@redhat.com, mreitz@redhat.com, den@openvz.org, pbonzini@redhat.com Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Implement reconnect. To achieve this: 1. Move from quit bool variable to state. 4 states are introduced: connecting-wait: means that reconnecting is in progress and there have been only a small number of reconnect attempts, so all requests are waiting for the connection. connecting-nowait: reconnecting is in progress and there have been many reconnect attempts, so all requests will return errors. connected: normal state quit: exiting after fatal error or on close Possible transitions are: * -> quit connecting-* -> connected connecting-wait -> connecting-nowait connected -> connecting-wait 2. Implement reconnect in connection_co. So, in connecting-* mode, connection_co tries to reconnect every NBD_RECONNECT_NS. Configuration of this parameter (as well as NBD_RECONNECT_ATTEMPTS, which specifies the bound for the transition from connecting-wait to connecting-nowait) may be done as a follow-up patch. 3. Retry nbd queries on channel error, if we are in connecting-wait state. 4. In init, wait for the connection until the transition to connecting-nowait. So, NBD_RECONNECT_ATTEMPTS also bounds the number of failed attempts for the initial connection. 
Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/nbd-client.h | 2 + block/nbd-client.c | 170 ++++++++++++++++++++++++++++++++++++++-----------= ---- 2 files changed, 123 insertions(+), 49 deletions(-) diff --git a/block/nbd-client.h b/block/nbd-client.h index 2561e1ea42..1249f2eb52 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -44,6 +44,8 @@ typedef struct NBDClientSession { bool receiving; int connect_status; Error *connect_err; + int connect_attempts; + bool wait_in_flight; =20 NBDClientRequest requests[MAX_NBD_REQUESTS]; NBDReply reply; diff --git a/block/nbd-client.c b/block/nbd-client.c index 17ed7575c7..c071ac911a 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -41,10 +41,16 @@ static int nbd_client_connect(BlockDriverState *bs, const char *hostname, Error **errp); =20 -/* @ret would be used for reconnect in future */ static void nbd_channel_error(NBDClientSession *s, int ret) { - s->state =3D NBD_CLIENT_QUIT; + if (ret =3D=3D -EIO) { + if (s->state =3D=3D NBD_CLIENT_CONNECTED) { + s->state =3D NBD_CLIENT_CONNECTING_WAIT; + s->connect_attempts =3D 0; + } + } else { + s->state =3D NBD_CLIENT_QUIT; + } } =20 static void nbd_recv_coroutines_wake_all(NBDClientSession *s) @@ -90,6 +96,19 @@ typedef struct NBDConnection { uint64_t reconnect_timeout; } NBDConnection; =20 +static bool nbd_client_connecting(NBDClientSession *client) +{ + return client->state =3D=3D NBD_CLIENT_CONNECTING_WAIT || + client->state =3D=3D NBD_CLIENT_CONNECTING_NOWAIT || + client->state =3D=3D NBD_CLIENT_CONNECTING_INIT; +} + +static bool nbd_client_connecting_wait(NBDClientSession *client) +{ + return client->state =3D=3D NBD_CLIENT_CONNECTING_WAIT || + client->state =3D=3D NBD_CLIENT_CONNECTING_INIT; +} + static coroutine_fn void nbd_connection_entry(void *opaque) { NBDConnection *con =3D opaque; @@ -98,26 +117,55 @@ static coroutine_fn void nbd_connection_entry(void *op= aque) int ret =3D 0; Error *local_err =3D NULL; =20 - if (con->reconnect_attempts !=3D 
0) { - error_setg(&s->connect_err, "Reconnect is not supported yet"); - s->connect_status =3D -EINVAL; - nbd_channel_error(s, s->connect_status); - return; - } + while (s->state !=3D NBD_CLIENT_QUIT) { + assert(s->reply.handle =3D=3D 0); =20 - s->connect_status =3D nbd_client_connect(con->bs, con->saddr, - con->export, con->tlscreds, - con->hostname, &s->connect_err); - if (s->connect_status < 0) { - nbd_channel_error(s, s->connect_status); - return; - } + if (nbd_client_connecting(s)) { + if (s->connect_attempts =3D=3D con->reconnect_attempts) { + s->state =3D NBD_CLIENT_CONNECTING_NOWAIT; + qemu_co_queue_restart_all(&s->free_sema); + } =20 - /* successfully connected */ - s->state =3D NBD_CLIENT_CONNECTED; + qemu_co_mutex_lock(&s->send_mutex); + + while (s->in_flight > 0) { + qemu_co_mutex_unlock(&s->send_mutex); + nbd_recv_coroutines_wake_all(s); + s->wait_in_flight =3D true; + qemu_coroutine_yield(); + s->wait_in_flight =3D false; + qemu_co_mutex_lock(&s->send_mutex); + } + + qemu_co_mutex_unlock(&s->send_mutex); + + /* Now we are sure, that nobody accessing the channel now and = nobody + * will try to access the channel, until we set state to CONNE= CTED + */ + + s->connect_status =3D nbd_client_connect(con->bs, con->saddr, + con->export, con->tlscr= eds, + con->hostname, &local_e= rr); + s->connect_attempts++; + error_free(s->connect_err); + s->connect_err =3D NULL; + error_propagate(&s->connect_err, local_err); + local_err =3D NULL; + if (s->connect_status =3D=3D -EINVAL) { + /* Protocol error or something like this */ + nbd_channel_error(s, s->connect_status); + continue; + } + if (s->connect_status < 0) { + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, con->reconnect_timeo= ut); + continue; + } + + /* successfully connected */ + s->state =3D NBD_CLIENT_CONNECTED; + qemu_co_queue_restart_all(&s->free_sema); + } =20 - while (s->state !=3D NBD_CLIENT_QUIT) { - assert(s->reply.handle =3D=3D 0); s->receiving =3D true; ret =3D nbd_receive_reply(s->ioc, &s->reply, 
&local_err); s->receiving =3D false; @@ -158,6 +206,7 @@ static coroutine_fn void nbd_connection_entry(void *opa= que) qemu_coroutine_yield(); } =20 + qemu_co_queue_restart_all(&s->free_sema); nbd_recv_coroutines_wake_all(s); s->connection_co =3D NULL; } @@ -170,7 +219,7 @@ static int nbd_co_send_request(BlockDriverState *bs, int rc, i; =20 qemu_co_mutex_lock(&s->send_mutex); - while (s->in_flight =3D=3D MAX_NBD_REQUESTS) { + while (s->in_flight =3D=3D MAX_NBD_REQUESTS || nbd_client_connecting_w= ait(s)) { qemu_co_queue_wait(&s->free_sema, &s->send_mutex); } =20 @@ -219,7 +268,11 @@ err: nbd_channel_error(s, rc); s->requests[i].coroutine =3D NULL; s->in_flight--; - qemu_co_queue_next(&s->free_sema); + if (s->in_flight =3D=3D 0 && s->wait_in_flight) { + aio_co_wake(s->connection_co); + } else { + qemu_co_queue_next(&s->free_sema); + } } qemu_co_mutex_unlock(&s->send_mutex); return rc; @@ -669,7 +722,11 @@ break_loop: =20 qemu_co_mutex_lock(&s->send_mutex); s->in_flight--; - qemu_co_queue_next(&s->free_sema); + if (s->in_flight =3D=3D 0 && s->wait_in_flight) { + aio_co_wake(s->connection_co); + } else { + qemu_co_queue_next(&s->free_sema); + } qemu_co_mutex_unlock(&s->send_mutex); =20 return false; @@ -818,16 +875,21 @@ static int nbd_co_request(BlockDriverState *bs, NBDRe= quest *request, } else { assert(request->type !=3D NBD_CMD_WRITE); } - ret =3D nbd_co_send_request(bs, request, write_qiov); - if (ret < 0) { - return ret; - } =20 - ret =3D nbd_co_receive_return_code(client, request->handle, - &request_ret, &local_err); - if (local_err) { - error_report_err(local_err); - } + do { + ret =3D nbd_co_send_request(bs, request, write_qiov); + if (ret < 0) { + continue; + } + + ret =3D nbd_co_receive_return_code(client, request->handle, + &request_ret, &local_err); + if (local_err) { + error_report_err(local_err); + local_err =3D NULL; + } + } while (ret < 0 && nbd_client_connecting_wait(client)); + return ret ? 
ret : request_ret; } =20 @@ -849,16 +911,21 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64= _t offset, if (!bytes) { return 0; } - ret =3D nbd_co_send_request(bs, &request, NULL); - if (ret < 0) { - return ret; - } =20 - ret =3D nbd_co_receive_cmdread_reply(client, request.handle, offset, q= iov, - &request_ret, &local_err); - if (local_err) { - error_report_err(local_err); - } + do { + ret =3D nbd_co_send_request(bs, &request, NULL); + if (ret < 0) { + continue; + } + + ret =3D nbd_co_receive_cmdread_reply(client, request.handle, offse= t, qiov, + &request_ret, &local_err); + if (local_err) { + error_report_err(local_err); + local_err =3D NULL; + } + } while (ret < 0 && nbd_client_connecting_wait(client)); + return ret ? ret : request_ret; } =20 @@ -972,16 +1039,21 @@ int coroutine_fn nbd_client_co_block_status(BlockDri= verState *bs, return BDRV_BLOCK_DATA; } =20 - ret =3D nbd_co_send_request(bs, &request, NULL); - if (ret < 0) { - return ret; - } + do { + ret =3D nbd_co_send_request(bs, &request, NULL); + if (ret < 0) { + continue; + } + + ret =3D nbd_co_receive_blockstatus_reply(client, request.handle, b= ytes, + &extent, &request_ret, + &local_err); + if (local_err) { + error_report_err(local_err); + local_err =3D NULL; + } + } while (ret < 0 && nbd_client_connecting_wait(client)); =20 - ret =3D nbd_co_receive_blockstatus_reply(client, request.handle, bytes, - &extent, &request_ret, &local_e= rr); - if (local_err) { - error_report_err(local_err); - } if (ret < 0 || request_ret < 0) { return ret ? ret : request_ret; } --=20 2.11.1