[Qemu-devel] [PATCH for 2.10] block/nbd-client: always return EIO on and after the first io channel error

Vladimir Sementsov-Ogievskiy posted 1 patch 6 years, 7 months ago
[Qemu-devel] [PATCH for 2.10] block/nbd-client: always return EIO on and after the first io channel error
Posted by Vladimir Sementsov-Ogievskiy 6 years, 7 months ago
Do not communicate after the first error, to avoid communicating through a
broken channel. The only exception is to try to send NBD_CMD_DISC anyway in
nbd_client_close.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---

Hi all. Here is a patch fixing a problem noted in
[PATCH 06/17] block/nbd-client: fix nbd_read_reply_entry
and
[PATCH 17/17] block/nbd-client: always return EIO on and after the first io channel error
and discussed on the list.

If it is applied to 2.10, I'll rebase my 'nbd client refactoring and fixing' series
on it (for 2.11). If not, I'd prefer not to rebase the series, so please do not apply this
patch for 2.11.

 block/nbd-client.h |  1 +
 block/nbd-client.c | 58 +++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index df80771357..28db9922c8 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -29,6 +29,7 @@ typedef struct NBDClientSession {
 
     Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
     NBDReply reply;
+    bool eio_to_all;
 } NBDClientSession;
 
 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 25dd28406b..1282b2484e 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -49,6 +49,8 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 {
     NBDClientSession *client = nbd_get_client_session(bs);
 
+    client->eio_to_all = true;
+
     if (!client->ioc) { /* Already closed */
         return;
     }
@@ -74,12 +76,16 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
     Error *local_err = NULL;
 
     for (;;) {
+        if (s->eio_to_all) {
+            break;
+        }
+
         assert(s->reply.handle == 0);
         ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
         if (ret < 0) {
             error_report_err(local_err);
         }
-        if (ret <= 0) {
+        if (ret <= 0 || s->eio_to_all) {
             break;
         }
 
@@ -107,6 +113,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
         qemu_coroutine_yield();
     }
 
+    s->eio_to_all = true;
     nbd_recv_coroutines_enter_all(s);
     s->read_reply_co = NULL;
 }
@@ -118,6 +125,10 @@ static int nbd_co_send_request(BlockDriverState *bs,
     NBDClientSession *s = nbd_get_client_session(bs);
     int rc, ret, i;
 
+    if (s->eio_to_all) {
+        return -EIO;
+    }
+
     qemu_co_mutex_lock(&s->send_mutex);
     while (s->in_flight == MAX_NBD_REQUESTS) {
         qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
@@ -135,15 +146,15 @@ static int nbd_co_send_request(BlockDriverState *bs,
     assert(i < MAX_NBD_REQUESTS);
     request->handle = INDEX_TO_HANDLE(s, i);
 
-    if (!s->ioc) {
+    if (s->eio_to_all) {
         qemu_co_mutex_unlock(&s->send_mutex);
-        return -EPIPE;
+        return -EIO;
     }
 
     if (qiov) {
         qio_channel_set_cork(s->ioc, true);
         rc = nbd_send_request(s->ioc, request);
-        if (rc >= 0) {
+        if (rc >= 0 && !s->eio_to_all) {
             ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
                           NULL);
             if (ret != request->len) {
@@ -155,7 +166,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
         rc = nbd_send_request(s->ioc, request);
     }
     qemu_co_mutex_unlock(&s->send_mutex);
-    return rc;
+
+    return s->eio_to_all ? -EIO : rc;
 }
 
 static void nbd_co_receive_reply(NBDClientSession *s,
@@ -169,13 +181,13 @@ static void nbd_co_receive_reply(NBDClientSession *s,
     qemu_coroutine_yield();
     *reply = s->reply;
     if (reply->handle != request->handle ||
-        !s->ioc) {
+        !s->ioc || s->eio_to_all) {
         reply->error = EIO;
     } else {
         if (qiov && reply->error == 0) {
             ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
                           NULL);
-            if (ret != request->len) {
+            if (ret != request->len || s->eio_to_all) {
                 reply->error = EIO;
             }
         }
@@ -225,8 +237,10 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
     } else {
         nbd_co_receive_reply(client, &request, &reply, qiov);
     }
-    nbd_coroutine_end(bs, &request);
-    return -reply.error;
+    if (request.handle != 0) {
+        nbd_coroutine_end(bs, &request);
+    }
+    return client->eio_to_all ? -EIO : -reply.error;
 }
 
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
@@ -254,8 +268,10 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(bs, &request);
-    return -reply.error;
+    if (request.handle != 0) {
+        nbd_coroutine_end(bs, &request);
+    }
+    return client->eio_to_all ? -EIO : -reply.error;
 }
 
 int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
@@ -288,8 +304,10 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(bs, &request);
-    return -reply.error;
+    if (request.handle != 0) {
+        nbd_coroutine_end(bs, &request);
+    }
+    return client->eio_to_all ? -EIO : -reply.error;
 }
 
 int nbd_client_co_flush(BlockDriverState *bs)
@@ -312,8 +330,10 @@ int nbd_client_co_flush(BlockDriverState *bs)
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(bs, &request);
-    return -reply.error;
+    if (request.handle != 0) {
+        nbd_coroutine_end(bs, &request);
+    }
+    return client->eio_to_all ? -EIO : -reply.error;
 }
 
 int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
@@ -337,9 +357,10 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(bs, &request);
-    return -reply.error;
-
+    if (request.handle != 0) {
+        nbd_coroutine_end(bs, &request);
+    }
+    return client->eio_to_all ? -EIO : -reply.error;
 }
 
 void nbd_client_detach_aio_context(BlockDriverState *bs)
@@ -384,6 +405,7 @@ int nbd_client_init(BlockDriverState *bs,
     logout("session init %s\n", export);
     qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
 
+    client->eio_to_all = false;
     client->info.request_sizes = true;
     ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
                                 tlscreds, hostname,
-- 
2.11.1


Re: [Qemu-devel] [PATCH for 2.10] block/nbd-client: always return EIO on and after the first io channel error
Posted by Vladimir Sementsov-Ogievskiy 6 years, 7 months ago
08.08.2017 17:29, Vladimir Sementsov-Ogievskiy wrote:
> Do not communicate after the first error, to avoid communicating through a
> broken channel. The only exception is to try to send NBD_CMD_DISC anyway in
> nbd_client_close.

Worth adding: to simplify things, return -EIO in the disconnect case too.

(refers to:

-    if (!s->ioc) {
+    if (s->eio_to_all) {
          qemu_co_mutex_unlock(&s->send_mutex);
-        return -EPIPE;
+        return -EIO;
      }

and to:

@@ -49,6 +49,8 @@ static void nbd_teardown_connection(BlockDriverState *bs)
  {
      NBDClientSession *client = nbd_get_client_session(bs);
  
+    client->eio_to_all = true;
+

However, I'm unsure how s->ioc may be NULL in the first chunk, and if that is
possible, why it is not checked everywhere. Read/write after bdrv_close? Should
that be handled at a higher level?)
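
To make the intended behaviour of the flag concrete, here is a stripped-down,
self-contained sketch of the 'sticky EIO' pattern (illustrative names only, not
the actual QEMU code):

    #include <errno.h>
    #include <stdbool.h>
    #include <stddef.h>

    typedef struct Session {
        bool eio_to_all;   /* set once, on the first channel error or on teardown */
        void *ioc;         /* stands in for the QIOChannel */
    } Session;

    static int session_send(Session *s, const void *req)
    {
        if (s->eio_to_all) {
            return -EIO;            /* never touch the channel after an error */
        }
        if (!s->ioc) {
            s->eio_to_all = true;   /* disconnect is reported as -EIO too */
            return -EIO;
        }
        (void)req;                  /* ... actually send the request here ... */
        return 0;
    }

    static void session_teardown(Session *s)
    {
        s->eio_to_all = true;       /* all in-flight and future requests get -EIO */
        s->ioc = NULL;
    }

The flag is only ever set, never cleared, so once the first channel error (or a
teardown) has been seen, every later request fails with -EIO without touching
the channel again.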


-- 
Best regards,
Vladimir


Re: [Qemu-devel] [PATCH for 2.10] block/nbd-client: always return EIO on and after the first io channel error
Posted by Eric Blake 6 years, 7 months ago
On 08/08/2017 09:29 AM, Vladimir Sementsov-Ogievskiy wrote:
> Do not communicate after the first error, to avoid communicating through a
> broken channel. The only exception is to try to send NBD_CMD_DISC anyway in
> nbd_client_close.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
> 
> Hi all. Here is a patch fixing a problem noted in
> [PATCH 06/17] block/nbd-client: fix nbd_read_reply_entry
> and
> [PATCH 17/17] block/nbd-client: always return EIO on and after the first io channel error
> and discussed on the list.
>
> If it is applied to 2.10, I'll rebase my 'nbd client refactoring and fixing' series
> on it (for 2.11). If not, I'd prefer not to rebase the series, so please do not apply this
> patch for 2.11.

It may be possible to do something even smaller:

> 
>  block/nbd-client.h |  1 +
>  block/nbd-client.c | 58 +++++++++++++++++++++++++++++++++++++-----------------
>  2 files changed, 41 insertions(+), 18 deletions(-)
> 
> diff --git a/block/nbd-client.h b/block/nbd-client.h
> index df80771357..28db9922c8 100644
> --- a/block/nbd-client.h
> +++ b/block/nbd-client.h
> @@ -29,6 +29,7 @@ typedef struct NBDClientSession {
>  
>      Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
>      NBDReply reply;
> +    bool eio_to_all;
>  } NBDClientSession;
>  
>  NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
> diff --git a/block/nbd-client.c b/block/nbd-client.c
> index 25dd28406b..1282b2484e 100644
> --- a/block/nbd-client.c
> +++ b/block/nbd-client.c
> @@ -49,6 +49,8 @@ static void nbd_teardown_connection(BlockDriverState *bs)
>  {
>      NBDClientSession *client = nbd_get_client_session(bs);
>  
> +    client->eio_to_all = true;
> +
>      if (!client->ioc) { /* Already closed */
>          return;
>      }
> @@ -74,12 +76,16 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
>      Error *local_err = NULL;
>  
>      for (;;) {
> +        if (s->eio_to_all) {
> +            break;
> +        }
> +

How is this conditional ever reached? nbd_read_reply_entry() is our
workhorse, that is supposed to run until the client is ready to disconnect.

>          assert(s->reply.handle == 0);
>          ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
>          if (ret < 0) {
>              error_report_err(local_err);
>          }
> -        if (ret <= 0) {
> +        if (ret <= 0 || s->eio_to_all) {
>              break;
>          }

This says we're now supposed to break out of the while loop.  So unless
something can set s->eio_to_all during
aio_co_wake(s->recv_coroutine[i]), the next iteration of the loop won't
hit the first conditional.

Meanwhile, even if we skip the first conditional, this second
conditional will still end the loop prior to acting on anything received
from the server (the difference being whether we called
nbd_receive_reply() one additional time, but I don't see that as getting
in the way of a clean exit).

But my question is whether we can just go with a simpler fix: if we ever
break out of the workhorse loop of nbd_read_reply_entry(), then (and
only then) is when we call nbd_recv_coroutines_enter_all() to mop up any
pending transactions before tearing down the coroutines.  Is there
something we can do in nbd_recv_coroutines_enter_all() that will
guarantee that our final entry into each coroutine will fail?

I'm still playing with the idea locally.
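
For what it's worth, one possible shape of that idea (only a guess, not the
actual local patch): let the exit path of nbd_read_reply_entry() mark the
session as broken before calling nbd_recv_coroutines_enter_all(), and have
nbd_co_receive_reply() treat that mark as EIO. A simplified, coroutine-free
model:

    #include <errno.h>
    #include <stdbool.h>

    #define MAX_REQS 16

    typedef struct Client {
        bool broken;                 /* set only when the reply loop exits */
        bool pending[MAX_REQS];      /* stands in for recv_coroutine[] */
    } Client;

    /* Runs exactly once, when the reply loop breaks out for any reason. */
    static void reply_loop_exit(Client *c)
    {
        c->broken = true;            /* set before waking anyone, so the final
                                      * entry into each waiter is guaranteed
                                      * to see the failure */
        for (int i = 0; i < MAX_REQS; i++) {
            c->pending[i] = false;   /* "enter" each pending waiter once */
        }
    }

    /* What each waiter checks once it is woken up. */
    static int receive_reply(const Client *c, int my_handle, int reply_handle)
    {
        if (c->broken || reply_handle != my_handle) {
            return -EIO;
        }
        return 0;
    }

New submissions would still have to be blocked separately (or the client
disconnected), which is the point raised in the follow-up.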

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH for 2.10] block/nbd-client: always return EIO on and after the first io channel error
Posted by Vladimir Sementsov-Ogievskiy 6 years, 7 months ago
08.08.2017 17:44, Eric Blake wrote:
> On 08/08/2017 09:29 AM, Vladimir Sementsov-Ogievskiy wrote:
>> Do not communicate after the first error, to avoid communicating through a
>> broken channel. The only exception is to try to send NBD_CMD_DISC anyway in
>> nbd_client_close.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>>
>> Hi all. Here is a patch fixing a problem noted in
>> [PATCH 06/17] block/nbd-client: fix nbd_read_reply_entry
>> and
>> [PATCH 17/17] block/nbd-client: always return EIO on and after the first io channel error
>> and discussed on the list.
>>
>> If it is applied to 2.10, I'll rebase my 'nbd client refactoring and fixing' series
>> on it (for 2.11). If not, I'd prefer not to rebase the series, so please do not apply this
>> patch for 2.11.
> It may be possible to do something even smaller:
>
>>   block/nbd-client.h |  1 +
>>   block/nbd-client.c | 58 +++++++++++++++++++++++++++++++++++++-----------------
>>   2 files changed, 41 insertions(+), 18 deletions(-)
>>
>> diff --git a/block/nbd-client.h b/block/nbd-client.h
>> index df80771357..28db9922c8 100644
>> --- a/block/nbd-client.h
>> +++ b/block/nbd-client.h
>> @@ -29,6 +29,7 @@ typedef struct NBDClientSession {
>>   
>>       Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
>>       NBDReply reply;
>> +    bool eio_to_all;
>>   } NBDClientSession;
>>   
>>   NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
>> diff --git a/block/nbd-client.c b/block/nbd-client.c
>> index 25dd28406b..1282b2484e 100644
>> --- a/block/nbd-client.c
>> +++ b/block/nbd-client.c
>> @@ -49,6 +49,8 @@ static void nbd_teardown_connection(BlockDriverState *bs)
>>   {
>>       NBDClientSession *client = nbd_get_client_session(bs);
>>   
>> +    client->eio_to_all = true;
>> +
>>       if (!client->ioc) { /* Already closed */
>>           return;
>>       }
>> @@ -74,12 +76,16 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
>>       Error *local_err = NULL;
>>   
>>       for (;;) {
>> +        if (s->eio_to_all) {
>> +            break;
>> +        }
>> +
> How is this conditional ever reached? nbd_read_reply_entry() is our
> workhorse, that is supposed to run until the client is ready to disconnect.

I forgot to set eio_to_all on error when sending a request.
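
E.g. something along these lines near the end of nbd_co_send_request(), before
dropping send_mutex (untested, just to show the intent):

    if (rc < 0) {
        s->eio_to_all = true;   /* a failed send also poisons the session,
                                 * so nbd_read_reply_entry() stops at its
                                 * next check */
    }
    qemu_co_mutex_unlock(&s->send_mutex);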

>
>>           assert(s->reply.handle == 0);
>>           ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
>>           if (ret < 0) {
>>               error_report_err(local_err);
>>           }
>> -        if (ret <= 0) {
>> +        if (ret <= 0 || s->eio_to_all) {
>>               break;
>>           }
> This says we're now supposed to break out of the while loop.  So unless
> something can set s->eio_to_all during
> aio_co_wake(s->recv_coroutine[i]), the next iteration of the loop won't
> hit the first conditional.
>
> Meanwhile, even if we skip the first conditional, this second
> conditional will still end the loop prior to acting on anything received
> from the server (the difference being whether we called
> nbd_receive_reply() one additional time, but I don't see that as getting
> in the way of a clean exit).
>
> But my question is whether we can just go with a simpler fix: if we ever
> break out of the workhorse loop of nbd_read_reply_entry(), then (and
> only then) is when we call nbd_recv_coroutines_enter_all() to mop up any
> pending transactions before tearing down the coroutines.  Is there
> something we can do in nbd_recv_coroutines_enter_all() that will
> guarantee that our final entry into each coroutine will fail?

Anyway, you should prevent the new requests that follow, using some additional
variable or a disconnect.

Also, I think an error in sending requests should end this loop in
nbd_read_reply_entry, so the error condition should be checked after each
yield.
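
Roughly, the loop would then look like this (simplified, with stand-in helpers
instead of the real I/O and coroutine calls; the yield stub here simulates a
request coroutine failing a send while the reply coroutine is suspended):

    #include <stdbool.h>

    typedef struct Client {
        bool eio_to_all;
    } Client;

    /* Stand-ins for the real I/O and coroutine machinery. */
    static int receive_reply(Client *c) { (void)c; return 1; }
    static void wake_request_side(Client *c) { (void)c; }
    static void yield_to_requests(Client *c) { c->eio_to_all = true; }

    static void reply_loop(Client *c)
    {
        for (;;) {
            if (receive_reply(c) <= 0 || c->eio_to_all) {
                break;              /* channel error, or someone already failed */
            }
            wake_request_side(c);
            yield_to_requests(c);
            if (c->eio_to_all) {    /* re-check after the yield: a failed send on
                                     * the request side must stop this loop too */
                break;
            }
        }
        c->eio_to_all = true;       /* from here on, everything returns -EIO */
    }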


>
> I'm still playing with the idea locally.
>

-- 
Best regards,
Vladimir