[PATCH 21/52] migration/rdma: Fix QEMUFileHooks method return values

Markus Armbruster posted 52 patches 11 months, 3 weeks ago
Maintainers: Juan Quintela <quintela@redhat.com>, Peter Xu <peterx@redhat.com>, Leonardo Bras <leobras@redhat.com>
There is a newer version of this series
[PATCH 21/52] migration/rdma: Fix QEMUFileHooks method return values
Posted by Markus Armbruster 11 months, 3 weeks ago
The QEMUFileHooks methods don't come with a written contract.  Digging
through the code calling them, we find:

* save_page():

  Negative values RAM_SAVE_CONTROL_DELAYED and
  RAM_SAVE_CONTROL_NOT_SUPP are special.  Any other negative value is
  an unspecified error.

  qemu_rdma_save_page() returns -EIO or rdma->error_state on error.  I
  believe the latter is always negative.  Nothing stops either of them
  to clash with the special values, though.  Feels unlikely, but fix
  it anyway to return only the special values and -1.

* before_ram_iterate(), after_ram_iterate():

  Negative value means error.  qemu_rdma_registration_start() and
  qemu_rdma_registration_stop() comply as far as I can tell.  Make
  them comply *obviously*, by returning -1 on error.

* hook_ram_load:

  Negative value means error.  rdma_load_hook() already returns -1 on
  error.  Leave it alone.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 migration/rdma.c | 79 +++++++++++++++++++++++-------------------------
 1 file changed, 37 insertions(+), 42 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index cc59155a50..46b5859268 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3219,12 +3219,11 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
     rdma = qatomic_rcu_read(&rioc->rdmaout);
 
     if (!rdma) {
-        return -EIO;
+        return -1;
     }
 
-    ret = check_error_state(rdma);
-    if (ret) {
-        return ret;
+    if (check_error_state(rdma)) {
+        return -1;
     }
 
     qemu_fflush(f);
@@ -3290,9 +3289,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
     }
 
     return RAM_SAVE_CONTROL_DELAYED;
+
 err:
     rdma->error_state = ret;
-    return ret;
+    return -1;
 }
 
 static void rdma_accept_incoming_migration(void *opaque);
@@ -3538,12 +3538,11 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
     rdma = qatomic_rcu_read(&rioc->rdmain);
 
     if (!rdma) {
-        return -EIO;
+        return -1;
     }
 
-    ret = check_error_state(rdma);
-    if (ret) {
-        return ret;
+    if (check_error_state(rdma)) {
+        return -1;
     }
 
     local = &rdma->local_ram_blocks;
@@ -3576,7 +3575,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
                              (unsigned int)comp->block_idx,
                              rdma->local_ram_blocks.nb_blocks);
                 ret = -EIO;
-                goto out;
+                goto err;
             }
             block = &(rdma->local_ram_blocks.block[comp->block_idx]);
 
@@ -3588,7 +3587,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
 
         case RDMA_CONTROL_REGISTER_FINISHED:
             trace_qemu_rdma_registration_handle_finished();
-            goto out;
+            return 0;
 
         case RDMA_CONTROL_RAM_BLOCKS_REQUEST:
             trace_qemu_rdma_registration_handle_ram_blocks();
@@ -3609,7 +3608,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
                 if (ret) {
                     error_report("rdma migration: error dest "
                                     "registering ram blocks");
-                    goto out;
+                    goto err;
                 }
             }
 
@@ -3648,7 +3647,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
 
             if (ret < 0) {
                 error_report("rdma migration: error sending remote info");
-                goto out;
+                goto err;
             }
 
             break;
@@ -3675,7 +3674,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
                                  (unsigned int)reg->current_index,
                                  rdma->local_ram_blocks.nb_blocks);
                     ret = -ENOENT;
-                    goto out;
+                    goto err;
                 }
                 block = &(rdma->local_ram_blocks.block[reg->current_index]);
                 if (block->is_ram_block) {
@@ -3685,7 +3684,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
                             block->block_name, block->offset,
                             reg->key.current_addr);
                         ret = -ERANGE;
-                        goto out;
+                        goto err;
                     }
                     host_addr = (block->local_host_addr +
                                 (reg->key.current_addr - block->offset));
@@ -3701,7 +3700,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
                             " chunk: %" PRIx64,
                             block->block_name, reg->key.chunk);
                         ret = -ERANGE;
-                        goto out;
+                        goto err;
                     }
                 }
                 chunk_start = ram_chunk_start(block, chunk);
@@ -3713,7 +3712,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
                             chunk, chunk_start, chunk_end)) {
                     error_report("cannot get rkey");
                     ret = -EINVAL;
-                    goto out;
+                    goto err;
                 }
                 reg_result->rkey = tmp_rkey;
 
@@ -3730,7 +3729,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
 
             if (ret < 0) {
                 error_report("Failed to send control buffer");
-                goto out;
+                goto err;
             }
             break;
         case RDMA_CONTROL_UNREGISTER_REQUEST:
@@ -3753,7 +3752,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
                 if (ret != 0) {
                     perror("rdma unregistration chunk failed");
                     ret = -ret;
-                    goto out;
+                    goto err;
                 }
 
                 rdma->total_registrations--;
@@ -3766,24 +3765,23 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
 
             if (ret < 0) {
                 error_report("Failed to send control buffer");
-                goto out;
+                goto err;
             }
             break;
         case RDMA_CONTROL_REGISTER_RESULT:
             error_report("Invalid RESULT message at dest.");
             ret = -EIO;
-            goto out;
+            goto err;
         default:
             error_report("Unknown control message %s", control_desc(head.type));
             ret = -EIO;
-            goto out;
+            goto err;
         }
     } while (1);
-out:
-    if (ret < 0) {
-        rdma->error_state = ret;
-    }
-    return ret;
+
+err:
+    rdma->error_state = ret;
+    return -1;
 }
 
 /* Destination:
@@ -3805,7 +3803,7 @@ rdma_block_notification_handle(QEMUFile *f, const char *name)
     rdma = qatomic_rcu_read(&rioc->rdmain);
 
     if (!rdma) {
-        return -EIO;
+        return -1;
     }
 
     /* Find the matching RAMBlock in our local list */
@@ -3818,7 +3816,7 @@ rdma_block_notification_handle(QEMUFile *f, const char *name)
 
     if (found == -1) {
         error_report("RAMBlock '%s' not found on destination", name);
-        return -ENOENT;
+        return -1;
     }
 
     rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index;
@@ -3848,7 +3846,6 @@ static int qemu_rdma_registration_start(QEMUFile *f,
 {
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
     RDMAContext *rdma;
-    int ret;
 
     if (migration_in_postcopy()) {
         return 0;
@@ -3857,12 +3854,11 @@ static int qemu_rdma_registration_start(QEMUFile *f,
     RCU_READ_LOCK_GUARD();
     rdma = qatomic_rcu_read(&rioc->rdmaout);
     if (!rdma) {
-        return -EIO;
+        return -1;
     }
 
-    ret = check_error_state(rdma);
-    if (ret) {
-        return ret;
+    if (check_error_state(rdma)) {
+        return -1;
     }
 
     trace_qemu_rdma_registration_start(flags);
@@ -3891,12 +3887,11 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
     RCU_READ_LOCK_GUARD();
     rdma = qatomic_rcu_read(&rioc->rdmaout);
     if (!rdma) {
-        return -EIO;
+        return -1;
     }
 
-    ret = check_error_state(rdma);
-    if (ret) {
-        return ret;
+    if (check_error_state(rdma)) {
+        return -1;
     }
 
     qemu_fflush(f);
@@ -3927,7 +3922,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
                     qemu_rdma_reg_whole_ram_blocks : NULL);
         if (ret < 0) {
             fprintf(stderr, "receiving remote info!");
-            return ret;
+            return -1;
         }
 
         nb_dest_blocks = resp.len / sizeof(RDMADestBlock);
@@ -3950,7 +3945,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
                     "not identical on both the source and destination.",
                     local->nb_blocks, nb_dest_blocks);
             rdma->error_state = -EINVAL;
-            return -EINVAL;
+            return -1;
         }
 
         qemu_rdma_move_header(rdma, reg_result_idx, &resp);
@@ -3966,7 +3961,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
                         local->block[i].length,
                         rdma->dest_blocks[i].length);
                 rdma->error_state = -EINVAL;
-                return -EINVAL;
+                return -1;
             }
             local->block[i].remote_host_addr =
                     rdma->dest_blocks[i].remote_host_addr;
@@ -3986,7 +3981,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
     return 0;
 err:
     rdma->error_state = ret;
-    return ret;
+    return -1;
 }
 
 static const QEMUFileHooks rdma_read_hooks = {
-- 
2.41.0
Re: [PATCH 21/52] migration/rdma: Fix QEMUFileHooks method return values
Posted by Zhijian Li (Fujitsu) 11 months, 2 weeks ago

On 18/09/2023 22:41, Markus Armbruster wrote:
> The QEMUFileHooks methods don't come with a written contract.  Digging
> through the code calling them, we find:
> 
> * save_page():

I'm fine with this

> 
>    Negative values RAM_SAVE_CONTROL_DELAYED and
>    RAM_SAVE_CONTROL_NOT_SUPP are special.  Any other negative value is
>    an unspecified error.
> 
>    qemu_rdma_save_page() returns -EIO or rdma->error_state on error.  I
>    believe the latter is always negative.  Nothing stops either of them
>    to clash with the special values, though.  Feels unlikely, but fix
>    it anyway to return only the special values and -1.
> 
> * before_ram_iterate(), before_ram_iterate():

error code returned by before_ram_iterate() and before_ram_iterate() will be
assigned to QEMUFile for upper layer.
But it seems that no callers take care about the error ?  Shouldn't let the callers
to check the error instead ?



> 
>    Negative value means error.  qemu_rdma_registration_start() and
>    qemu_rdma_registration_stop() comply as far as I can tell.  Make
>    them comply *obviously*, by returning -1 on error.
> 
> * hook_ram_load:
> 
>    Negative value means error.  rdma_load_hook() already returns -1 on
>    error.  Leave it alone.

see inline reply below,

> 
> Signed-off-by: Markus Armbruster <armbru@redhat.com>
> ---
>   migration/rdma.c | 79 +++++++++++++++++++++++-------------------------
>   1 file changed, 37 insertions(+), 42 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index cc59155a50..46b5859268 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -3219,12 +3219,11 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
>       rdma = qatomic_rcu_read(&rioc->rdmaout);
>   
>       if (!rdma) {
> -        return -EIO;
> +        return -1;
>       }
>   
> -    ret = check_error_state(rdma);
> -    if (ret) {
> -        return ret;
> +    if (check_error_state(rdma)) {
> +        return -1;
>       }
>   
>       qemu_fflush(f);
> @@ -3290,9 +3289,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
>       }
>   
>       return RAM_SAVE_CONTROL_DELAYED;
> +
>   err:
>       rdma->error_state = ret;
> -    return ret;
> +    return -1;
>   }
>   
>   static void rdma_accept_incoming_migration(void *opaque);
> @@ -3538,12 +3538,11 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>       rdma = qatomic_rcu_read(&rioc->rdmain);
>   
>       if (!rdma) {
> -        return -EIO;
> +        return -1;

that's because EIO is not accurate here ?



>       }
>   
> -    ret = check_error_state(rdma);
> -    if (ret) {
> -        return ret;

Ditto


Thanks
Zhijian

> +    if (check_error_state(rdma)) {
> +        return -1;
>       }
>   
>       local = &rdma->local_ram_blocks;
> @@ -3576,7 +3575,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>                                (unsigned int)comp->block_idx,
>                                rdma->local_ram_blocks.nb_blocks);
>                   ret = -EIO;
> -                goto out;
> +                goto err;
>               }
>               block = &(rdma->local_ram_blocks.block[comp->block_idx]);
>   
> @@ -3588,7 +3587,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>   
>           case RDMA_CONTROL_REGISTER_FINISHED:
>               trace_qemu_rdma_registration_handle_finished();
> -            goto out;
> +            return 0;
>   
>           case RDMA_CONTROL_RAM_BLOCKS_REQUEST:
>               trace_qemu_rdma_registration_handle_ram_blocks();
> @@ -3609,7 +3608,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>                   if (ret) {
>                       error_report("rdma migration: error dest "
>                                       "registering ram blocks");
> -                    goto out;
> +                    goto err;
>                   }
>               }
>   
> @@ -3648,7 +3647,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>   
>               if (ret < 0) {
>                   error_report("rdma migration: error sending remote info");
> -                goto out;
> +                goto err;
>               }
>   
>               break;
> @@ -3675,7 +3674,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>                                    (unsigned int)reg->current_index,
>                                    rdma->local_ram_blocks.nb_blocks);
>                       ret = -ENOENT;
> -                    goto out;
> +                    goto err;
>                   }
>                   block = &(rdma->local_ram_blocks.block[reg->current_index]);
>                   if (block->is_ram_block) {
> @@ -3685,7 +3684,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>                               block->block_name, block->offset,
>                               reg->key.current_addr);
>                           ret = -ERANGE;
> -                        goto out;
> +                        goto err;
>                       }
>                       host_addr = (block->local_host_addr +
>                                   (reg->key.current_addr - block->offset));
> @@ -3701,7 +3700,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>                               " chunk: %" PRIx64,
>                               block->block_name, reg->key.chunk);
>                           ret = -ERANGE;
> -                        goto out;
> +                        goto err;
>                       }
>                   }
>                   chunk_start = ram_chunk_start(block, chunk);
> @@ -3713,7 +3712,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>                               chunk, chunk_start, chunk_end)) {
>                       error_report("cannot get rkey");
>                       ret = -EINVAL;
> -                    goto out;
> +                    goto err;
>                   }
>                   reg_result->rkey = tmp_rkey;
>   
> @@ -3730,7 +3729,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>   
>               if (ret < 0) {
>                   error_report("Failed to send control buffer");
> -                goto out;
> +                goto err;
>               }
>               break;
>           case RDMA_CONTROL_UNREGISTER_REQUEST:
> @@ -3753,7 +3752,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>                   if (ret != 0) {
>                       perror("rdma unregistration chunk failed");
>                       ret = -ret;
> -                    goto out;
> +                    goto err;
>                   }
>   
>                   rdma->total_registrations--;
> @@ -3766,24 +3765,23 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>   
>               if (ret < 0) {
>                   error_report("Failed to send control buffer");
> -                goto out;
> +                goto err;
>               }
>               break;
>           case RDMA_CONTROL_REGISTER_RESULT:
>               error_report("Invalid RESULT message at dest.");
>               ret = -EIO;
> -            goto out;
> +            goto err;
>           default:
>               error_report("Unknown control message %s", control_desc(head.type));
>               ret = -EIO;
> -            goto out;
> +            goto err;
>           }
>       } while (1);
> -out:
> -    if (ret < 0) {
> -        rdma->error_state = ret;
> -    }
> -    return ret;
> +
> +err:
> +    rdma->error_state = ret;
> +    return -1;
>   }
>   
>   /* Destination:
> @@ -3805,7 +3803,7 @@ rdma_block_notification_handle(QEMUFile *f, const char *name)
>       rdma = qatomic_rcu_read(&rioc->rdmain);
>   
>       if (!rdma) {
> -        return -EIO;
> +        return -1;
>       }
>   
>       /* Find the matching RAMBlock in our local list */
> @@ -3818,7 +3816,7 @@ rdma_block_notification_handle(QEMUFile *f, const char *name)
>   
>       if (found == -1) {
>           error_report("RAMBlock '%s' not found on destination", name);
> -        return -ENOENT;
> +        return -1;
>       }
>   
>       rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index;
> @@ -3848,7 +3846,6 @@ static int qemu_rdma_registration_start(QEMUFile *f,
>   {
>       QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
>       RDMAContext *rdma;
> -    int ret;
>   
>       if (migration_in_postcopy()) {
>           return 0;
> @@ -3857,12 +3854,11 @@ static int qemu_rdma_registration_start(QEMUFile *f,
>       RCU_READ_LOCK_GUARD();
>       rdma = qatomic_rcu_read(&rioc->rdmaout);
>       if (!rdma) {
> -        return -EIO;
> +        return -1;
>       }
>   
> -    ret = check_error_state(rdma);
> -    if (ret) {
> -        return ret;
> +    if (check_error_state(rdma)) {
> +        return -1;
>       }
>   
>       trace_qemu_rdma_registration_start(flags);
> @@ -3891,12 +3887,11 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
>       RCU_READ_LOCK_GUARD();
>       rdma = qatomic_rcu_read(&rioc->rdmaout);
>       if (!rdma) {
> -        return -EIO;
> +        return -1;
>       }
>   
> -    ret = check_error_state(rdma);
> -    if (ret) {
> -        return ret;
> +    if (check_error_state(rdma)) {
> +        return -1;
>       }
>   
>       qemu_fflush(f);
> @@ -3927,7 +3922,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
>                       qemu_rdma_reg_whole_ram_blocks : NULL);
>           if (ret < 0) {
>               fprintf(stderr, "receiving remote info!");
> -            return ret;
> +            return -1;
>           }
>   
>           nb_dest_blocks = resp.len / sizeof(RDMADestBlock);
> @@ -3950,7 +3945,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
>                       "not identical on both the source and destination.",
>                       local->nb_blocks, nb_dest_blocks);
>               rdma->error_state = -EINVAL;
> -            return -EINVAL;
> +            return -1;
>           }
>   
>           qemu_rdma_move_header(rdma, reg_result_idx, &resp);
> @@ -3966,7 +3961,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
>                           local->block[i].length,
>                           rdma->dest_blocks[i].length);
>                   rdma->error_state = -EINVAL;
> -                return -EINVAL;
> +                return -1;
>               }
>               local->block[i].remote_host_addr =
>                       rdma->dest_blocks[i].remote_host_addr;
> @@ -3986,7 +3981,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f,
>       return 0;
>   err:
>       rdma->error_state = ret;
> -    return ret;
> +    return -1;
>   }
>   
>   static const QEMUFileHooks rdma_read_hooks = {
Re: [PATCH 21/52] migration/rdma: Fix QEMUFileHooks method return values
Posted by Markus Armbruster 11 months, 2 weeks ago
"Zhijian Li (Fujitsu)" <lizhijian@fujitsu.com> writes:

> On 18/09/2023 22:41, Markus Armbruster wrote:
>> The QEMUFileHooks methods don't come with a written contract.  Digging
>> through the code calling them, we find:
>> 
>> * save_page():
>
> I'm fine with this
>
>> 
>>    Negative values RAM_SAVE_CONTROL_DELAYED and
>>    RAM_SAVE_CONTROL_NOT_SUPP are special.  Any other negative value is
>>    an unspecified error.
>> 
>>    qemu_rdma_save_page() returns -EIO or rdma->error_state on error.  I
>>    believe the latter is always negative.  Nothing stops either of them
>>    to clash with the special values, though.  Feels unlikely, but fix
>>    it anyway to return only the special values and -1.
>> 
>> * before_ram_iterate(), before_ram_iterate():
>
> error code returned by before_ram_iterate() and before_ram_iterate() will be
> assigned to QEMUFile for upper layer.
> But it seems that no callers take care about the error ?  Shouldn't let the callers
> to check the error instead ?

The error values returned by qemu_rdma_registration_start() and
qemu_rdma_registration_stop() carry no additional information a caller
could check.

Both return either -EIO or rdma->error_state on error.  The latter is
*not* a negative errno code.  Evidence: qio_channel_rdma_shutdown()
assigns -1, qemu_rdma_block_for_wrid() assigns the error value of
qemu_rdma_poll(), which can be the error value of ibv_poll_cq(), which
is an unspecified negative value, ...

I decided not to investigate what qemu-file.c does with the error values
after one quick glance at the code.  It's confusing, and quite possibly
confused.  But I'm already at 50+ patches, and am neither inclined nor
able to take on more migration cleanup at this time.

>>    Negative value means error.  qemu_rdma_registration_start() and
>>    qemu_rdma_registration_stop() comply as far as I can tell.  Make
>>    them comply *obviously*, by returning -1 on error.
>> 
>> * hook_ram_load:
>> 
>>    Negative value means error.  rdma_load_hook() already returns -1 on
>>    error.  Leave it alone.
>
> see inline reply below,
>
>> 
>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
>> ---
>>   migration/rdma.c | 79 +++++++++++++++++++++++-------------------------
>>   1 file changed, 37 insertions(+), 42 deletions(-)
>> 
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index cc59155a50..46b5859268 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -3219,12 +3219,11 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
>>       rdma = qatomic_rcu_read(&rioc->rdmaout);
>>   
>>       if (!rdma) {
>> -        return -EIO;
>> +        return -1;
>>       }
>>   
>> -    ret = check_error_state(rdma);
>> -    if (ret) {
>> -        return ret;
>> +    if (check_error_state(rdma)) {
>> +        return -1;
>>       }
>>   
>>       qemu_fflush(f);
>> @@ -3290,9 +3289,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
>>       }
>>   
>>       return RAM_SAVE_CONTROL_DELAYED;
>> +
>>   err:
>>       rdma->error_state = ret;
>> -    return ret;
>> +    return -1;
>>   }
>>   
>>   static void rdma_accept_incoming_migration(void *opaque);
>> @@ -3538,12 +3538,11 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>>       rdma = qatomic_rcu_read(&rioc->rdmain);
>>   
>>       if (!rdma) {
>> -        return -EIO;
>> +        return -1;
>
> that's because EIO is not accurate here ?

It's because the function does not return a negative errno code, and
returning -EIO is misleading readers into assuming it does.

>>       }
>>   
>> -    ret = check_error_state(rdma);
>> -    if (ret) {
>> -        return ret;
>
> Ditto

Likewise.

[...]
Re: [PATCH 21/52] migration/rdma: Fix QEMUFileHooks method return values
Posted by Zhijian Li (Fujitsu) 11 months, 2 weeks ago

On 25/09/2023 14:36, Markus Armbruster wrote:
> "Zhijian Li (Fujitsu)" <lizhijian@fujitsu.com> writes:
> 
>> On 18/09/2023 22:41, Markus Armbruster wrote:
>>> The QEMUFileHooks methods don't come with a written contract.  Digging
>>> through the code calling them, we find:
>>>
>>> * save_page():
>>
>> I'm fine with this
>>
>>>
>>>     Negative values RAM_SAVE_CONTROL_DELAYED and
>>>     RAM_SAVE_CONTROL_NOT_SUPP are special.  Any other negative value is
>>>     an unspecified error.
>>>
>>>     qemu_rdma_save_page() returns -EIO or rdma->error_state on error.  I
>>>     believe the latter is always negative.  Nothing stops either of them
>>>     to clash with the special values, though.  Feels unlikely, but fix
>>>     it anyway to return only the special values and -1.
>>>
>>> * before_ram_iterate(), before_ram_iterate():
>>
>> error code returned by before_ram_iterate() and before_ram_iterate() will be
>> assigned to QEMUFile for upper layer.
>> But it seems that no callers take care about the error ?  Shouldn't let the callers
>> to check the error instead ?
> 
> The error values returned by qemu_rdma_registration_start() and
> qemu_rdma_registration_stop() carry no additional information a caller
> could check.


I think qemu_file_get_error(f) can be used for callers to check the error code.



> 
> Both return either -EIO or rdma->error_state on error.  The latter is
> *not* a negative errno code.  Evidence: qio_channel_rdma_shutdown()
> assigns -1, qemu_rdma_block_for_wrid() assigns the error value of
> qemu_rdma_poll(), which can be the error value of ibv_poll_cq(), which
> is an unspecified negative value, ...
> 
you are right.


> I decided not to investigate what qemu-file.c does with the error values
> after one quick glance at the code.  It's confusing, and quite possibly
> confused.  But I'm already at 50+ patches, and am neither inclined nor
> able to take on more migration cleanup at this time.

Yeah, it's already big enough patch set.

very thanks

Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>


> 
>>>     Negative value means error.  qemu_rdma_registration_start() and
>>>     qemu_rdma_registration_stop() comply as far as I can tell.  Make
>>>     them comply *obviously*, by returning -1 on error.
>>>
>>> * hook_ram_load:
>>>
>>>     Negative value means error.  rdma_load_hook() already returns -1 on
>>>     error.  Leave it alone.
>>
>> see inline reply below,
>>
>>>
>>> Signed-off-by: Markus Armbruster <armbru@redhat.com>
>>> ---
>>>    migration/rdma.c | 79 +++++++++++++++++++++++-------------------------
>>>    1 file changed, 37 insertions(+), 42 deletions(-)
>>>
>>> diff --git a/migration/rdma.c b/migration/rdma.c
>>> index cc59155a50..46b5859268 100644
>>> --- a/migration/rdma.c
>>> +++ b/migration/rdma.c
>>> @@ -3219,12 +3219,11 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
>>>        rdma = qatomic_rcu_read(&rioc->rdmaout);
>>>    
>>>        if (!rdma) {
>>> -        return -EIO;
>>> +        return -1;
>>>        }
>>>    
>>> -    ret = check_error_state(rdma);
>>> -    if (ret) {
>>> -        return ret;
>>> +    if (check_error_state(rdma)) {
>>> +        return -1;
>>>        }
>>>    
>>>        qemu_fflush(f);
>>> @@ -3290,9 +3289,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
>>>        }
>>>    
>>>        return RAM_SAVE_CONTROL_DELAYED;
>>> +
>>>    err:
>>>        rdma->error_state = ret;
>>> -    return ret;
>>> +    return -1;
>>>    }
>>>    
>>>    static void rdma_accept_incoming_migration(void *opaque);
>>> @@ -3538,12 +3538,11 @@ static int qemu_rdma_registration_handle(QEMUFile *f)
>>>        rdma = qatomic_rcu_read(&rioc->rdmain);
>>>    
>>>        if (!rdma) {
>>> -        return -EIO;
>>> +        return -1;
>>
>> that's because EIO is not accurate here ?
> 
> It's because the function does not return a negative errno code, and
> returning -EIO is misleading readers into assuming it does
> 
>>>        }
>>>    
>>> -    ret = check_error_state(rdma);
>>> -    if (ret) {
>>> -        return ret;
>>
>> Ditto
> 
> Likewise.
> 
> [...]
>