[PATCH 43/52] migration/rdma: Convert qemu_rdma_alloc_pd_cq() to Error

Markus Armbruster posted 52 patches 11 months, 3 weeks ago
Maintainers: Juan Quintela <quintela@redhat.com>, Peter Xu <peterx@redhat.com>, Leonardo Bras <leobras@redhat.com>
There is a newer version of this series
[PATCH 43/52] migration/rdma: Convert qemu_rdma_alloc_pd_cq() to Error
Posted by Markus Armbruster 11 months, 3 weeks ago
Functions that use an Error **errp parameter to return errors should
not also report them to the user, because reporting is the caller's
job.  When the caller does, the error is reported twice.  When it
doesn't (because it recovered from the error), there is no error to
report, i.e. the report is bogus.

qemu_rdma_source_init() violates this principle: it calls
error_report() via qemu_rdma_alloc_pd_cq().  I elected not to
investigate how callers handle the error, i.e. precise impact is not
known.

Clean this up by converting qemu_rdma_alloc_pd_cq() to Error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 migration/rdma.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index a727aa35d1..41f0ae4ddb 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -1046,19 +1046,19 @@ err_resolve_create_id:
 /*
  * Create protection domain and completion queues
  */
-static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma)
+static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma, Error **errp)
 {
     /* allocate pd */
     rdma->pd = ibv_alloc_pd(rdma->verbs);
     if (!rdma->pd) {
-        error_report("failed to allocate protection domain");
+        error_setg(errp, "failed to allocate protection domain");
         return -1;
     }
 
     /* create receive completion channel */
     rdma->recv_comp_channel = ibv_create_comp_channel(rdma->verbs);
     if (!rdma->recv_comp_channel) {
-        error_report("failed to allocate receive completion channel");
+        error_setg(errp, "failed to allocate receive completion channel");
         goto err_alloc_pd_cq;
     }
 
@@ -1068,21 +1068,21 @@ static int qemu_rdma_alloc_pd_cq(RDMAContext *rdma)
     rdma->recv_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3),
                                   NULL, rdma->recv_comp_channel, 0);
     if (!rdma->recv_cq) {
-        error_report("failed to allocate receive completion queue");
+        error_setg(errp, "failed to allocate receive completion queue");
         goto err_alloc_pd_cq;
     }
 
     /* create send completion channel */
     rdma->send_comp_channel = ibv_create_comp_channel(rdma->verbs);
     if (!rdma->send_comp_channel) {
-        error_report("failed to allocate send completion channel");
+        error_setg(errp, "failed to allocate send completion channel");
         goto err_alloc_pd_cq;
     }
 
     rdma->send_cq = ibv_create_cq(rdma->verbs, (RDMA_SIGNALED_SEND_MAX * 3),
                                   NULL, rdma->send_comp_channel, 0);
     if (!rdma->send_cq) {
-        error_report("failed to allocate send completion queue");
+        error_setg(errp, "failed to allocate send completion queue");
         goto err_alloc_pd_cq;
     }
 
@@ -2451,6 +2451,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
 
 static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp)
 {
+    ERRP_GUARD();
     int ret, idx;
 
     /*
@@ -2464,12 +2465,12 @@ static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp)
         goto err_rdma_source_init;
     }
 
-    ret = qemu_rdma_alloc_pd_cq(rdma);
+    ret = qemu_rdma_alloc_pd_cq(rdma, errp);
     if (ret < 0) {
-        error_setg(errp, "RDMA ERROR: "
-                   "rdma migration: error allocating pd and cq! Your mlock()"
-                   " limits may be too low. Please check $ ulimit -a # and "
-                   "search for 'ulimit -l' in the output");
+        error_append_hint(errp,
+                          "Your mlock() limits may be too low. "
+                          "Please check $ ulimit -a # and "
+                          "search for 'ulimit -l' in the output\n");
         goto err_rdma_source_init;
     }
 
@@ -3450,9 +3451,9 @@ static int qemu_rdma_accept(RDMAContext *rdma)
 
     qemu_rdma_dump_id("dest_init", verbs);
 
-    ret = qemu_rdma_alloc_pd_cq(rdma);
+    ret = qemu_rdma_alloc_pd_cq(rdma, &err);
     if (ret < 0) {
-        error_report("rdma migration: error allocating pd and cq!");
+        error_report_err(err);
         goto err_rdma_dest_wait;
     }
 
-- 
2.41.0
Re: [PATCH 43/52] migration/rdma: Convert qemu_rdma_alloc_pd_cq() to Error
Posted by Zhijian Li (Fujitsu) 11 months, 2 weeks ago

On 18/09/2023 22:41, Markus Armbruster wrote:
> Functions that use an Error **errp parameter to return errors should
> not also report them to the user, because reporting is the caller's
> job.  When the caller does, the error is reported twice.  When it
> doesn't (because it recovered from the error), there is no error to
> report, i.e. the report is bogus.
> 
> qemu_rdma_source_init() violates this principle: it calls
> error_report() via qemu_rdma_alloc_pd_cq().  I elected not to
> investigate how callers handle the error, i.e. precise impact is not
> known.
> 
> Clean this up by converting qemu_rdma_alloc_pd_cq() to Error.
> 
> Signed-off-by: Markus Armbruster<armbru@redhat.com>
> ---
>   migration/rdma.c | 27 ++++++++++++++-------------
>   1 file changed, 14 insertions(+), 13 deletions(-)

[...]


> @@ -2451,6 +2451,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
>   
>   static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp)
>   {
> +    ERRP_GUARD();
>       int ret, idx;
>   
>       /*
> @@ -2464,12 +2465,12 @@ static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp)
>           goto err_rdma_source_init;
>       }
>   
> -    ret = qemu_rdma_alloc_pd_cq(rdma);
> +    ret = qemu_rdma_alloc_pd_cq(rdma, errp);
>       if (ret < 0) {
> -        error_setg(errp, "RDMA ERROR: "
> -                   "rdma migration: error allocating pd and cq! Your mlock()"
> -                   " limits may be too low. Please check $ ulimit -a # and "
> -                   "search for 'ulimit -l' in the output");
> +        error_append_hint(errp,
> +                          "Your mlock() limits may be too low. "
> +                          "Please check $ ulimit -a # and "
> +                          "search for 'ulimit -l' in the output\n");


I think we could freely remove this hint as well: it is not necessarily the exact resolution,
and hardly anyone will pay attention to it. Just report the error that qemu_rdma_alloc_pd_cq() gives us.

Anyway

Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>


>           goto err_rdma_source_init;
>       }
Re: [PATCH 43/52] migration/rdma: Convert qemu_rdma_alloc_pd_cq() to Error
Posted by Markus Armbruster 11 months, 2 weeks ago
"Zhijian Li (Fujitsu)" <lizhijian@fujitsu.com> writes:

> On 18/09/2023 22:41, Markus Armbruster wrote:
>> Functions that use an Error **errp parameter to return errors should
>> not also report them to the user, because reporting is the caller's
>> job.  When the caller does, the error is reported twice.  When it
>> doesn't (because it recovered from the error), there is no error to
>> report, i.e. the report is bogus.
>> 
>> qemu_rdma_source_init() violates this principle: it calls
>> error_report() via qemu_rdma_alloc_pd_cq().  I elected not to
>> investigate how callers handle the error, i.e. precise impact is not
>> known.
>> 
>> Clean this up by converting qemu_rdma_alloc_pd_cq() to Error.
>> 
>> Signed-off-by: Markus Armbruster<armbru@redhat.com>
>> ---
>>   migration/rdma.c | 27 ++++++++++++++-------------
>>   1 file changed, 14 insertions(+), 13 deletions(-)
>
> [...]
>
>
>> @@ -2451,6 +2451,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
>>   
>>   static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp)
>>   {
>> +    ERRP_GUARD();
>>       int ret, idx;
>>   
>>       /*
>> @@ -2464,12 +2465,12 @@ static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp)
>>           goto err_rdma_source_init;
>>       }
>>   
>> -    ret = qemu_rdma_alloc_pd_cq(rdma);
>> +    ret = qemu_rdma_alloc_pd_cq(rdma, errp);
>>       if (ret < 0) {
>> -        error_setg(errp, "RDMA ERROR: "
>> -                   "rdma migration: error allocating pd and cq! Your mlock()"
>> -                   " limits may be too low. Please check $ ulimit -a # and "
>> -                   "search for 'ulimit -l' in the output");
>> +        error_append_hint(errp,
>> +                          "Your mlock() limits may be too low. "
>> +                          "Please check $ ulimit -a # and "
>> +                          "search for 'ulimit -l' in the output\n");
>
>
> I think we could freely remove this error message as well, it may neither a exact resolution
> nor some one will take care. Just report the error qemu_rdma_alloc_pd_cq() tell us.

Double-checking: you recommend dropping error_append_hint()?

> Anyway
>
> Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
>
>
>>           goto err_rdma_source_init;
>>       }

Thanks!
Re: [PATCH 43/52] migration/rdma: Convert qemu_rdma_alloc_pd_cq() to Error
Posted by Zhijian Li (Fujitsu) 11 months, 2 weeks ago

On 26/09/2023 14:41, Markus Armbruster wrote:
> "Zhijian Li (Fujitsu)" <lizhijian@fujitsu.com> writes:
> 
>> On 18/09/2023 22:41, Markus Armbruster wrote:
>>> Functions that use an Error **errp parameter to return errors should
>>> not also report them to the user, because reporting is the caller's
>>> job.  When the caller does, the error is reported twice.  When it
>>> doesn't (because it recovered from the error), there is no error to
>>> report, i.e. the report is bogus.
>>>
>>> qemu_rdma_source_init() violates this principle: it calls
>>> error_report() via qemu_rdma_alloc_pd_cq().  I elected not to
>>> investigate how callers handle the error, i.e. precise impact is not
>>> known.
>>>
>>> Clean this up by converting qemu_rdma_alloc_pd_cq() to Error.
>>>
>>> Signed-off-by: Markus Armbruster<armbru@redhat.com>
>>> ---
>>>    migration/rdma.c | 27 ++++++++++++++-------------
>>>    1 file changed, 14 insertions(+), 13 deletions(-)
>>
>> [...]
>>
>>
>>> @@ -2451,6 +2451,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
>>>    
>>>    static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp)
>>>    {
>>> +    ERRP_GUARD();
>>>        int ret, idx;
>>>    
>>>        /*
>>> @@ -2464,12 +2465,12 @@ static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp)
>>>            goto err_rdma_source_init;
>>>        }
>>>    
>>> -    ret = qemu_rdma_alloc_pd_cq(rdma);
>>> +    ret = qemu_rdma_alloc_pd_cq(rdma, errp);
>>>        if (ret < 0) {
>>> -        error_setg(errp, "RDMA ERROR: "
>>> -                   "rdma migration: error allocating pd and cq! Your mlock()"
>>> -                   " limits may be too low. Please check $ ulimit -a # and "
>>> -                   "search for 'ulimit -l' in the output");
>>> +        error_append_hint(errp,
>>> +                          "Your mlock() limits may be too low. "
>>> +                          "Please check $ ulimit -a # and "
>>> +                          "search for 'ulimit -l' in the output\n");
>>
>>
>> I think we could freely remove this error message as well, it may neither a exact resolution
>> nor some one will take care. Just report the error qemu_rdma_alloc_pd_cq() tell us.
> 
> Double-checking: you recommend to drop error_append_hint()?


Yes





> 
>> Anyway
>>
>> Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
>>
>>
>>>            goto err_rdma_source_init;
>>>        }
> 
> Thanks!
>