[Qemu-devel] [PATCH 1/5] Fix segmentation fault when qemu_signal_init fails

Fei Li posted 5 patches 7 years, 2 months ago
There is a newer version of this series
[Qemu-devel] [PATCH 1/5] Fix segmentation fault when qemu_signal_init fails
Posted by Fei Li 7 years, 2 months ago
Currently, when qemu_signal_init() fails it only returns a non-zero
value but without propagating any Error. But its callers need a
non-null err when runs error_report_err(err), or else 0->msg occurs.

To avoid such segmentation fault, add a new Error parameter to make
the call trace to propagate the err to the final caller.

Signed-off-by: Fei Li <fli@suse.com>
---
 include/qemu/osdep.h |  2 +-
 util/compatfd.c      | 17 ++++++++++++-----
 util/main-loop.c     | 11 +++++++----
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index a91068df0e..09ed85fcb8 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -421,7 +421,7 @@ struct qemu_signalfd_siginfo {
                              additional fields in the future) */
 };
 
-int qemu_signalfd(const sigset_t *mask);
+int qemu_signalfd(const sigset_t *mask, Error **errp);
 void sigaction_invoke(struct sigaction *action,
                       struct qemu_signalfd_siginfo *info);
 #endif
diff --git a/util/compatfd.c b/util/compatfd.c
index 980bd33e52..65501de622 100644
--- a/util/compatfd.c
+++ b/util/compatfd.c
@@ -16,6 +16,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/thread.h"
+#include "qapi/error.h"
 
 #include <sys/syscall.h>
 
@@ -65,7 +66,7 @@ static void *sigwait_compat(void *opaque)
     }
 }
 
-static int qemu_signalfd_compat(const sigset_t *mask)
+static int qemu_signalfd_compat(const sigset_t *mask, Error **errp)
 {
     struct sigfd_compat_info *info;
     QemuThread thread;
@@ -73,11 +74,13 @@ static int qemu_signalfd_compat(const sigset_t *mask)
 
     info = malloc(sizeof(*info));
     if (info == NULL) {
+        error_setg(errp, "Failed to malloc in %s", __func__);
         errno = ENOMEM;
         return -1;
     }
 
     if (pipe(fds) == -1) {
+        error_setg(errp, "Failed to create a pipe in %s", __func__);
         free(info);
         return -1;
     }
@@ -94,17 +97,21 @@ static int qemu_signalfd_compat(const sigset_t *mask)
     return fds[0];
 }
 
-int qemu_signalfd(const sigset_t *mask)
+int qemu_signalfd(const sigset_t *mask, Error **errp)
 {
-#if defined(CONFIG_SIGNALFD)
     int ret;
+    Error *local_err = NULL;
 
+#if defined(CONFIG_SIGNALFD)
     ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
     if (ret != -1) {
         qemu_set_cloexec(ret);
         return ret;
     }
 #endif
-
-    return qemu_signalfd_compat(mask);
+    ret = qemu_signalfd_compat(mask, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+    }
+    return ret;
 }
diff --git a/util/main-loop.c b/util/main-loop.c
index affe0403c5..20f6ad3849 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -71,10 +71,11 @@ static void sigfd_handler(void *opaque)
     }
 }
 
-static int qemu_signal_init(void)
+static int qemu_signal_init(Error **errp)
 {
     int sigfd;
     sigset_t set;
+    Error *local_err = NULL;
 
     /*
      * SIG_IPI must be blocked in the main thread and must not be caught
@@ -94,9 +95,10 @@ static int qemu_signal_init(void)
     pthread_sigmask(SIG_BLOCK, &set, NULL);
 
     sigdelset(&set, SIG_IPI);
-    sigfd = qemu_signalfd(&set);
+    sigfd = qemu_signalfd(&set, &local_err);
     if (sigfd == -1) {
         fprintf(stderr, "failed to create signalfd\n");
+        error_propagate(errp, local_err);
         return -errno;
     }
 
@@ -109,7 +111,7 @@ static int qemu_signal_init(void)
 
 #else /* _WIN32 */
 
-static int qemu_signal_init(void)
+static int qemu_signal_init(Error **errp)
 {
     return 0;
 }
@@ -148,8 +150,9 @@ int qemu_init_main_loop(Error **errp)
 
     init_clocks(qemu_timer_notify_cb);
 
-    ret = qemu_signal_init();
+    ret = qemu_signal_init(&local_error);
     if (ret) {
+        error_propagate(errp, local_error);
         return ret;
     }
 
-- 
2.13.7


Re: [Qemu-devel] [PATCH 1/5] Fix segmentation fault when qemu_signal_init fails
Posted by Daniel P. Berrangé 7 years, 2 months ago
On Tue, Sep 04, 2018 at 07:08:18PM +0800, Fei Li wrote:
> Currently, when qemu_signal_init() fails it only returns a non-zero
> value but without propagating any Error. But its callers need a
> non-null err when runs error_report_err(err), or else 0->msg occurs.
> 
> To avoid such segmentation fault, add a new Error parameter to make
> the call trace to propagate the err to the final caller.
> 
> Signed-off-by: Fei Li <fli@suse.com>
> ---
>  include/qemu/osdep.h |  2 +-
>  util/compatfd.c      | 17 ++++++++++++-----
>  util/main-loop.c     | 11 +++++++----
>  3 files changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> index a91068df0e..09ed85fcb8 100644
> --- a/include/qemu/osdep.h
> +++ b/include/qemu/osdep.h
> @@ -421,7 +421,7 @@ struct qemu_signalfd_siginfo {
>                               additional fields in the future) */
>  };
>  
> -int qemu_signalfd(const sigset_t *mask);
> +int qemu_signalfd(const sigset_t *mask, Error **errp);
>  void sigaction_invoke(struct sigaction *action,
>                        struct qemu_signalfd_siginfo *info);
>  #endif
> diff --git a/util/compatfd.c b/util/compatfd.c
> index 980bd33e52..65501de622 100644
> --- a/util/compatfd.c
> +++ b/util/compatfd.c
> @@ -16,6 +16,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu-common.h"
>  #include "qemu/thread.h"
> +#include "qapi/error.h"
>  
>  #include <sys/syscall.h>
>  
> @@ -65,7 +66,7 @@ static void *sigwait_compat(void *opaque)
>      }
>  }
>  
> -static int qemu_signalfd_compat(const sigset_t *mask)
> +static int qemu_signalfd_compat(const sigset_t *mask, Error **errp)
>  {
>      struct sigfd_compat_info *info;
>      QemuThread thread;
> @@ -73,11 +74,13 @@ static int qemu_signalfd_compat(const sigset_t *mask)
>  
>      info = malloc(sizeof(*info));
>      if (info == NULL) {
> +        error_setg(errp, "Failed to malloc in %s", __func__);

I'd avoid using __func__ in error messages. Instead

   "Failed to allocate signalfd memory"

>          errno = ENOMEM;
>          return -1;
>      }
>  
>      if (pipe(fds) == -1) {
> +        error_setg(errp, "Failed to create a pipe in %s", __func__);

    "Failed to create signalfd pipe"

>          free(info);
>          return -1;
>      }
> @@ -94,17 +97,21 @@ static int qemu_signalfd_compat(const sigset_t *mask)
>      return fds[0];
>  }
>  
> -int qemu_signalfd(const sigset_t *mask)
> +int qemu_signalfd(const sigset_t *mask, Error **errp)
>  {
> -#if defined(CONFIG_SIGNALFD)
>      int ret;
> +    Error *local_err = NULL;
>  
> +#if defined(CONFIG_SIGNALFD)
>      ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
>      if (ret != -1) {
>          qemu_set_cloexec(ret);
>          return ret;
>      }
>  #endif
> -
> -    return qemu_signalfd_compat(mask);
> +    ret = qemu_signalfd_compat(mask, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +    }

Using a local_err is not required - you can just pass errp stright
to qemu_signalfd_compat() and then check

   if (ret < 0)

> +    return ret;
>  }
> diff --git a/util/main-loop.c b/util/main-loop.c
> index affe0403c5..20f6ad3849 100644
> --- a/util/main-loop.c
> +++ b/util/main-loop.c
> @@ -71,10 +71,11 @@ static void sigfd_handler(void *opaque)
>      }
>  }
>  
> -static int qemu_signal_init(void)
> +static int qemu_signal_init(Error **errp)
>  {
>      int sigfd;
>      sigset_t set;
> +    Error *local_err = NULL;
>  
>      /*
>       * SIG_IPI must be blocked in the main thread and must not be caught
> @@ -94,9 +95,10 @@ static int qemu_signal_init(void)
>      pthread_sigmask(SIG_BLOCK, &set, NULL);
>  
>      sigdelset(&set, SIG_IPI);
> -    sigfd = qemu_signalfd(&set);
> +    sigfd = qemu_signalfd(&set, &local_err);
>      if (sigfd == -1) {
>          fprintf(stderr, "failed to create signalfd\n");
> +        error_propagate(errp, local_err);
>          return -errno;
>      }

Again, no need for local_err - just pass errp stright in

>  
> @@ -109,7 +111,7 @@ static int qemu_signal_init(void)
>  
>  #else /* _WIN32 */
>  
> -static int qemu_signal_init(void)
> +static int qemu_signal_init(Error **errp)
>  {
>      return 0;
>  }
> @@ -148,8 +150,9 @@ int qemu_init_main_loop(Error **errp)
>  
>      init_clocks(qemu_timer_notify_cb);
>  
> -    ret = qemu_signal_init();
> +    ret = qemu_signal_init(&local_error);
>      if (ret) {
> +        error_propagate(errp, local_error);
>          return ret;
>      }

Again, no need for local_error.


Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [PATCH 1/5] Fix segmentation fault when qemu_signal_init fails
Posted by Fei Li 7 years, 2 months ago
Thanks for the review! :)


On 09/04/2018 07:26 PM, Daniel P. Berrangé wrote:
> On Tue, Sep 04, 2018 at 07:08:18PM +0800, Fei Li wrote:
>> Currently, when qemu_signal_init() fails it only returns a non-zero
>> value but without propagating any Error. But its callers need a
>> non-null err when runs error_report_err(err), or else 0->msg occurs.
>>
>> To avoid such segmentation fault, add a new Error parameter to make
>> the call trace to propagate the err to the final caller.
>>
>> Signed-off-by: Fei Li <fli@suse.com>
>> ---
>>   include/qemu/osdep.h |  2 +-
>>   util/compatfd.c      | 17 ++++++++++++-----
>>   util/main-loop.c     | 11 +++++++----
>>   3 files changed, 20 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
>> index a91068df0e..09ed85fcb8 100644
>> --- a/include/qemu/osdep.h
>> +++ b/include/qemu/osdep.h
>> @@ -421,7 +421,7 @@ struct qemu_signalfd_siginfo {
>>                                additional fields in the future) */
>>   };
>>   
>> -int qemu_signalfd(const sigset_t *mask);
>> +int qemu_signalfd(const sigset_t *mask, Error **errp);
>>   void sigaction_invoke(struct sigaction *action,
>>                         struct qemu_signalfd_siginfo *info);
>>   #endif
>> diff --git a/util/compatfd.c b/util/compatfd.c
>> index 980bd33e52..65501de622 100644
>> --- a/util/compatfd.c
>> +++ b/util/compatfd.c
>> @@ -16,6 +16,7 @@
>>   #include "qemu/osdep.h"
>>   #include "qemu-common.h"
>>   #include "qemu/thread.h"
>> +#include "qapi/error.h"
>>   
>>   #include <sys/syscall.h>
>>   
>> @@ -65,7 +66,7 @@ static void *sigwait_compat(void *opaque)
>>       }
>>   }
>>   
>> -static int qemu_signalfd_compat(const sigset_t *mask)
>> +static int qemu_signalfd_compat(const sigset_t *mask, Error **errp)
>>   {
>>       struct sigfd_compat_info *info;
>>       QemuThread thread;
>> @@ -73,11 +74,13 @@ static int qemu_signalfd_compat(const sigset_t *mask)
>>   
>>       info = malloc(sizeof(*info));
>>       if (info == NULL) {
>> +        error_setg(errp, "Failed to malloc in %s", __func__);
> I'd avoid using __func__ in error messages. Instead
>
>     "Failed to allocate signalfd memory"
Ok, thanks.
>
>>           errno = ENOMEM;
>>           return -1;
>>       }
>>   
>>       if (pipe(fds) == -1) {
>> +        error_setg(errp, "Failed to create a pipe in %s", __func__);
>      "Failed to create signalfd pipe"
OK.
>
>>           free(info);
>>           return -1;
>>       }
>> @@ -94,17 +97,21 @@ static int qemu_signalfd_compat(const sigset_t *mask)
>>       return fds[0];
>>   }
>>   
>> -int qemu_signalfd(const sigset_t *mask)
>> +int qemu_signalfd(const sigset_t *mask, Error **errp)
>>   {
>> -#if defined(CONFIG_SIGNALFD)
>>       int ret;
>> +    Error *local_err = NULL;
>>   
>> +#if defined(CONFIG_SIGNALFD)
>>       ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
>>       if (ret != -1) {
>>           qemu_set_cloexec(ret);
>>           return ret;
>>       }
>>   #endif
>> -
>> -    return qemu_signalfd_compat(mask);
>> +    ret = qemu_signalfd_compat(mask, &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +    }
> Using a local_err is not required - you can just pass errp stright
> to qemu_signalfd_compat() and then check
>
>     if (ret < 0)
For the use of a local error object & error_propagate call, I'd like to 
explain here. :)
In our code, the initial caller passes two kinds of Error to the call 
trace, one is
something like &error_abort and &error_fatal, the other is NULL.

For the former, the exit() occurs in the functions where 
error_handle_fatal() is called
(e.g. called by error_propagate/error_setg/...). The patch3: 
qemu_init_vcpu is the case,
that means the system will exit in the final callee: 
qemu_thread_create(), instead of
the initial caller pc_new_cpu(). In such case, I think propagating seems 
more reasonable.

For the latter, suppose we pass errp straightly. As NULL is passed, the 
error occurs in the
final callee will automatically "propagating" to the initial caller, and 
let it handle this error.
The patch1: qemu_signal_init is this case, let the caller of 
qemu_init_main_loop handle.
In such case, the error_propagate is indeed needless.

How do you think passing errp straightly for the latter case, and use a 
local error object &
error_propagate for the former case? This is a distinct treatment, but 
would shorten the code.

Have a nice day, thanks again
Fei
>> +    return ret;
>>   }
>> diff --git a/util/main-loop.c b/util/main-loop.c
>> index affe0403c5..20f6ad3849 100644
>> --- a/util/main-loop.c
>> +++ b/util/main-loop.c
>> @@ -71,10 +71,11 @@ static void sigfd_handler(void *opaque)
>>       }
>>   }
>>   
>> -static int qemu_signal_init(void)
>> +static int qemu_signal_init(Error **errp)
>>   {
>>       int sigfd;
>>       sigset_t set;
>> +    Error *local_err = NULL;
>>   
>>       /*
>>        * SIG_IPI must be blocked in the main thread and must not be caught
>> @@ -94,9 +95,10 @@ static int qemu_signal_init(void)
>>       pthread_sigmask(SIG_BLOCK, &set, NULL);
>>   
>>       sigdelset(&set, SIG_IPI);
>> -    sigfd = qemu_signalfd(&set);
>> +    sigfd = qemu_signalfd(&set, &local_err);
>>       if (sigfd == -1) {
>>           fprintf(stderr, "failed to create signalfd\n");
>> +        error_propagate(errp, local_err);
>>           return -errno;
>>       }
> Again, no need for local_err - just pass errp stright in
>
>>   
>> @@ -109,7 +111,7 @@ static int qemu_signal_init(void)
>>   
>>   #else /* _WIN32 */
>>   
>> -static int qemu_signal_init(void)
>> +static int qemu_signal_init(Error **errp)
>>   {
>>       return 0;
>>   }
>> @@ -148,8 +150,9 @@ int qemu_init_main_loop(Error **errp)
>>   
>>       init_clocks(qemu_timer_notify_cb);
>>   
>> -    ret = qemu_signal_init();
>> +    ret = qemu_signal_init(&local_error);
>>       if (ret) {
>> +        error_propagate(errp, local_error);
>>           return ret;
>>       }
> Again, no need for local_error.
>
>
> Regards,
> Daniel


Re: [Qemu-devel] [PATCH 1/5] Fix segmentation fault when qemu_signal_init fails
Posted by Daniel P. Berrangé 7 years, 2 months ago
On Wed, Sep 05, 2018 at 12:17:24PM +0800, Fei Li wrote:
> Thanks for the review! :)
> 
> 
> On 09/04/2018 07:26 PM, Daniel P. Berrangé wrote:
> > On Tue, Sep 04, 2018 at 07:08:18PM +0800, Fei Li wrote:
> > > Currently, when qemu_signal_init() fails it only returns a non-zero
> > > value but without propagating any Error. But its callers need a
> > > non-null err when runs error_report_err(err), or else 0->msg occurs.
> > > 
> > > To avoid such segmentation fault, add a new Error parameter to make
> > > the call trace to propagate the err to the final caller.
> > > 
> > > Signed-off-by: Fei Li <fli@suse.com>
> > > ---
> > >   include/qemu/osdep.h |  2 +-
> > >   util/compatfd.c      | 17 ++++++++++++-----
> > >   util/main-loop.c     | 11 +++++++----
> > >   3 files changed, 20 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> > > index a91068df0e..09ed85fcb8 100644
> > > --- a/include/qemu/osdep.h
> > > +++ b/include/qemu/osdep.h
> > > @@ -421,7 +421,7 @@ struct qemu_signalfd_siginfo {
> > >                                additional fields in the future) */
> > >   };
> > > -int qemu_signalfd(const sigset_t *mask);
> > > +int qemu_signalfd(const sigset_t *mask, Error **errp);
> > >   void sigaction_invoke(struct sigaction *action,
> > >                         struct qemu_signalfd_siginfo *info);
> > >   #endif
> > > diff --git a/util/compatfd.c b/util/compatfd.c
> > > index 980bd33e52..65501de622 100644
> > > --- a/util/compatfd.c
> > > +++ b/util/compatfd.c
> > > @@ -16,6 +16,7 @@
> > >   #include "qemu/osdep.h"
> > >   #include "qemu-common.h"
> > >   #include "qemu/thread.h"
> > > +#include "qapi/error.h"
> > >   #include <sys/syscall.h>
> > > @@ -65,7 +66,7 @@ static void *sigwait_compat(void *opaque)
> > >       }
> > >   }
> > > -static int qemu_signalfd_compat(const sigset_t *mask)
> > > +static int qemu_signalfd_compat(const sigset_t *mask, Error **errp)
> > >   {
> > >       struct sigfd_compat_info *info;
> > >       QemuThread thread;
> > > @@ -73,11 +74,13 @@ static int qemu_signalfd_compat(const sigset_t *mask)
> > >       info = malloc(sizeof(*info));
> > >       if (info == NULL) {
> > > +        error_setg(errp, "Failed to malloc in %s", __func__);
> > I'd avoid using __func__ in error messages. Instead
> > 
> >     "Failed to allocate signalfd memory"
> Ok, thanks.
> > 
> > >           errno = ENOMEM;
> > >           return -1;
> > >       }
> > >       if (pipe(fds) == -1) {
> > > +        error_setg(errp, "Failed to create a pipe in %s", __func__);
> >      "Failed to create signalfd pipe"
> OK.
> > 
> > >           free(info);
> > >           return -1;
> > >       }
> > > @@ -94,17 +97,21 @@ static int qemu_signalfd_compat(const sigset_t *mask)
> > >       return fds[0];
> > >   }
> > > -int qemu_signalfd(const sigset_t *mask)
> > > +int qemu_signalfd(const sigset_t *mask, Error **errp)
> > >   {
> > > -#if defined(CONFIG_SIGNALFD)
> > >       int ret;
> > > +    Error *local_err = NULL;
> > > +#if defined(CONFIG_SIGNALFD)
> > >       ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
> > >       if (ret != -1) {
> > >           qemu_set_cloexec(ret);
> > >           return ret;
> > >       }
> > >   #endif
> > > -
> > > -    return qemu_signalfd_compat(mask);
> > > +    ret = qemu_signalfd_compat(mask, &local_err);
> > > +    if (local_err) {
> > > +        error_propagate(errp, local_err);
> > > +    }
> > Using a local_err is not required - you can just pass errp stright
> > to qemu_signalfd_compat() and then check
> > 
> >     if (ret < 0)
> For the use of a local error object & error_propagate call, I'd like to
> explain here. :)
> In our code, the initial caller passes two kinds of Error to the call trace,
> one is
> something like &error_abort and &error_fatal, the other is NULL.
> 
> For the former, the exit() occurs in the functions where
> error_handle_fatal() is called
> (e.g. called by error_propagate/error_setg/...). The patch3: qemu_init_vcpu
> is the case,
> that means the system will exit in the final callee: qemu_thread_create(),
> instead of
> the initial caller pc_new_cpu(). In such case, I think propagating seems
> more reasonable.

I don't really agree. It is preferrable to abort immediately at the deepest
place which raises the error. The stack trace will thus show the full call
chain leading upto the problem.

> How do you think passing errp straightly for the latter case, and use a
> local error object &
> error_propagate for the former case? This is a distinct treatment, but would
> shorten the code.

It is inappropriate to second-guess whether the caller is a passing in
NULL or &error_abort, or another Error object. What is passed in can
change at any time in the future. 

We should only ever use a local error where the local method has a need
to look at the error contents before returning to the caller. Any other
case should just use the errp directly.

Regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [PATCH 1/5] Fix segmentation fault when qemu_signal_init fails
Posted by Fei Li 7 years, 2 months ago

On 09/05/2018 04:36 PM, Daniel P. Berrangé wrote:
> On Wed, Sep 05, 2018 at 12:17:24PM +0800, Fei Li wrote:
>> Thanks for the review! :)
>>
>>
>> On 09/04/2018 07:26 PM, Daniel P. Berrangé wrote:
>>> On Tue, Sep 04, 2018 at 07:08:18PM +0800, Fei Li wrote:
>>>
... snip ...
>>>>            free(info);
>>>>            return -1;
>>>>        }
>>>> @@ -94,17 +97,21 @@ static int qemu_signalfd_compat(const sigset_t *mask)
>>>>        return fds[0];
>>>>    }
>>>> -int qemu_signalfd(const sigset_t *mask)
>>>> +int qemu_signalfd(const sigset_t *mask, Error **errp)
>>>>    {
>>>> -#if defined(CONFIG_SIGNALFD)
>>>>        int ret;
>>>> +    Error *local_err = NULL;
>>>> +#if defined(CONFIG_SIGNALFD)
>>>>        ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
>>>>        if (ret != -1) {
>>>>            qemu_set_cloexec(ret);
>>>>            return ret;
>>>>        }
>>>>    #endif
>>>> -
>>>> -    return qemu_signalfd_compat(mask);
>>>> +    ret = qemu_signalfd_compat(mask, &local_err);
>>>> +    if (local_err) {
>>>> +        error_propagate(errp, local_err);
>>>> +    }
>>> Using a local_err is not required - you can just pass errp stright
>>> to qemu_signalfd_compat() and then check
>>>
>>>      if (ret < 0)
>> For the use of a local error object & error_propagate call, I'd like to
>> explain here. :)
>> In our code, the initial caller passes two kinds of Error to the call trace,
>> one is
>> something like &error_abort and &error_fatal, the other is NULL.
>>
>> For the former, the exit() occurs in the functions where
>> error_handle_fatal() is called
>> (e.g. called by error_propagate/error_setg/...). The patch3: qemu_init_vcpu
>> is the case,
>> that means the system will exit in the final callee: qemu_thread_create(),
>> instead of
>> the initial caller pc_new_cpu(). In such case, I think propagating seems
>> more reasonable.
> I don't really agree. It is preferrable to abort immediately at the deepest
> place which raises the error. The stack trace will thus show the full call
> chain leading upto the problem.
Sorry for the above example, it is not exactly correct: for the patch3 
case, the
system will exit in device_set_realized(), where the first 
error_propagate() is called
if we pass errp directly, but not in the final callee.. Sorry for the 
misleading.

For another example, its call trace:
qemu_thread_create(, NULL)
<= iothread_complete(, NULL)
<== user_creatable_complete(, NULL)
<=== object_new_with_propv(, errp)
<==== object_new_with_props(, errp) {... error_propagate(errp, 
local_err); ...}
<===== iothread_create(, &error_abort)
The exit occurs in object_new_with_props where the first error_propagate 
is called.

Either the device_set_realized() or object_new_with_props() is a middle 
caller, thus
we can only see the top half stack trace until where 
error_handle_fatal() is called.

In other words, the exit() occurs neither in the final callee nor the 
initial caller.
Sorry for the misleading example again..
>
>> How do you think passing errp straightly for the latter case, and use a
>> local error object &
>> error_propagate for the former case? This is a distinct treatment, but would
>> shorten the code.
> It is inappropriate to second-guess whether the caller is a passing in
> NULL or &error_abort, or another Error object. What is passed in can
> change at any time in the future.
ok.
>
> We should only ever use a local error where the local method has a need
> to look at the error contents before returning to the caller. Any other
> case should just use the errp directly.
>
> Regards,
> Daniel
Have a nice day, thanks
Fei

Re: [Qemu-devel] [PATCH 1/5] Fix segmentation fault when qemu_signal_init fails
Posted by Fam Zheng 7 years, 2 months ago
On Wed, 09/05 19:20, Fei Li wrote:
> 
> 
> On 09/05/2018 04:36 PM, Daniel P. Berrangé wrote:
> > On Wed, Sep 05, 2018 at 12:17:24PM +0800, Fei Li wrote:
> > > Thanks for the review! :)
> > > 
> > > 
> > > On 09/04/2018 07:26 PM, Daniel P. Berrangé wrote:
> > > > On Tue, Sep 04, 2018 at 07:08:18PM +0800, Fei Li wrote:
> > > > 
> ... snip ...
> > > > >            free(info);
> > > > >            return -1;
> > > > >        }
> > > > > @@ -94,17 +97,21 @@ static int qemu_signalfd_compat(const sigset_t *mask)
> > > > >        return fds[0];
> > > > >    }
> > > > > -int qemu_signalfd(const sigset_t *mask)
> > > > > +int qemu_signalfd(const sigset_t *mask, Error **errp)
> > > > >    {
> > > > > -#if defined(CONFIG_SIGNALFD)
> > > > >        int ret;
> > > > > +    Error *local_err = NULL;
> > > > > +#if defined(CONFIG_SIGNALFD)
> > > > >        ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
> > > > >        if (ret != -1) {
> > > > >            qemu_set_cloexec(ret);
> > > > >            return ret;
> > > > >        }
> > > > >    #endif
> > > > > -
> > > > > -    return qemu_signalfd_compat(mask);
> > > > > +    ret = qemu_signalfd_compat(mask, &local_err);
> > > > > +    if (local_err) {
> > > > > +        error_propagate(errp, local_err);
> > > > > +    }
> > > > Using a local_err is not required - you can just pass errp stright
> > > > to qemu_signalfd_compat() and then check
> > > > 
> > > >      if (ret < 0)
> > > For the use of a local error object & error_propagate call, I'd like to
> > > explain here. :)
> > > In our code, the initial caller passes two kinds of Error to the call trace,
> > > one is
> > > something like &error_abort and &error_fatal, the other is NULL.
> > > 
> > > For the former, the exit() occurs in the functions where
> > > error_handle_fatal() is called
> > > (e.g. called by error_propagate/error_setg/...). The patch3: qemu_init_vcpu
> > > is the case,
> > > that means the system will exit in the final callee: qemu_thread_create(),
> > > instead of
> > > the initial caller pc_new_cpu(). In such case, I think propagating seems
> > > more reasonable.
> > I don't really agree. It is preferrable to abort immediately at the deepest
> > place which raises the error. The stack trace will thus show the full call
> > chain leading upto the problem.
> Sorry for the above example, it is not exactly correct: for the patch3 case,
> the
> system will exit in device_set_realized(), where the first error_propagate()
> is called
> if we pass errp directly, but not in the final callee.. Sorry for the
> misleading.
> 
> For another example, its call trace:
> qemu_thread_create(, NULL)
> <= iothread_complete(, NULL)
> <== user_creatable_complete(, NULL)
> <=== object_new_with_propv(, errp)
> <==== object_new_with_props(, errp) {... error_propagate(errp, local_err);
> ...}
> <===== iothread_create(, &error_abort)
> The exit occurs in object_new_with_props where the first error_propagate is
> called.
> 
> Either the device_set_realized() or object_new_with_props() is a middle
> caller, thus
> we can only see the top half stack trace until where error_handle_fatal() is
> called.
> 
> In other words, the exit() occurs neither in the final callee nor the
> initial caller.
> Sorry for the misleading example again..

This means using error_propagate can potentially lose the final callee in the
error_abort cases. That is why it's preferrable to just pass errp down the
calling chain when possible. The reason why object_new_with_propv uses
error_propagate is because both object_property_add_child and
user_creatable_complete return void, thus cannot flag success/failure to its
caller via their return values. To check whether they succeed,
object_new_with_propv wants a non-NULL err parameter. But like you said, errp
passed to object_new_with_propv may or may not be NULL, so a local_err local
variable is defined to cope with that.

Alternatively it could do this instead:

{

    ...

    if (errp) {
        object_property_add_child(parent, id, obj, errp);
        if (*errp) {
            goto error;
        }
    } else {
        Error *local_err = NULL;
        object_property_add_child(parent, id, obj, &local_err);
        if (local_err) {
            goto error;
        }
    }
    ...

}

This way if error_abort was passed and object_property_add_child failed, the
abort point would be in the innermost function. But this is boilerplate code so
it's not used.

On the contrary, using error_propagate when not necessary also means more lines
of code but gives less info on the call trace when aborted.

So I fully agree with Dan.

Fam

> > 
> > > How do you think passing errp straightly for the latter case, and use a
> > > local error object &
> > > error_propagate for the former case? This is a distinct treatment, but would
> > > shorten the code.
> > It is inappropriate to second-guess whether the caller is a passing in
> > NULL or &error_abort, or another Error object. What is passed in can
> > change at any time in the future.
> ok.
> > 
> > We should only ever use a local error where the local method has a need
> > to look at the error contents before returning to the caller. Any other
> > case should just use the errp directly.
> > 
> > Regards,
> > Daniel
> Have a nice day, thanks
> Fei
> 

Re: [Qemu-devel] [PATCH 1/5] Fix segmentation fault when qemu_signal_init fails
Posted by Fei Li 7 years, 2 months ago

On 09/05/2018 10:38 PM, Fam Zheng wrote:
> On Wed, 09/05 19:20, Fei Li wrote:
>>
>> On 09/05/2018 04:36 PM, Daniel P. Berrangé wrote:
>>> On Wed, Sep 05, 2018 at 12:17:24PM +0800, Fei Li wrote:
>>>> Thanks for the review! :)
>>>>
>>>>
>>>> On 09/04/2018 07:26 PM, Daniel P. Berrangé wrote:
>>>>> On Tue, Sep 04, 2018 at 07:08:18PM +0800, Fei Li wrote:
>>>>>
>> ... snip ...
>>>>>>             free(info);
>>>>>>             return -1;
>>>>>>         }
>>>>>> @@ -94,17 +97,21 @@ static int qemu_signalfd_compat(const sigset_t *mask)
>>>>>>         return fds[0];
>>>>>>     }
>>>>>> -int qemu_signalfd(const sigset_t *mask)
>>>>>> +int qemu_signalfd(const sigset_t *mask, Error **errp)
>>>>>>     {
>>>>>> -#if defined(CONFIG_SIGNALFD)
>>>>>>         int ret;
>>>>>> +    Error *local_err = NULL;
>>>>>> +#if defined(CONFIG_SIGNALFD)
>>>>>>         ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
>>>>>>         if (ret != -1) {
>>>>>>             qemu_set_cloexec(ret);
>>>>>>             return ret;
>>>>>>         }
>>>>>>     #endif
>>>>>> -
>>>>>> -    return qemu_signalfd_compat(mask);
>>>>>> +    ret = qemu_signalfd_compat(mask, &local_err);
>>>>>> +    if (local_err) {
>>>>>> +        error_propagate(errp, local_err);
>>>>>> +    }
>>>>> Using a local_err is not required - you can just pass errp stright
>>>>> to qemu_signalfd_compat() and then check
>>>>>
>>>>>       if (ret < 0)
>>>> For the use of a local error object & error_propagate call, I'd like to
>>>> explain here. :)
>>>> In our code, the initial caller passes two kinds of Error to the call trace,
>>>> one is
>>>> something like &error_abort and &error_fatal, the other is NULL.
>>>>
>>>> For the former, the exit() occurs in the functions where
>>>> error_handle_fatal() is called
>>>> (e.g. called by error_propagate/error_setg/...). The patch3: qemu_init_vcpu
>>>> is the case,
>>>> that means the system will exit in the final callee: qemu_thread_create(),
>>>> instead of
>>>> the initial caller pc_new_cpu(). In such case, I think propagating seems
>>>> more reasonable.
>>> I don't really agree. It is preferrable to abort immediately at the deepest
>>> place which raises the error. The stack trace will thus show the full call
>>> chain leading upto the problem.
>> Sorry for the above example, it is not exactly correct: for the patch3 case,
>> the
>> system will exit in device_set_realized(), where the first error_propagate()
>> is called
>> if we pass errp directly, but not in the final callee.. Sorry for the
>> misleading.
>>
>> For another example, its call trace:
>> qemu_thread_create(, NULL)
>> <= iothread_complete(, NULL)
>> <== user_creatable_complete(, NULL)
>> <=== object_new_with_propv(, errp)
>> <==== object_new_with_props(, errp) {... error_propagate(errp, local_err);
>> ...}
>> <===== iothread_create(, &error_abort)
>> The exit occurs in object_new_with_props where the first error_propagate is
>> called.
>>
>> Either the device_set_realized() or object_new_with_props() is a middle
>> caller, thus
>> we can only see the top half stack trace until where error_handle_fatal() is
>> called.
>>
>> In other words, the exit() occurs neither in the final callee nor the
>> initial caller.
>> Sorry for the misleading example again..
> This means using error_propagate can potentially lose the final callee in the
> error_abort cases. That is why it's preferrable to just pass errp down the
> calling chain when possible. The reason why object_new_with_propv uses
> error_propagate is because both object_property_add_child and
> user_creatable_complete return void, thus cannot flag success/failure to its
> caller via their return values. To check whether they succeed,
> object_new_with_propv wants a non-NULL err parameter. But like you said, errp
> passed to object_new_with_propv may or may not be NULL, so a local_err local
> variable is defined to cope with that.
>
> Alternatively it could do this instead:
>
> {
>
>      ...
>
>      if (errp) {
>          object_property_add_child(parent, id, obj, errp);
>          if (*errp) {
>              goto error;
>          }
>      } else {
>          Error *local_err = NULL;
>          object_property_add_child(parent, id, obj, &local_err);
>          if (local_err) {
>              goto error;
>          }
>      }
>      ...
>
> }
>
> This way if error_abort was passed and object_property_add_child failed, the
> abort point would be in the innermost function. But this is boilerplate code so
> it's not used.
>
> On the contrary, using error_propagate when not necessary also means more lines
> of code but gives less info on the call trace when aborted.
>
> So I fully agree with Dan.
>
> Fam
Got, thanks for the review. :)

Have a nice day
Fei
>>>> How do you think passing errp straightly for the latter case, and use a
>>>> local error object &
>>>> error_propagate for the former case? This is a distinct treatment, but would
>>>> shorten the code.
>>> It is inappropriate to second-guess whether the caller is a passing in
>>> NULL or &error_abort, or another Error object. What is passed in can
>>> change at any time in the future.
>> ok.
>>> We should only ever use a local error where the local method has a need
>>> to look at the error contents before returning to the caller. Any other
>>> case should just use the errp directly.
>>>
>>> Regards,
>>> Daniel
>> Have a nice day, thanks
>> Fei
>>
>


Re: [Qemu-devel] [PATCH 1/5] Fix segmentation fault when qemu_signal_init fails
Posted by Markus Armbruster 7 years, 1 month ago
Fam Zheng <famz@redhat.com> writes:

> On Wed, 09/05 19:20, Fei Li wrote:
>> 
>> 
>> On 09/05/2018 04:36 PM, Daniel P. Berrangé wrote:
>> > On Wed, Sep 05, 2018 at 12:17:24PM +0800, Fei Li wrote:
>> > > Thanks for the review! :)
>> > > 
>> > > 
>> > > On 09/04/2018 07:26 PM, Daniel P. Berrangé wrote:
>> > > > On Tue, Sep 04, 2018 at 07:08:18PM +0800, Fei Li wrote:
>> > > > 
>> ... snip ...
>> > > > >            free(info);
>> > > > >            return -1;
>> > > > >        }
>> > > > > @@ -94,17 +97,21 @@ static int qemu_signalfd_compat(const sigset_t *mask)
>> > > > >        return fds[0];
>> > > > >    }
>> > > > > -int qemu_signalfd(const sigset_t *mask)
>> > > > > +int qemu_signalfd(const sigset_t *mask, Error **errp)
>> > > > >    {
>> > > > > -#if defined(CONFIG_SIGNALFD)
>> > > > >        int ret;
>> > > > > +    Error *local_err = NULL;
>> > > > > +#if defined(CONFIG_SIGNALFD)
>> > > > >        ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
>> > > > >        if (ret != -1) {
>> > > > >            qemu_set_cloexec(ret);
>> > > > >            return ret;
>> > > > >        }
>> > > > >    #endif
>> > > > > -
>> > > > > -    return qemu_signalfd_compat(mask);
>> > > > > +    ret = qemu_signalfd_compat(mask, &local_err);
>> > > > > +    if (local_err) {
>> > > > > +        error_propagate(errp, local_err);
>> > > > > +    }
>> > > > Using a local_err is not required - you can just pass errp stright
>> > > > to qemu_signalfd_compat() and then check
>> > > > 
>> > > >      if (ret < 0)
>> > > For the use of a local error object & error_propagate call, I'd like to
>> > > explain here. :)
>> > > In our code, the initial caller passes two kinds of Error to the call trace,
>> > > one is
>> > > something like &error_abort and &error_fatal, the other is NULL.
>> > > 
>> > > For the former, the exit() occurs in the functions where
>> > > error_handle_fatal() is called
>> > > (e.g. called by error_propagate/error_setg/...). The patch3: qemu_init_vcpu
>> > > is the case,
>> > > that means the system will exit in the final callee: qemu_thread_create(),
>> > > instead of
>> > > the initial caller pc_new_cpu(). In such case, I think propagating seems
>> > > more reasonable.
>> > I don't really agree. It is preferrable to abort immediately at the deepest
>> > place which raises the error. The stack trace will thus show the full call
>> > chain leading upto the problem.
>> Sorry for the above example, it is not exactly correct: for the patch3 case,
>> the
>> system will exit in device_set_realized(), where the first error_propagate()
>> is called
>> if we pass errp directly, but not in the final callee.. Sorry for the
>> misleading.
>> 
>> For another example, its call trace:
>> qemu_thread_create(, NULL)
>> <= iothread_complete(, NULL)
>> <== user_creatable_complete(, NULL)
>> <=== object_new_with_propv(, errp)
>> <==== object_new_with_props(, errp) {... error_propagate(errp, local_err);
>> ...}
>> <===== iothread_create(, &error_abort)
>> The exit occurs in object_new_with_props where the first error_propagate is
>> called.
>> 
>> Either the device_set_realized() or object_new_with_props() is a middle
>> caller, thus
>> we can only see the top half stack trace until where error_handle_fatal() is
>> called.
>> 
>> In other words, the exit() occurs neither in the final callee nor the
>> initial caller.
>> Sorry for the misleading example again..
>
> This means using error_propagate can potentially lose the final callee in the
> error_abort cases. That is why it's preferrable to just pass errp down the
> calling chain when possible. The reason why object_new_with_propv uses
> error_propagate is because both object_property_add_child and
> user_creatable_complete return void, thus cannot flag success/failure to its
> caller via their return values. To check whether they succeed,
> object_new_with_propv wants a non-NULL err parameter. But like you said, errp
> passed to object_new_with_propv may or may not be NULL, so a local_err local
> variable is defined to cope with that.
>
> Alternatively it could do this instead:
>
> {
>
>     ...
>
>     if (errp) {
>         object_property_add_child(parent, id, obj, errp);
>         if (*errp) {
>             goto error;
>         }
>     } else {
>         Error *local_err = NULL;
>         object_property_add_child(parent, id, obj, &local_err);
>         if (local_err) {
>             goto error;
>         }
>     }
>     ...
>
> }

Pretty awful, isn't it?

> This way if error_abort was passed and object_property_add_child failed, the
> abort point would be in the innermost function. But this is boilerplate code so
> it's not used.
>
> On the contrary, using error_propagate when not necessary also means more lines
> of code but gives less info on the call trace when aborted.
>
> So I fully agree with Dan.

So do I: simply pass down the errp argument when you can.

Sometimes, you can't because the callee doesn't return a useful value.
As I explained elsewhere (pointed to by Eric), I'd solve that by making
the callee return a useful value.  It's what functions do.

>
> Fam
>
>> > 
>> > > How do you think passing errp straightly for the latter case, and use a
>> > > local error object &
>> > > error_propagate for the former case? This is a distinct treatment, but would
>> > > shorten the code.
>> > It is inappropriate to second-guess whether the caller is a passing in
>> > NULL or &error_abort, or another Error object. What is passed in can
>> > change at any time in the future.
>> ok.
>> > 
>> > We should only ever use a local error where the local method has a need
>> > to look at the error contents before returning to the caller. Any other
>> > case should just use the errp directly.
>> > 
>> > Regards,
>> > Daniel
>> Have a nice day, thanks
>> Fei
>>