[v3] jobs: Job Exit Refactoring Pt 1

[Qemu-devel] [PATCH v3 2/9] jobs: canonize Error object

Posted by John Snow 7 years, 5 months ago

Jobs presently use both an Error object in the case of the create job,
and char strings in the case of generic errors elsewhere.

Unify the two paths as just j->err, and remove the extra argument from
job_completed. The integer error code for job_completed is kept for now,
to be removed shortly in a separate patch.

Signed-off-by: John Snow <jsnow@redhat.com>
---
 block/backup.c            |  2 +-
 block/commit.c            |  2 +-
 block/create.c            |  5 ++---
 block/mirror.c            |  2 +-
 block/stream.c            |  2 +-
 include/qemu/job.h        | 14 ++++++++------
 job-qmp.c                 |  5 +++--
 job.c                     | 18 ++++++------------
 tests/test-bdrv-drain.c   |  2 +-
 tests/test-blockjob-txn.c |  2 +-
 tests/test-blockjob.c     |  2 +-
 11 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 5d47781840..1e965d54e5 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -388,7 +388,7 @@ static void backup_complete(Job *job, void *opaque)
 {
     BackupCompleteData *data = opaque;
 
-    job_completed(job, data->ret, NULL);
+    job_completed(job, data->ret);
     g_free(data);
 }
 
diff --git a/block/commit.c b/block/commit.c
index a0ea86ff64..4a17bb73ec 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -117,7 +117,7 @@ static void commit_complete(Job *job, void *opaque)
      * bdrv_set_backing_hd() to fail. */
     block_job_remove_all_bdrv(bjob);
 
-    job_completed(job, ret, NULL);
+    job_completed(job, ret);
     g_free(data);
 
     /* If bdrv_drop_intermediate() didn't already do that, remove the commit
diff --git a/block/create.c b/block/create.c
index 04733c3618..26a385c6c7 100644
--- a/block/create.c
+++ b/block/create.c
@@ -35,14 +35,13 @@ typedef struct BlockdevCreateJob {
     BlockDriver *drv;
     BlockdevCreateOptions *opts;
     int ret;
-    Error *err;
 } BlockdevCreateJob;
 
 static void blockdev_create_complete(Job *job, void *opaque)
 {
     BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common);
 
-    job_completed(job, s->ret, s->err);
+    job_completed(job, s->ret);
 }
 
 static int coroutine_fn blockdev_create_run(Job *job, Error **errp)
@@ -50,7 +49,7 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp)
     BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common);
 
     job_progress_set_remaining(&s->common, 1);
-    s->ret = s->drv->bdrv_co_create(s->opts, &s->err);
+    s->ret = s->drv->bdrv_co_create(s->opts, errp);
     job_progress_update(&s->common, 1);
 
     qapi_free_BlockdevCreateOptions(s->opts);
diff --git a/block/mirror.c b/block/mirror.c
index 691763db41..be5dc6b7b0 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -710,7 +710,7 @@ static void mirror_exit(Job *job, void *opaque)
     blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
 
     bs_opaque->job = NULL;
-    job_completed(job, data->ret, NULL);
+    job_completed(job, data->ret);
 
     g_free(data);
     bdrv_drained_end(src);
diff --git a/block/stream.c b/block/stream.c
index b4b987df7e..26a775386b 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -93,7 +93,7 @@ out:
     }
 
     g_free(s->backing_file_str);
-    job_completed(job, data->ret, NULL);
+    job_completed(job, data->ret);
     g_free(data);
 }
 
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 9cf463d228..e0e99870a1 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -124,12 +124,16 @@ typedef struct Job {
     /** Estimated progress_current value at the completion of the job */
     int64_t progress_total;
 
-    /** Error string for a failed job (NULL if, and only if, job->ret == 0) */
-    char *error;
-
     /** ret code passed to job_completed. */
     int ret;
 
+    /**
+     * Error object for a failed job.
+     * If job->ret is nonzero and an error object was not set, it will be set
+     * to strerror(-job->ret) during job_completed.
+     */
+    Error *err;
+
     /** The completion function that will be called when the job completes.  */
     BlockCompletionFunc *cb;
 
@@ -484,15 +488,13 @@ void job_transition_to_ready(Job *job);
 /**
  * @job: The job being completed.
  * @ret: The status code.
- * @error: The error message for a failing job (only with @ret < 0). If @ret is
- *         negative, but NULL is given for @error, strerror() is used.
  *
  * Marks @job as completed. If @ret is non-zero, the job transaction it is part
  * of is aborted. If @ret is zero, the job moves into the WAITING state. If it
  * is the last job to complete in its transaction, all jobs in the transaction
  * move from WAITING to PENDING.
  */
-void job_completed(Job *job, int ret, Error *error);
+void job_completed(Job *job, int ret);
 
 /** Asynchronously complete the specified @job. */
 void job_complete(Job *job, Error **errp);
diff --git a/job-qmp.c b/job-qmp.c
index 410775df61..a969b2bbf0 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -146,8 +146,9 @@ static JobInfo *job_query_single(Job *job, Error **errp)
         .status             = job->status,
         .current_progress   = job->progress_current,
         .total_progress     = job->progress_total,
-        .has_error          = !!job->error,
-        .error              = g_strdup(job->error),
+        .has_error          = !!job->err,
+        .error              = job->err ? \
+                              g_strdup(error_get_pretty(job->err)) : NULL,
     };
 
     return info;
diff --git a/job.c b/job.c
index 76988f6678..bc1d970df4 100644
--- a/job.c
+++ b/job.c
@@ -369,7 +369,7 @@ void job_unref(Job *job)
 
         QLIST_REMOVE(job, job_list);
 
-        g_free(job->error);
+        error_free(job->err);
         g_free(job->id);
         g_free(job);
     }
@@ -546,7 +546,7 @@ static void coroutine_fn job_co_entry(void *opaque)
 
     assert(job && job->driver && job->driver->run);
     job_pause_point(job);
-    job->ret = job->driver->run(job, NULL);
+    job->ret = job->driver->run(job, &job->err);
 }
 
 
@@ -666,8 +666,8 @@ static void job_update_rc(Job *job)
         job->ret = -ECANCELED;
     }
     if (job->ret) {
-        if (!job->error) {
-            job->error = g_strdup(strerror(-job->ret));
+        if (!job->err) {
+            error_setg(&job->err, "%s", g_strdup(strerror(-job->ret)));
         }
         job_state_transition(job, JOB_STATUS_ABORTING);
     }
@@ -865,17 +865,11 @@ static void job_completed_txn_success(Job *job)
     }
 }
 
-void job_completed(Job *job, int ret, Error *error)
+void job_completed(Job *job, int ret)
 {
     assert(job && job->txn && !job_is_completed(job));
 
     job->ret = ret;
-    if (error) {
-        assert(job->ret < 0);
-        job->error = g_strdup(error_get_pretty(error));
-        error_free(error);
-    }
-
     job_update_rc(job);
     trace_job_completed(job, ret, job->ret);
     if (job->ret) {
@@ -893,7 +887,7 @@ void job_cancel(Job *job, bool force)
     }
     job_cancel_async(job, force);
     if (!job_started(job)) {
-        job_completed(job, -ECANCELED, NULL);
+        job_completed(job, -ECANCELED);
     } else if (job->deferred_to_main_loop) {
         job_completed_txn_abort(job);
     } else {
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index a7533861f6..00604dfc0c 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -754,7 +754,7 @@ typedef struct TestBlockJob {
 
 static void test_job_completed(Job *job, void *opaque)
 {
-    job_completed(job, 0, NULL);
+    job_completed(job, 0);
 }
 
 static int coroutine_fn test_job_run(Job *job, Error **errp)
diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c
index 3194924071..82cedee78b 100644
--- a/tests/test-blockjob-txn.c
+++ b/tests/test-blockjob-txn.c
@@ -34,7 +34,7 @@ static void test_block_job_complete(Job *job, void *opaque)
         rc = -ECANCELED;
     }
 
-    job_completed(job, rc, NULL);
+    job_completed(job, rc);
     bdrv_unref(bs);
 }
 
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
index b0462bfdec..408a226939 100644
--- a/tests/test-blockjob.c
+++ b/tests/test-blockjob.c
@@ -167,7 +167,7 @@ static void cancel_job_completed(Job *job, void *opaque)
 {
     CancelJob *s = opaque;
     s->completed = true;
-    job_completed(job, 0, NULL);
+    job_completed(job, 0);
 }
 
 static void cancel_job_complete(Job *job, Error **errp)
-- 
2.14.4

Re: [Qemu-devel] [PATCH v3 2/9] jobs: canonize Error object

Posted by Eric Blake 7 years, 5 months ago

On 08/29/2018 08:57 PM, John Snow wrote:
> Jobs presently use both an Error object in the case of the create job,
> and char strings in the case of generic errors elsewhere.
> 
> Unify the two paths as just j->err, and remove the extra argument from
> job_completed. The integer error code for job_completed is kept for now,
> to be removed shortly in a separate patch.
> 
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---

> +++ b/job.c

> @@ -666,8 +666,8 @@ static void job_update_rc(Job *job)
>           job->ret = -ECANCELED;
>       }
>       if (job->ret) {
> -        if (!job->error) {
> -            job->error = g_strdup(strerror(-job->ret));
> +        if (!job->err) {
> +            error_setg(&job->err, "%s", g_strdup(strerror(-job->ret)));

Memleak. Drop the g_strdup(), and just directly pass strerror() results 
to error_setg().  (I guess we can't quite use error_setg_errno() unless 
we add additional text beyond the strerror() results).

With that fixed,
Reviewed-by: Eric Blake <eblake@redhat.com>

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.           +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH v3 2/9] jobs: canonize Error object

Posted by Markus Armbruster 7 years, 5 months ago

Eric Blake <eblake@redhat.com> writes:

> On 08/29/2018 08:57 PM, John Snow wrote:
>> Jobs presently use both an Error object in the case of the create job,
>> and char strings in the case of generic errors elsewhere.
>>
>> Unify the two paths as just j->err, and remove the extra argument from
>> job_completed. The integer error code for job_completed is kept for now,
>> to be removed shortly in a separate patch.
>>
>> Signed-off-by: John Snow <jsnow@redhat.com>
>> ---
>
>> +++ b/job.c
>
>> @@ -666,8 +666,8 @@ static void job_update_rc(Job *job)
>>           job->ret = -ECANCELED;
>>       }
>>       if (job->ret) {
>> -        if (!job->error) {
>> -            job->error = g_strdup(strerror(-job->ret));
>> +        if (!job->err) {
>> +            error_setg(&job->err, "%s", g_strdup(strerror(-job->ret)));
>
> Memleak. Drop the g_strdup(), and just directly pass strerror()
> results to error_setg().  (I guess we can't quite use
> error_setg_errno() unless we add additional text beyond the strerror()
> results).

Adding such text might well be an improvement.  I'm not telling you to
do so (not having looked at the context myself), just to think about it.

> With that fixed,
> Reviewed-by: Eric Blake <eblake@redhat.com>

Re: [Qemu-devel] [PATCH v3 2/9] jobs: canonize Error object

Posted by John Snow 7 years, 5 months ago

On 08/31/2018 02:08 AM, Markus Armbruster wrote:
> Eric Blake <eblake@redhat.com> writes:
> 
>> On 08/29/2018 08:57 PM, John Snow wrote:
>>> Jobs presently use both an Error object in the case of the create job,
>>> and char strings in the case of generic errors elsewhere.
>>>
>>> Unify the two paths as just j->err, and remove the extra argument from
>>> job_completed. The integer error code for job_completed is kept for now,
>>> to be removed shortly in a separate patch.
>>>
>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>> ---
>>
>>> +++ b/job.c
>>
>>> @@ -666,8 +666,8 @@ static void job_update_rc(Job *job)
>>>           job->ret = -ECANCELED;
>>>       }
>>>       if (job->ret) {
>>> -        if (!job->error) {
>>> -            job->error = g_strdup(strerror(-job->ret));
>>> +        if (!job->err) {
>>> +            error_setg(&job->err, "%s", g_strdup(strerror(-job->ret)));
>>
>> Memleak. Drop the g_strdup(), and just directly pass strerror()
>> results to error_setg().  (I guess we can't quite use
>> error_setg_errno() unless we add additional text beyond the strerror()
>> results).
> 
> Adding such text might well be an improvement.  I'm not telling you to
> do so (not having looked at the context myself), just to think about it.
> 

In this case, and I agree with Kevin who suggested it; we ought to be
moving away from the retcode in general and using first-class error
objects for all of our jobs anyway.

In this case, the job has failed with a retcode and we wish to give the
user some hope of understanding why, but at this point in the code all
we know is what the strerror can tell us, so a generic prefix like "The
job failed" is not helpful because it will already be clear by events
and other things that the job failed.

The only text I can think of that would be useful is: "The job failed
and didn't give a more specific error message. Please email
qemu-devel@nongnu.org and harass the authors until they fix it. Anyway,
nice chatting to you, the generic error message is: %s"

--js

>> With that fixed,
>> Reviewed-by: Eric Blake <eblake@redhat.com>

Re: [Qemu-devel] [PATCH v3 2/9] jobs: canonize Error object

Posted by Markus Armbruster 7 years, 5 months ago

John Snow <jsnow@redhat.com> writes:

> On 08/31/2018 02:08 AM, Markus Armbruster wrote:
>> Eric Blake <eblake@redhat.com> writes:
>> 
>>> On 08/29/2018 08:57 PM, John Snow wrote:
>>>> Jobs presently use both an Error object in the case of the create job,
>>>> and char strings in the case of generic errors elsewhere.
>>>>
>>>> Unify the two paths as just j->err, and remove the extra argument from
>>>> job_completed. The integer error code for job_completed is kept for now,
>>>> to be removed shortly in a separate patch.
>>>>
>>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>>> ---
>>>
>>>> +++ b/job.c
>>>
>>>> @@ -666,8 +666,8 @@ static void job_update_rc(Job *job)
>>>>           job->ret = -ECANCELED;
>>>>       }
>>>>       if (job->ret) {
>>>> -        if (!job->error) {
>>>> -            job->error = g_strdup(strerror(-job->ret));
>>>> +        if (!job->err) {
>>>> +            error_setg(&job->err, "%s", g_strdup(strerror(-job->ret)));
>>>
>>> Memleak. Drop the g_strdup(), and just directly pass strerror()
>>> results to error_setg().  (I guess we can't quite use
>>> error_setg_errno() unless we add additional text beyond the strerror()
>>> results).
>> 
>> Adding such text might well be an improvement.  I'm not telling you to
>> do so (not having looked at the context myself), just to think about it.
>> 
>
> In this case, and I agree with Kevin who suggested it; we ought to be
> moving away from the retcode in general and using first-class error
> objects for all of our jobs anyway.
>
> In this case, the job has failed with a retcode and we wish to give the
> user some hope of understanding why, but at this point in the code all
> we know is what the strerror can tell us, so a generic prefix like "The
> job failed" is not helpful because it will already be clear by events
> and other things that the job failed.
>
> The only text I can think of that would be useful is: "The job failed
> and didn't give a more specific error message. Please email
> qemu-devel@nongnu.org and harass the authors until they fix it. Anyway,
> nice chatting to you, the generic error message is: %s"

That might well be an improvement ;)

Since I don't have a realistic example ready, I'm making up a contrieved
one:

--> {"execute": "job-frobnicate"}
<-- {"error": {"class": "GenericError", "desc": "Device or resource busy"}}

Would a reply

<-- {"error": {"class": "GenericError", "desc": "Job failed: Device or resource busy"}}

be better, worse, or a wash?

If it's a wash, maintainability breaks the tie.  So let's have a look at
the code.  It's either

            error_setg(&job->err, "%s", strerror(-job->ret));

or

            error_setg_errno(&job->err, -job->ret, "Job failed");

I'd prefer the latter, because it's the common way to put an errno code
into an Error object, and it lets me grep for the message more easily.

Re: [Qemu-devel] [PATCH v3 2/9] jobs: canonize Error object

Posted by Kevin Wolf 7 years, 5 months ago

Am 01.09.2018 um 09:54 hat Markus Armbruster geschrieben:
> John Snow <jsnow@redhat.com> writes:
> 
> > On 08/31/2018 02:08 AM, Markus Armbruster wrote:
> >> Eric Blake <eblake@redhat.com> writes:
> >> 
> >>> On 08/29/2018 08:57 PM, John Snow wrote:
> >>>> Jobs presently use both an Error object in the case of the create job,
> >>>> and char strings in the case of generic errors elsewhere.
> >>>>
> >>>> Unify the two paths as just j->err, and remove the extra argument from
> >>>> job_completed. The integer error code for job_completed is kept for now,
> >>>> to be removed shortly in a separate patch.
> >>>>
> >>>> Signed-off-by: John Snow <jsnow@redhat.com>
> >>>> ---
> >>>
> >>>> +++ b/job.c
> >>>
> >>>> @@ -666,8 +666,8 @@ static void job_update_rc(Job *job)
> >>>>           job->ret = -ECANCELED;
> >>>>       }
> >>>>       if (job->ret) {
> >>>> -        if (!job->error) {
> >>>> -            job->error = g_strdup(strerror(-job->ret));
> >>>> +        if (!job->err) {
> >>>> +            error_setg(&job->err, "%s", g_strdup(strerror(-job->ret)));
> >>>
> >>> Memleak. Drop the g_strdup(), and just directly pass strerror()
> >>> results to error_setg().  (I guess we can't quite use
> >>> error_setg_errno() unless we add additional text beyond the strerror()
> >>> results).
> >> 
> >> Adding such text might well be an improvement.  I'm not telling you to
> >> do so (not having looked at the context myself), just to think about it.
> >> 
> >
> > In this case, and I agree with Kevin who suggested it; we ought to be
> > moving away from the retcode in general and using first-class error
> > objects for all of our jobs anyway.
> >
> > In this case, the job has failed with a retcode and we wish to give the
> > user some hope of understanding why, but at this point in the code all
> > we know is what the strerror can tell us, so a generic prefix like "The
> > job failed" is not helpful because it will already be clear by events
> > and other things that the job failed.
> >
> > The only text I can think of that would be useful is: "The job failed
> > and didn't give a more specific error message. Please email
> > qemu-devel@nongnu.org and harass the authors until they fix it. Anyway,
> > nice chatting to you, the generic error message is: %s"
> 
> That might well be an improvement ;)
> 
> Since I don't have a realistic example ready, I'm making up a contrieved
> one:
> 
> --> {"execute": "job-frobnicate"}
> <-- {"error": {"class": "GenericError", "desc": "Device or resource busy"}}
> 
> Would a reply
> 
> <-- {"error": {"class": "GenericError", "desc": "Job failed: Device or resource busy"}}
> 
> be better, worse, or a wash?
> 
> If it's a wash, maintainability breaks the tie.  So let's have a look at
> the code.  It's either
> 
>             error_setg(&job->err, "%s", strerror(-job->ret));
> 
> or
> 
>             error_setg_errno(&job->err, -job->ret, "Job failed");
> 
> I'd prefer the latter, because it's the common way to put an errno code
> into an Error object, and it lets me grep for the message more easily.

Basically, if I call "job-frobnicate", I don't want an error message to
tell me "job-frobnicate failed: foo". I already know that I called
"job-frobnicate", so the actually useful message is only "foo".

A prefix like "job-frobnicate failed" should be added when it's one of
multiple possible error sources. For example, if a higher level monitor
command internally involves job-frobnicate, but also three other
operations, this is the place where the prefix should be added to any
error message returned by job-frobnicate so that we can distinguish
which part of the operation failed.

Kevin

Re: [Qemu-devel] [PATCH v3 2/9] jobs: canonize Error object

Posted by Markus Armbruster 7 years, 5 months ago

Kevin Wolf <kwolf@redhat.com> writes:

> Am 01.09.2018 um 09:54 hat Markus Armbruster geschrieben:
>> John Snow <jsnow@redhat.com> writes:
>> 
>> > On 08/31/2018 02:08 AM, Markus Armbruster wrote:
>> >> Eric Blake <eblake@redhat.com> writes:
>> >> 
>> >>> On 08/29/2018 08:57 PM, John Snow wrote:
>> >>>> Jobs presently use both an Error object in the case of the create job,
>> >>>> and char strings in the case of generic errors elsewhere.
>> >>>>
>> >>>> Unify the two paths as just j->err, and remove the extra argument from
>> >>>> job_completed. The integer error code for job_completed is kept for now,
>> >>>> to be removed shortly in a separate patch.
>> >>>>
>> >>>> Signed-off-by: John Snow <jsnow@redhat.com>
>> >>>> ---
>> >>>
>> >>>> +++ b/job.c
>> >>>
>> >>>> @@ -666,8 +666,8 @@ static void job_update_rc(Job *job)
>> >>>>           job->ret = -ECANCELED;
>> >>>>       }
>> >>>>       if (job->ret) {
>> >>>> -        if (!job->error) {
>> >>>> -            job->error = g_strdup(strerror(-job->ret));
>> >>>> +        if (!job->err) {
>> >>>> +            error_setg(&job->err, "%s", g_strdup(strerror(-job->ret)));
>> >>>
>> >>> Memleak. Drop the g_strdup(), and just directly pass strerror()
>> >>> results to error_setg().  (I guess we can't quite use
>> >>> error_setg_errno() unless we add additional text beyond the strerror()
>> >>> results).
>> >> 
>> >> Adding such text might well be an improvement.  I'm not telling you to
>> >> do so (not having looked at the context myself), just to think about it.
>> >> 
>> >
>> > In this case, and I agree with Kevin who suggested it; we ought to be
>> > moving away from the retcode in general and using first-class error
>> > objects for all of our jobs anyway.
>> >
>> > In this case, the job has failed with a retcode and we wish to give the
>> > user some hope of understanding why, but at this point in the code all
>> > we know is what the strerror can tell us, so a generic prefix like "The
>> > job failed" is not helpful because it will already be clear by events
>> > and other things that the job failed.
>> >
>> > The only text I can think of that would be useful is: "The job failed
>> > and didn't give a more specific error message. Please email
>> > qemu-devel@nongnu.org and harass the authors until they fix it. Anyway,
>> > nice chatting to you, the generic error message is: %s"
>> 
>> That might well be an improvement ;)
>> 
>> Since I don't have a realistic example ready, I'm making up a contrieved
>> one:
>> 
>> --> {"execute": "job-frobnicate"}
>> <-- {"error": {"class": "GenericError", "desc": "Device or resource busy"}}
>> 
>> Would a reply
>> 
>> <-- {"error": {"class": "GenericError", "desc": "Job failed: Device or resource busy"}}
>> 
>> be better, worse, or a wash?
>> 
>> If it's a wash, maintainability breaks the tie.  So let's have a look at
>> the code.  It's either
>> 
>>             error_setg(&job->err, "%s", strerror(-job->ret));
>> 
>> or
>> 
>>             error_setg_errno(&job->err, -job->ret, "Job failed");
>> 
>> I'd prefer the latter, because it's the common way to put an errno code
>> into an Error object, and it lets me grep for the message more easily.
>
> Basically, if I call "job-frobnicate", I don't want an error message to
> tell me "job-frobnicate failed: foo". I already know that I called
> "job-frobnicate", so the actually useful message is only "foo".
>
> A prefix like "job-frobnicate failed" should be added when it's one of
> multiple possible error sources. For example, if a higher level monitor
> command internally involves job-frobnicate, but also three other
> operations, this is the place where the prefix should be added to any
> error message returned by job-frobnicate so that we can distinguish
> which part of the operation failed.

John's point is that the error message is bad no matter what.

My point is that if it's bad no matter what, we can just as well emit it
the usual way, with error_setg_errno(), rather than playing games with
strerror().

Re: [Qemu-devel] [PATCH v3 2/9] jobs: canonize Error object

Posted by John Snow 7 years, 5 months ago


On 09/03/2018 10:11 AM, Markus Armbruster wrote:
> Kevin Wolf <kwolf@redhat.com> writes:
> 
>> Am 01.09.2018 um 09:54 hat Markus Armbruster geschrieben:
>>> John Snow <jsnow@redhat.com> writes:
>>>
>>>> On 08/31/2018 02:08 AM, Markus Armbruster wrote:
>>>>> Eric Blake <eblake@redhat.com> writes:
>>>>>
>>>>>> On 08/29/2018 08:57 PM, John Snow wrote:
>>>>>>> Jobs presently use both an Error object in the case of the create job,
>>>>>>> and char strings in the case of generic errors elsewhere.
>>>>>>>
>>>>>>> Unify the two paths as just j->err, and remove the extra argument from
>>>>>>> job_completed. The integer error code for job_completed is kept for now,
>>>>>>> to be removed shortly in a separate patch.
>>>>>>>
>>>>>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>>>>>> ---
>>>>>>
>>>>>>> +++ b/job.c
>>>>>>
>>>>>>> @@ -666,8 +666,8 @@ static void job_update_rc(Job *job)
>>>>>>>           job->ret = -ECANCELED;
>>>>>>>       }
>>>>>>>       if (job->ret) {
>>>>>>> -        if (!job->error) {
>>>>>>> -            job->error = g_strdup(strerror(-job->ret));
>>>>>>> +        if (!job->err) {
>>>>>>> +            error_setg(&job->err, "%s", g_strdup(strerror(-job->ret)));
>>>>>>
>>>>>> Memleak. Drop the g_strdup(), and just directly pass strerror()
>>>>>> results to error_setg().  (I guess we can't quite use
>>>>>> error_setg_errno() unless we add additional text beyond the strerror()
>>>>>> results).
>>>>>
>>>>> Adding such text might well be an improvement.  I'm not telling you to
>>>>> do so (not having looked at the context myself), just to think about it.
>>>>>
>>>>
>>>> In this case, and I agree with Kevin who suggested it; we ought to be
>>>> moving away from the retcode in general and using first-class error
>>>> objects for all of our jobs anyway.
>>>>
>>>> In this case, the job has failed with a retcode and we wish to give the
>>>> user some hope of understanding why, but at this point in the code all
>>>> we know is what the strerror can tell us, so a generic prefix like "The
>>>> job failed" is not helpful because it will already be clear by events
>>>> and other things that the job failed.
>>>>
>>>> The only text I can think of that would be useful is: "The job failed
>>>> and didn't give a more specific error message. Please email
>>>> qemu-devel@nongnu.org and harass the authors until they fix it. Anyway,
>>>> nice chatting to you, the generic error message is: %s"
>>>
>>> That might well be an improvement ;)
>>>
>>> Since I don't have a realistic example ready, I'm making up a contrieved
>>> one:
>>>
>>> --> {"execute": "job-frobnicate"}
>>> <-- {"error": {"class": "GenericError", "desc": "Device or resource busy"}}
>>>
>>> Would a reply
>>>
>>> <-- {"error": {"class": "GenericError", "desc": "Job failed: Device or resource busy"}}
>>>
>>> be better, worse, or a wash?
>>>
>>> If it's a wash, maintainability breaks the tie.  So let's have a look at
>>> the code.  It's either
>>>
>>>             error_setg(&job->err, "%s", strerror(-job->ret));
>>>
>>> or
>>>
>>>             error_setg_errno(&job->err, -job->ret, "Job failed");
>>>
>>> I'd prefer the latter, because it's the common way to put an errno code
>>> into an Error object, and it lets me grep for the message more easily.
>>
>> Basically, if I call "job-frobnicate", I don't want an error message to
>> tell me "job-frobnicate failed: foo". I already know that I called
>> "job-frobnicate", so the actually useful message is only "foo".
>>
>> A prefix like "job-frobnicate failed" should be added when it's one of
>> multiple possible error sources. For example, if a higher level monitor
>> command internally involves job-frobnicate, but also three other
>> operations, this is the place where the prefix should be added to any
>> error message returned by job-frobnicate so that we can distinguish
>> which part of the operation failed.
> 
> John's point is that the error message is bad no matter what.
> 
> My point is that if it's bad no matter what, we can just as well emit it
> the usual way, with error_setg_errno(), rather than playing games with
> strerror().
> 

Not worth it, in my opinion. This way keeps visible behavior consistent
until we fully eradicate the retcode.

--js