[PATCH v4] block-jobs: flush target at the end of .run()

Vladimir Sementsov-Ogievskiy posted 1 patch 9 months, 3 weeks ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20230725174008.1147467-1-vsementsov@yandex-team.ru
Maintainers: John Snow <jsnow@redhat.com>, Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>, Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>
There is a newer version of this series
block/backup.c               |  7 +++++--
block/commit.c               |  2 +-
block/mirror.c               |  4 ++++
block/stream.c               |  7 ++++++-
blockjob.c                   | 18 ++++++++++++++++++
include/block/blockjob_int.h | 11 +++++++++++
6 files changed, 45 insertions(+), 4 deletions(-)
[PATCH v4] block-jobs: flush target at the end of .run()
Posted by Vladimir Sementsov-Ogievskiy 9 months, 3 weeks ago
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Actually block job is not completed without this final flush. It's
rather unexpected to have broken target when job was successfully
completed long ago and now we fail to flush or process just
crashed/killed.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
 block/backup.c               |  7 +++++--
 block/commit.c               |  2 +-
 block/mirror.c               |  4 ++++
 block/stream.c               |  7 ++++++-
 blockjob.c                   | 18 ++++++++++++++++++
 include/block/blockjob_int.h | 11 +++++++++++
 6 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index db3791f4d1..b9ff63359a 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -295,10 +295,13 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
             job_yield(job);
         }
     } else {
-        return backup_loop(s);
+        ret = backup_loop(s);
+        if (ret < 0) {
+            return ret;
+        }
     }
 
-    return 0;
+    return block_job_final_target_flush(&s->common, s->target_bs);
 }
 
 static void coroutine_fn backup_pause(Job *job)
diff --git a/block/commit.c b/block/commit.c
index aa45beb0f0..15df96b4f3 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -187,7 +187,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
         }
     }
 
-    return 0;
+    return block_job_final_target_flush(&s->common, blk_bs(s->base));
 }
 
 static const BlockJobDriver commit_job_driver = {
diff --git a/block/mirror.c b/block/mirror.c
index d3cacd1708..cd19b49f7f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1143,6 +1143,10 @@ immediate_exit:
     g_free(s->in_flight_bitmap);
     bdrv_dirty_iter_free(s->dbi);
 
+    if (ret >= 0) {
+        ret = block_job_final_target_flush(&s->common, blk_bs(s->target));
+    }
+
     if (need_drain) {
         s->in_drain = true;
         bdrv_drained_begin(bs);
diff --git a/block/stream.c b/block/stream.c
index e522bbdec5..f7e8b35e94 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -131,6 +131,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
     BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
     int64_t len;
     int64_t offset = 0;
+    int ret;
     int error = 0;
     int64_t n = 0; /* bytes */
 
@@ -149,7 +150,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 
     for ( ; offset < len; offset += n) {
         bool copy;
-        int ret;
 
         /* Note that even when no rate limit is applied we need to yield
          * with no pending I/O here so that bdrv_drain_all() returns.
@@ -207,6 +207,11 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
         }
     }
 
+    ret = block_job_final_target_flush(&s->common, s->target_bs);
+    if (error == 0) {
+        error = ret;
+    }
+
     /* Do not remove the backing file if an error was there but ignored. */
     return error;
 }
diff --git a/blockjob.c b/blockjob.c
index 25fe8e625d..313e586b0d 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -611,3 +611,21 @@ AioContext *block_job_get_aio_context(BlockJob *job)
     GLOBAL_STATE_CODE();
     return job->job.aio_context;
 }
+
+int coroutine_fn
+block_job_final_target_flush(BlockJob *job, BlockDriverState *target_bs)
+{
+    int ret;
+
+    WITH_GRAPH_RDLOCK_GUARD() {
+        ret = bdrv_co_flush(target_bs);
+    }
+
+    if (ret < 0 && !block_job_is_internal(job)) {
+        qapi_event_send_block_job_error(job->job.id,
+                                        IO_OPERATION_TYPE_WRITE,
+                                        BLOCK_ERROR_ACTION_REPORT);
+    }
+
+    return ret;
+}
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 104824040c..617e40b916 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -152,4 +152,15 @@ void block_job_ratelimit_sleep(BlockJob *job);
 BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
                                         int is_read, int error);
 
+/**
+ * block_job_final_target_flush:
+ * @job: The job to signal an error for if flush failed.
+ * @target_bs: The bs to flush.
+ *
+ * The function is intended to be called at the end of .run() for any data
+ * copying job.
+ */
+int coroutine_fn
+block_job_final_target_flush(BlockJob *job, BlockDriverState *target_bs);
+
 #endif
-- 
2.34.1
Re: [PATCH v4] block-jobs: flush target at the end of .run()
Posted by Hanna Czenczek 9 months, 3 weeks ago
On 25.07.23 19:40, Vladimir Sementsov-Ogievskiy wrote:
> From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>
> Actually block job is not completed without this final flush. It's
> rather unexpected to have broken target when job was successfully
> completed long ago and now we fail to flush or process just
> crashed/killed.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
>   block/backup.c               |  7 +++++--
>   block/commit.c               |  2 +-
>   block/mirror.c               |  4 ++++
>   block/stream.c               |  7 ++++++-
>   blockjob.c                   | 18 ++++++++++++++++++
>   include/block/blockjob_int.h | 11 +++++++++++
>   6 files changed, 45 insertions(+), 4 deletions(-)

Yes, that’s a good change.

[...]

> diff --git a/block/stream.c b/block/stream.c
> index e522bbdec5..f7e8b35e94 100644
> --- a/block/stream.c
> +++ b/block/stream.c

[...]

> @@ -207,6 +207,11 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
>           }
>       }
>   
> +    ret = block_job_final_target_flush(&s->common, s->target_bs);
> +    if (error == 0) {
> +        error = ret;
> +    }

In all other jobs, this function is invoked only if the job was 
successful, but here it’s called unconditionally.  I don’t mind one way 
or the other, but I think it should be consistent.  (Mainly just because 
inconsistency makes me wonder whether there’s an undocumented reason for 
it.)

> +
>       /* Do not remove the backing file if an error was there but ignored. */
>       return error;
>   }
> diff --git a/blockjob.c b/blockjob.c
> index 25fe8e625d..313e586b0d 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -611,3 +611,21 @@ AioContext *block_job_get_aio_context(BlockJob *job)
>       GLOBAL_STATE_CODE();
>       return job->job.aio_context;
>   }
> +
> +int coroutine_fn
> +block_job_final_target_flush(BlockJob *job, BlockDriverState *target_bs)
> +{
> +    int ret;
> +

Should we mark this as IO_CODE()?

> +    WITH_GRAPH_RDLOCK_GUARD() {
> +        ret = bdrv_co_flush(target_bs);
> +    }
> +
> +    if (ret < 0 && !block_job_is_internal(job)) {
> +        qapi_event_send_block_job_error(job->job.id,
> +                                        IO_OPERATION_TYPE_WRITE,
> +                                        BLOCK_ERROR_ACTION_REPORT);
> +    }

Would it make sense to rely on block_job_error_action() instead?  If so, 
should we use the on-target-error setting?

I have no informed opinion on this, but I think using 
block_job_error_action() does come to mind, so if we consciously decide 
against it, that’s probably worth a comment, too.

Hanna

> +
> +    return ret;
> +}


Re: [PATCH v4] block-jobs: flush target at the end of .run()
Posted by Vladimir Sementsov-Ogievskiy 9 months, 3 weeks ago
On 27.07.23 18:27, Hanna Czenczek wrote:
> On 25.07.23 19:40, Vladimir Sementsov-Ogievskiy wrote:
>> From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>
>> Actually block job is not completed without this final flush. It's
>> rather unexpected to have broken target when job was successfully
>> completed long ago and now we fail to flush or process just
>> crashed/killed.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>>   block/backup.c               |  7 +++++--
>>   block/commit.c               |  2 +-
>>   block/mirror.c               |  4 ++++
>>   block/stream.c               |  7 ++++++-
>>   blockjob.c                   | 18 ++++++++++++++++++
>>   include/block/blockjob_int.h | 11 +++++++++++
>>   6 files changed, 45 insertions(+), 4 deletions(-)
> 
> Yes, that’s a good change.
> 
> [...]
> 
>> diff --git a/block/stream.c b/block/stream.c
>> index e522bbdec5..f7e8b35e94 100644
>> --- a/block/stream.c
>> +++ b/block/stream.c
> 
> [...]
> 
>> @@ -207,6 +207,11 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
>>           }
>>       }
>> +    ret = block_job_final_target_flush(&s->common, s->target_bs);
>> +    if (error == 0) {
>> +        error = ret;
>> +    }
> 
> In all other jobs, this function is invoked only if the job was successful, but here it’s called unconditionally.  I don’t mind one way or the other, but I think it should be consistent.  (Mainly just because inconsistency makes me wonder whether there’s an undocumented reason for it.)

The inconsistency is preexisting: stream job do return error even when there were ignored errors. I'm not sure do we really need this logic for stream job, but decided to flush anyway. It looks consistent in context of stream_run: it continues copy operations even when error is already < 0. So, let's do flush too.

> 
>> +
>>       /* Do not remove the backing file if an error was there but ignored. */
>>       return error;
>>   }
>> diff --git a/blockjob.c b/blockjob.c
>> index 25fe8e625d..313e586b0d 100644
>> --- a/blockjob.c
>> +++ b/blockjob.c
>> @@ -611,3 +611,21 @@ AioContext *block_job_get_aio_context(BlockJob *job)
>>       GLOBAL_STATE_CODE();
>>       return job->job.aio_context;
>>   }
>> +
>> +int coroutine_fn
>> +block_job_final_target_flush(BlockJob *job, BlockDriverState *target_bs)
>> +{
>> +    int ret;
>> +
> 
> Should we mark this as IO_CODE()?

I'm not sure, but seems that yes.

> 
>> +    WITH_GRAPH_RDLOCK_GUARD() {
>> +        ret = bdrv_co_flush(target_bs);
>> +    }
>> +
>> +    if (ret < 0 && !block_job_is_internal(job)) {
>> +        qapi_event_send_block_job_error(job->job.id,
>> +                                        IO_OPERATION_TYPE_WRITE,
>> +                                        BLOCK_ERROR_ACTION_REPORT);
>> +    }
> 
> Would it make sense to rely on block_job_error_action() instead?  If so, should we use the on-target-error setting?

Hmm. To be honest we should. And handle the returned action appropriately. I'll resend, that's a good caught.

> 
> I have no informed opinion on this, but I think using block_job_error_action() does come to mind, so if we consciously decide against it, that’s probably worth a comment, too.
> 
> Hanna
> 
>> +
>> +    return ret;
>> +}
> 

-- 
Best regards,
Vladimir


Re: [PATCH v4] block-jobs: flush target at the end of .run()
Posted by Denis V. Lunev 9 months, 3 weeks ago
On 7/25/23 19:40, Vladimir Sementsov-Ogievskiy wrote:
> From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>
> Actually block job is not completed without this final flush. It's
> rather unexpected to have broken target when job was successfully
> completed long ago and now we fail to flush or process just
> crashed/killed.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
>   block/backup.c               |  7 +++++--
>   block/commit.c               |  2 +-
>   block/mirror.c               |  4 ++++
>   block/stream.c               |  7 ++++++-
>   blockjob.c                   | 18 ++++++++++++++++++
>   include/block/blockjob_int.h | 11 +++++++++++
>   6 files changed, 45 insertions(+), 4 deletions(-)
>
> diff --git a/block/backup.c b/block/backup.c
> index db3791f4d1..b9ff63359a 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -295,10 +295,13 @@ static int coroutine_fn backup_run(Job *job, Error **errp)
>               job_yield(job);
>           }
>       } else {
> -        return backup_loop(s);
> +        ret = backup_loop(s);
> +        if (ret < 0) {
> +            return ret;
> +        }
>       }
>   
> -    return 0;
> +    return block_job_final_target_flush(&s->common, s->target_bs);
>   }
>   
>   static void coroutine_fn backup_pause(Job *job)
> diff --git a/block/commit.c b/block/commit.c
> index aa45beb0f0..15df96b4f3 100644
> --- a/block/commit.c
> +++ b/block/commit.c
> @@ -187,7 +187,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
>           }
>       }
>   
> -    return 0;
> +    return block_job_final_target_flush(&s->common, blk_bs(s->base));
>   }
>   
>   static const BlockJobDriver commit_job_driver = {
> diff --git a/block/mirror.c b/block/mirror.c
> index d3cacd1708..cd19b49f7f 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -1143,6 +1143,10 @@ immediate_exit:
>       g_free(s->in_flight_bitmap);
>       bdrv_dirty_iter_free(s->dbi);
>   
> +    if (ret >= 0) {
> +        ret = block_job_final_target_flush(&s->common, blk_bs(s->target));
> +    }
> +
>       if (need_drain) {
>           s->in_drain = true;
>           bdrv_drained_begin(bs);
> diff --git a/block/stream.c b/block/stream.c
> index e522bbdec5..f7e8b35e94 100644
> --- a/block/stream.c
> +++ b/block/stream.c
> @@ -131,6 +131,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
>       BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
>       int64_t len;
>       int64_t offset = 0;
> +    int ret;
>       int error = 0;
>       int64_t n = 0; /* bytes */
>   
> @@ -149,7 +150,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
>   
>       for ( ; offset < len; offset += n) {
>           bool copy;
> -        int ret;
>   
>           /* Note that even when no rate limit is applied we need to yield
>            * with no pending I/O here so that bdrv_drain_all() returns.
> @@ -207,6 +207,11 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
>           }
>       }
>   
> +    ret = block_job_final_target_flush(&s->common, s->target_bs);
> +    if (error == 0) {
> +        error = ret;
> +    }
> +
>       /* Do not remove the backing file if an error was there but ignored. */
>       return error;
>   }
> diff --git a/blockjob.c b/blockjob.c
> index 25fe8e625d..313e586b0d 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -611,3 +611,21 @@ AioContext *block_job_get_aio_context(BlockJob *job)
>       GLOBAL_STATE_CODE();
>       return job->job.aio_context;
>   }
> +
> +int coroutine_fn
> +block_job_final_target_flush(BlockJob *job, BlockDriverState *target_bs)
> +{
> +    int ret;
> +
> +    WITH_GRAPH_RDLOCK_GUARD() {
> +        ret = bdrv_co_flush(target_bs);
> +    }
> +
> +    if (ret < 0 && !block_job_is_internal(job)) {
> +        qapi_event_send_block_job_error(job->job.id,
> +                                        IO_OPERATION_TYPE_WRITE,
> +                                        BLOCK_ERROR_ACTION_REPORT);
> +    }
> +
> +    return ret;
> +}
> diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
> index 104824040c..617e40b916 100644
> --- a/include/block/blockjob_int.h
> +++ b/include/block/blockjob_int.h
> @@ -152,4 +152,15 @@ void block_job_ratelimit_sleep(BlockJob *job);
>   BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
>                                           int is_read, int error);
>   
> +/**
> + * block_job_final_target_flush:
> + * @job: The job to signal an error for if flush failed.
> + * @target_bs: The bs to flush.
> + *
> + * The function is intended to be called at the end of .run() for any data
> + * copying job.
> + */
> +int coroutine_fn
> +block_job_final_target_flush(BlockJob *job, BlockDriverState *target_bs);
> +
>   #endif
Reviewed-by: Denis V. Lunev <den@openvz.org>