[PATCH] file-posix: fix over-writing of returning zone_append offset

Naohiro Aota posted 1 patch 1 year ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20231030073853.2601162-1-naohiro.aota@wdc.com
Maintainers: Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>
block/file-posix.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
[PATCH] file-posix: fix over-writing of returning zone_append offset
Posted by Naohiro Aota 1 year ago
raw_co_zone_append() stores its "offset" pointer in "s->offset", where "s" is
the "BDRVRawState *" of the block device. This pointer is used later in
raw_co_prw() to save the block address where the data was written.

When multiple IOs are in flight at the same time, a later IO's
raw_co_zone_append() call overwrites an earlier IO's offset pointer before
that IO's raw_co_prw() completes. As a result, the earlier zone append IO
returns its initial value (= the start address of the zone being written)
instead of the address where the data was actually written.

Fix the issue by passing the offset pointer to raw_co_prw() directly instead
of passing it through s->offset. Also, remove "offset" from BDRVRawState, as
it is no longer used.

Fixes: 4751d09adcc3 ("block: introduce zone append write for zoned devices")
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 block/file-posix.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 50e2b20d5c45..c39209358909 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -160,7 +160,6 @@ typedef struct BDRVRawState {
     bool has_write_zeroes:1;
     bool use_linux_aio:1;
     bool use_linux_io_uring:1;
-    int64_t *offset; /* offset of zone append operation */
     int page_cache_inconsistent; /* errno from fdatasync failure */
     bool has_fallocate;
     bool needs_alignment;
@@ -2445,12 +2444,13 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
     return true;
 }
 
-static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
+static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
                                    uint64_t bytes, QEMUIOVector *qiov, int type)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
     int ret;
+    uint64_t offset = *offset_ptr;
 
     if (fd_open(bs) < 0)
         return -EIO;
@@ -2513,8 +2513,8 @@ out:
             uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
             if (!BDRV_ZT_IS_CONV(*wp)) {
                 if (type & QEMU_AIO_ZONE_APPEND) {
-                    *s->offset = *wp;
-                    trace_zbd_zone_append_complete(bs, *s->offset
+                    *offset_ptr = *wp;
+                    trace_zbd_zone_append_complete(bs, *offset_ptr
                         >> BDRV_SECTOR_BITS);
                 }
                 /* Advance the wp if needed */
@@ -2536,14 +2536,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
                                       int64_t bytes, QEMUIOVector *qiov,
                                       BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
 }
 
 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
                                        int64_t bytes, QEMUIOVector *qiov,
                                        BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
 }
 
 static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
@@ -3506,8 +3506,6 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
     int64_t zone_size_mask = bs->bl.zone_size - 1;
     int64_t iov_len = 0;
     int64_t len = 0;
-    BDRVRawState *s = bs->opaque;
-    s->offset = offset;
 
     if (*offset & zone_size_mask) {
         error_report("sector offset %" PRId64 " is not aligned to zone size "
@@ -3528,7 +3526,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
     }
 
     trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
-    return raw_co_prw(bs, *offset, len, qiov, QEMU_AIO_ZONE_APPEND);
+    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
 }
 #endif
 
-- 
2.42.0
Re: [PATCH] file-posix: fix over-writing of returning zone_append offset
Posted by Michael Tokarev 1 year ago
30.10.2023 10:38, Naohiro Aota wrote:
> raw_co_zone_append() sets "s->offset" where "BDRVRawState *s". This pointer
> is used later at raw_co_prw() to save the block address where the data is
> written.
> 
> When multiple IOs are on-going at the same time, a later IO's
> raw_co_zone_append() call over-writes a former IO's offset address before
> raw_co_prw() completes. As a result, the former zone append IO returns the
> initial value (= the start address of the writing zone), instead of the
> proper address.
> 
> Fix the issue by passing the offset pointer to raw_co_prw() instead of
> passing it through s->offset. Also, remove "offset" from BDRVRawState as
> there is no usage anymore.
> 
> Fixes: 4751d09adcc3 ("block: introduce zone append write for zoned devices")
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>

This smells like -stable material. Please let me know if it is not.

Thanks,

/mjt
Re: [PATCH] file-posix: fix over-writing of returning zone_append offset
Posted by Hanna Czenczek 1 year ago
On 30.10.23 08:38, Naohiro Aota wrote:
> raw_co_zone_append() sets "s->offset" where "BDRVRawState *s". This pointer
> is used later at raw_co_prw() to save the block address where the data is
> written.
>
> When multiple IOs are on-going at the same time, a later IO's
> raw_co_zone_append() call over-writes a former IO's offset address before
> raw_co_prw() completes. As a result, the former zone append IO returns the
> initial value (= the start address of the writing zone), instead of the
> proper address.
>
> Fix the issue by passing the offset pointer to raw_co_prw() instead of
> passing it through s->offset. Also, remove "offset" from BDRVRawState as
> there is no usage anymore.
>
> Fixes: 4751d09adcc3 ("block: introduce zone append write for zoned devices")
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> ---
>   block/file-posix.c | 16 +++++++---------
>   1 file changed, 7 insertions(+), 9 deletions(-)

Thanks, applied to my block branch:

https://gitlab.com/hreitz/qemu/-/commits/block

Hanna
Re: [PATCH] file-posix: fix over-writing of returning zone_append offset
Posted by Stefan Hajnoczi 1 year ago
On Mon, Oct 30, 2023 at 04:38:53PM +0900, Naohiro Aota wrote:
> raw_co_zone_append() sets "s->offset" where "BDRVRawState *s". This pointer
> is used later at raw_co_prw() to save the block address where the data is
> written.
> 
> When multiple IOs are on-going at the same time, a later IO's
> raw_co_zone_append() call over-writes a former IO's offset address before
> raw_co_prw() completes. As a result, the former zone append IO returns the
> initial value (= the start address of the writing zone), instead of the
> proper address.
> 
> Fix the issue by passing the offset pointer to raw_co_prw() instead of
> passing it through s->offset. Also, remove "offset" from BDRVRawState as
> there is no usage anymore.
> 
> Fixes: 4751d09adcc3 ("block: introduce zone append write for zoned devices")
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> ---
>  block/file-posix.c | 16 +++++++---------
>  1 file changed, 7 insertions(+), 9 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Re: [PATCH] file-posix: fix over-writing of returning zone_append offset
Posted by Sam Li 1 year ago
Naohiro Aota <nao.aota@gmail.com> 于2023年10月30日周一 15:39写道:
>
> raw_co_zone_append() sets "s->offset" where "BDRVRawState *s". This pointer
> is used later at raw_co_prw() to save the block address where the data is
> written.
>
> When multiple IOs are on-going at the same time, a later IO's
> raw_co_zone_append() call over-writes a former IO's offset address before
> raw_co_prw() completes. As a result, the former zone append IO returns the
> initial value (= the start address of the writing zone), instead of the
> proper address.
>
> Fix the issue by passing the offset pointer to raw_co_prw() instead of
> passing it through s->offset. Also, remove "offset" from BDRVRawState as
> there is no usage anymore.
>
> Fixes: 4751d09adcc3 ("block: introduce zone append write for zoned devices")
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> ---
>  block/file-posix.c | 16 +++++++---------
>  1 file changed, 7 insertions(+), 9 deletions(-)

Thanks!

Reviewed-by: Sam Li <faithilikerun@gmail.com>

>
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 50e2b20d5c45..c39209358909 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -160,7 +160,6 @@ typedef struct BDRVRawState {
>      bool has_write_zeroes:1;
>      bool use_linux_aio:1;
>      bool use_linux_io_uring:1;
> -    int64_t *offset; /* offset of zone append operation */
>      int page_cache_inconsistent; /* errno from fdatasync failure */
>      bool has_fallocate;
>      bool needs_alignment;
> @@ -2445,12 +2444,13 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
>      return true;
>  }
>
> -static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
> +static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
>                                     uint64_t bytes, QEMUIOVector *qiov, int type)
>  {
>      BDRVRawState *s = bs->opaque;
>      RawPosixAIOData acb;
>      int ret;
> +    uint64_t offset = *offset_ptr;
>
>      if (fd_open(bs) < 0)
>          return -EIO;
> @@ -2513,8 +2513,8 @@ out:
>              uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
>              if (!BDRV_ZT_IS_CONV(*wp)) {
>                  if (type & QEMU_AIO_ZONE_APPEND) {
> -                    *s->offset = *wp;
> -                    trace_zbd_zone_append_complete(bs, *s->offset
> +                    *offset_ptr = *wp;
> +                    trace_zbd_zone_append_complete(bs, *offset_ptr
>                          >> BDRV_SECTOR_BITS);
>                  }
>                  /* Advance the wp if needed */
> @@ -2536,14 +2536,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
>                                        int64_t bytes, QEMUIOVector *qiov,
>                                        BdrvRequestFlags flags)
>  {
> -    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
> +    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
>  }
>
>  static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
>                                         int64_t bytes, QEMUIOVector *qiov,
>                                         BdrvRequestFlags flags)
>  {
> -    return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
> +    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
>  }
>
>  static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
> @@ -3506,8 +3506,6 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
>      int64_t zone_size_mask = bs->bl.zone_size - 1;
>      int64_t iov_len = 0;
>      int64_t len = 0;
> -    BDRVRawState *s = bs->opaque;
> -    s->offset = offset;
>
>      if (*offset & zone_size_mask) {
>          error_report("sector offset %" PRId64 " is not aligned to zone size "
> @@ -3528,7 +3526,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
>      }
>
>      trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
> -    return raw_co_prw(bs, *offset, len, qiov, QEMU_AIO_ZONE_APPEND);
> +    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
>  }
>  #endif
>
> --
> 2.42.0
>