block/file-posix.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-)
raw_co_zone_append() stores the caller's offset pointer in "s->offset", where
"s" is the "BDRVRawState *". raw_co_prw() later uses this pointer to save the
block address where the data was written.
When multiple IOs are ongoing at the same time, a later IO's
raw_co_zone_append() call overwrites an earlier IO's offset pointer before
that IO's raw_co_prw() completes. As a result, the earlier zone append IO
returns the initial value (= the start address of the zone being written)
instead of the proper address.
Fix the issue by passing the offset pointer to raw_co_prw() directly instead
of passing it through s->offset. Also, remove "offset" from BDRVRawState as
it is no longer used.
Fixes: 4751d09adcc3 ("block: introduce zone append write for zoned devices")
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
block/file-posix.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index 50e2b20d5c45..c39209358909 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -160,7 +160,6 @@ typedef struct BDRVRawState {
bool has_write_zeroes:1;
bool use_linux_aio:1;
bool use_linux_io_uring:1;
- int64_t *offset; /* offset of zone append operation */
int page_cache_inconsistent; /* errno from fdatasync failure */
bool has_fallocate;
bool needs_alignment;
@@ -2445,12 +2444,13 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
return true;
}
-static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
+static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
uint64_t bytes, QEMUIOVector *qiov, int type)
{
BDRVRawState *s = bs->opaque;
RawPosixAIOData acb;
int ret;
+ uint64_t offset = *offset_ptr;
if (fd_open(bs) < 0)
return -EIO;
@@ -2513,8 +2513,8 @@ out:
uint64_t *wp = &wps->wp[offset / bs->bl.zone_size];
if (!BDRV_ZT_IS_CONV(*wp)) {
if (type & QEMU_AIO_ZONE_APPEND) {
- *s->offset = *wp;
- trace_zbd_zone_append_complete(bs, *s->offset
+ *offset_ptr = *wp;
+ trace_zbd_zone_append_complete(bs, *offset_ptr
>> BDRV_SECTOR_BITS);
}
/* Advance the wp if needed */
@@ -2536,14 +2536,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
- return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
+ return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
}
static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
- return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
+ return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
}
static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
@@ -3506,8 +3506,6 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
int64_t zone_size_mask = bs->bl.zone_size - 1;
int64_t iov_len = 0;
int64_t len = 0;
- BDRVRawState *s = bs->opaque;
- s->offset = offset;
if (*offset & zone_size_mask) {
error_report("sector offset %" PRId64 " is not aligned to zone size "
@@ -3528,7 +3526,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
}
trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
- return raw_co_prw(bs, *offset, len, qiov, QEMU_AIO_ZONE_APPEND);
+ return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
}
#endif
--
2.42.0
30.10.2023 10:38, Naohiro Aota wrote: > raw_co_zone_append() sets "s->offset" where "BDRVRawState *s". This pointer > is used later at raw_co_prw() to save the block address where the data is > written. > > When multiple IOs are on-going at the same time, a later IO's > raw_co_zone_append() call over-writes a former IO's offset address before > raw_co_prw() completes. As a result, the former zone append IO returns the > initial value (= the start address of the writing zone), instead of the > proper address. > > Fix the issue by passing the offset pointer to raw_co_prw() instead of > passing it through s->offset. Also, remove "offset" from BDRVRawState as > there is no usage anymore. > > Fixes: 4751d09adcc3 ("block: introduce zone append write for zoned devices") > Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> This smells like -stable material. Please let me know if it is not. Thanks, /mjt
On 30.10.23 08:38, Naohiro Aota wrote: > raw_co_zone_append() sets "s->offset" where "BDRVRawState *s". This pointer > is used later at raw_co_prw() to save the block address where the data is > written. > > When multiple IOs are on-going at the same time, a later IO's > raw_co_zone_append() call over-writes a former IO's offset address before > raw_co_prw() completes. As a result, the former zone append IO returns the > initial value (= the start address of the writing zone), instead of the > proper address. > > Fix the issue by passing the offset pointer to raw_co_prw() instead of > passing it through s->offset. Also, remove "offset" from BDRVRawState as > there is no usage anymore. > > Fixes: 4751d09adcc3 ("block: introduce zone append write for zoned devices") > Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> > --- > block/file-posix.c | 16 +++++++--------- > 1 file changed, 7 insertions(+), 9 deletions(-) Thanks, applied to my block branch: https://gitlab.com/hreitz/qemu/-/commits/block Hanna
On Mon, Oct 30, 2023 at 04:38:53PM +0900, Naohiro Aota wrote: > raw_co_zone_append() sets "s->offset" where "BDRVRawState *s". This pointer > is used later at raw_co_prw() to save the block address where the data is > written. > > When multiple IOs are on-going at the same time, a later IO's > raw_co_zone_append() call over-writes a former IO's offset address before > raw_co_prw() completes. As a result, the former zone append IO returns the > initial value (= the start address of the writing zone), instead of the > proper address. > > Fix the issue by passing the offset pointer to raw_co_prw() instead of > passing it through s->offset. Also, remove "offset" from BDRVRawState as > there is no usage anymore. > > Fixes: 4751d09adcc3 ("block: introduce zone append write for zoned devices") > Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> > --- > block/file-posix.c | 16 +++++++--------- > 1 file changed, 7 insertions(+), 9 deletions(-) Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Naohiro Aota <nao.aota@gmail.com> 于2023年10月30日周一 15:39写道: > > raw_co_zone_append() sets "s->offset" where "BDRVRawState *s". This pointer > is used later at raw_co_prw() to save the block address where the data is > written. > > When multiple IOs are on-going at the same time, a later IO's > raw_co_zone_append() call over-writes a former IO's offset address before > raw_co_prw() completes. As a result, the former zone append IO returns the > initial value (= the start address of the writing zone), instead of the > proper address. > > Fix the issue by passing the offset pointer to raw_co_prw() instead of > passing it through s->offset. Also, remove "offset" from BDRVRawState as > there is no usage anymore. > > Fixes: 4751d09adcc3 ("block: introduce zone append write for zoned devices") > Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> > --- > block/file-posix.c | 16 +++++++--------- > 1 file changed, 7 insertions(+), 9 deletions(-) Thanks! Reviewed-by: Sam Li <faithilikerun@gmail.com> > > diff --git a/block/file-posix.c b/block/file-posix.c > index 50e2b20d5c45..c39209358909 100644 > --- a/block/file-posix.c > +++ b/block/file-posix.c > @@ -160,7 +160,6 @@ typedef struct BDRVRawState { > bool has_write_zeroes:1; > bool use_linux_aio:1; > bool use_linux_io_uring:1; > - int64_t *offset; /* offset of zone append operation */ > int page_cache_inconsistent; /* errno from fdatasync failure */ > bool has_fallocate; > bool needs_alignment; > @@ -2445,12 +2444,13 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) > return true; > } > > -static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, > +static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, > uint64_t bytes, QEMUIOVector *qiov, int type) > { > BDRVRawState *s = bs->opaque; > RawPosixAIOData acb; > int ret; > + uint64_t offset = *offset_ptr; > > if (fd_open(bs) < 0) > return -EIO; > @@ -2513,8 +2513,8 @@ out: > uint64_t *wp = &wps->wp[offset / 
bs->bl.zone_size]; > if (!BDRV_ZT_IS_CONV(*wp)) { > if (type & QEMU_AIO_ZONE_APPEND) { > - *s->offset = *wp; > - trace_zbd_zone_append_complete(bs, *s->offset > + *offset_ptr = *wp; > + trace_zbd_zone_append_complete(bs, *offset_ptr > >> BDRV_SECTOR_BITS); > } > /* Advance the wp if needed */ > @@ -2536,14 +2536,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset, > int64_t bytes, QEMUIOVector *qiov, > BdrvRequestFlags flags) > { > - return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ); > + return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ); > } > > static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, > int64_t bytes, QEMUIOVector *qiov, > BdrvRequestFlags flags) > { > - return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE); > + return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE); > } > > static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) > @@ -3506,8 +3506,6 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs, > int64_t zone_size_mask = bs->bl.zone_size - 1; > int64_t iov_len = 0; > int64_t len = 0; > - BDRVRawState *s = bs->opaque; > - s->offset = offset; > > if (*offset & zone_size_mask) { > error_report("sector offset %" PRId64 " is not aligned to zone size " > @@ -3528,7 +3526,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs, > } > > trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS); > - return raw_co_prw(bs, *offset, len, qiov, QEMU_AIO_ZONE_APPEND); > + return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND); > } > #endif > > -- > 2.42.0 >
© 2016 - 2024 Red Hat, Inc.