The idea is that ALLOCATE requests may overlap with other requests.
Reuse the existing block layer infrastructure for serialising requests.
Use the following approach:
- mark ALLOCATE serialising, so subsequent requests to the area wait
- ALLOCATE request itself must never wait if another request is in flight
already. Return EAGAIN, let the caller reconsider.
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
block/io.c | 27 +++++++++++++++++++--------
1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/block/io.c b/block/io.c
index cf2f84c..4b0d34f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -605,7 +605,8 @@ void bdrv_dec_in_flight(BlockDriverState *bs)
bdrv_wakeup(bs);
}
-static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
+static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self,
+ bool nowait)
{
BlockDriverState *bs = self->bs;
BdrvTrackedRequest *req;
@@ -636,11 +637,14 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
* will wait for us as soon as it wakes up, then just go on
* (instead of producing a deadlock in the former case). */
if (!req->waiting_for) {
+ waited = true;
+ if (nowait) {
+ break;
+ }
self->waiting_for = req;
qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
self->waiting_for = NULL;
retry = true;
- waited = true;
break;
}
}
@@ -1206,7 +1210,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
}
if (!(flags & BDRV_REQ_NO_SERIALISING)) {
- wait_serialising_requests(req);
+ wait_serialising_requests(req, false);
}
if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -1504,7 +1508,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
align);
- waited = wait_serialising_requests(req);
+ waited = wait_serialising_requests(req, flags & BDRV_REQ_ALLOCATE);
+ if (waited && flags & BDRV_REQ_ALLOCATE) {
+ return -EAGAIN;
+ }
assert(!waited || !req->serialising);
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
@@ -1608,7 +1615,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
/* RMW the unaligned part before head. */
mark_request_serialising(req, align);
- wait_serialising_requests(req);
+ wait_serialising_requests(req, false);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
align, &local_qiov, 0);
@@ -1628,6 +1635,10 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
bytes -= zero_bytes;
}
+ if (flags & BDRV_REQ_ALLOCATE) {
+ mark_request_serialising(req, align);
+ }
+
assert(!bytes || (offset & (align - 1)) == 0);
if (bytes >= align) {
/* Write the aligned part in the middle. */
@@ -1646,7 +1657,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
assert(align == tail_padding_bytes + bytes);
/* RMW the unaligned part after tail. */
mark_request_serialising(req, align);
- wait_serialising_requests(req);
+ wait_serialising_requests(req, false);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(child, req, offset, align,
align, &local_qiov, 0);
@@ -1717,7 +1728,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
struct iovec head_iov;
mark_request_serialising(&req, align);
- wait_serialising_requests(&req);
+ wait_serialising_requests(&req, false);
head_buf = qemu_blockalign(bs, align);
head_iov = (struct iovec) {
@@ -1758,7 +1769,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
bool waited;
mark_request_serialising(&req, align);
- waited = wait_serialising_requests(&req);
+ waited = wait_serialising_requests(&req, false);
assert(!waited || !use_local_qiov);
tail_buf = qemu_blockalign(bs, align);
--
2.7.4
On 2018-01-18 18:49, Anton Nefedov wrote:
> The idea is that ALLOCATE requests may overlap with other requests.
> Reuse the existing block layer infrastructure for serialising requests.
> Use the following approach:
> - mark ALLOCATE serialising, so subsequent requests to the area wait
> - ALLOCATE request itself must never wait if another request is in flight
> already. Return EAGAIN, let the caller reconsider.
>
> Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
> Reviewed-by: Eric Blake <eblake@redhat.com>
> ---
> block/io.c | 27 +++++++++++++++++++--------
> 1 file changed, 19 insertions(+), 8 deletions(-)
The basic principle looks good to me.
> diff --git a/block/io.c b/block/io.c
> index cf2f84c..4b0d34f 100644
> --- a/block/io.c
> +++ b/block/io.c
[...]
> @@ -1717,7 +1728,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
> struct iovec head_iov;
>
> mark_request_serialising(&req, align);
> - wait_serialising_requests(&req);
> + wait_serialising_requests(&req, false);
What if someone calls bdrv_co_pwritev() with BDRV_REQ_ZERO_WRITE |
BDRV_REQ_ALLOCATE? Then this should do exactly the same as
bdrv_co_do_zero_pwritev(), which it currently does not -- besides this
serialization, this includes returning -ENOTSUP if there is a head or
tail to write.
Max
>
> head_buf = qemu_blockalign(bs, align);
> head_iov = (struct iovec) {
> @@ -1758,7 +1769,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
> bool waited;
>
> mark_request_serialising(&req, align);
> - waited = wait_serialising_requests(&req);
> + waited = wait_serialising_requests(&req, false);
> assert(!waited || !use_local_qiov);
>
> tail_buf = qemu_blockalign(bs, align);
>
On 29/1/2018 10:48 PM, Max Reitz wrote:
> On 2018-01-18 18:49, Anton Nefedov wrote:
>> The idea is that ALLOCATE requests may overlap with other requests.
>> Reuse the existing block layer infrastructure for serialising requests.
>> Use the following approach:
>> - mark ALLOCATE serialising, so subsequent requests to the area wait
>> - ALLOCATE request itself must never wait if another request is in flight
>> already. Return EAGAIN, let the caller reconsider.
>>
>> Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
>> Reviewed-by: Eric Blake <eblake@redhat.com>
>> ---
>> block/io.c | 27 +++++++++++++++++++--------
>> 1 file changed, 19 insertions(+), 8 deletions(-)
>
> The basic principle looks good to me.
>
>> diff --git a/block/io.c b/block/io.c
>> index cf2f84c..4b0d34f 100644
>> --- a/block/io.c
>> +++ b/block/io.c
>
> [...]
>
>> @@ -1717,7 +1728,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
>> struct iovec head_iov;
>>
>> mark_request_serialising(&req, align);
>> - wait_serialising_requests(&req);
>> + wait_serialising_requests(&req, false);
>
> What if someone calls bdrv_co_pwritev() with BDRV_REQ_ZERO_WRITE |
> BDRV_REQ_ALLOCATE?
Either
assert(!(qiov && (flags & BDRV_REQ_ALLOCATE)));
will fail or bdrv_co_do_zero_pwritev() will be used.
> .. Then this should do exactly the same as
> bdrv_co_do_zero_pwritev(), which it currently does not -- besides this
> serialization, this includes returning -ENOTSUP if there is a head or
> tail to write.
>
Another question is if that assertion is ok.
In other words: should (qiov!=NULL && REQ_ALLOCATE) be a valid case?
e.g. with qiov filled with zeroes?
I'd rather document that not supported (and leave the assertion).
Actually, even (qiov!=NULL && REQ_ZERO_WRITE) looks kind of
unsupported/broken? Alignment code in bdrv_co_pwritev() zeroes out the
head and tail by passing the flag down bdrv_aligned_pwritev()
On 2018-01-30 13:36, Anton Nefedov wrote: > > > On 29/1/2018 10:48 PM, Max Reitz wrote: >> On 2018-01-18 18:49, Anton Nefedov wrote: >>> The idea is that ALLOCATE requests may overlap with other requests. >>> Reuse the existing block layer infrastructure for serialising requests. >>> Use the following approach: >>> - mark ALLOCATE serialising, so subsequent requests to the area wait >>> - ALLOCATE request itself must never wait if another request is in >>> flight >>> already. Return EAGAIN, let the caller reconsider. >>> >>> Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com> >>> Reviewed-by: Eric Blake <eblake@redhat.com> >>> --- >>> block/io.c | 27 +++++++++++++++++++-------- >>> 1 file changed, 19 insertions(+), 8 deletions(-) >> >> The basic principle looks good to me. >> >>> diff --git a/block/io.c b/block/io.c >>> index cf2f84c..4b0d34f 100644 >>> --- a/block/io.c >>> +++ b/block/io.c >> >> [...] >> >>> @@ -1717,7 +1728,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, >>> struct iovec head_iov; >>> mark_request_serialising(&req, align); >>> - wait_serialising_requests(&req); >>> + wait_serialising_requests(&req, false); >> >> What if someone calls bdrv_co_pwritev() with BDRV_REQ_ZERO_WRITE | >> BDRV_REQ_ALLOCATE? > > Either > > assert(!(qiov && (flags & BDRV_REQ_ALLOCATE))); > > will fail or bdrv_co_do_zero_pwritev() will be used. Ah, right, I didn't see that condition there. >> .. Then this should do exactly the same as >> bdrv_co_do_zero_pwritev(), which it currently does not -- besides this >> serialization, this includes returning -ENOTSUP if there is a head or >> tail to write. >> > > Another question is if that assertion is ok. > In other words: should (qiov!=NULL && REQ_ALLOCATE) be a valid case? > e.g. with qiov filled with zeroes? I think it's OK to leave the assertion that way. But maybe move it down, under the bdrv_co_do_zero_pwritev() call (and then omit the qiov != NULL, because that's guaranteed then)? (But maybe not everyone's as blind as me.) > I'd rather document that not supported (and leave the assertion). > > Actually, even (qiov!=NULL && REQ_ZERO_WRITE) looks kind of > unsupported/broken? Alignment code in bdrv_co_pwritev() zeroes out the > head and tail by passing the flag down bdrv_aligned_pwritev() Yes. Maybe we should have an assertion that you aren't allowed to pass a qiov with REQ_ZERO_WRITE... Max
On Thu 18 Jan 2018 06:49:02 PM CET, Anton Nefedov wrote:
> -static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
> +static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self,
> + bool nowait)
It's a bit confusing to have a function called wait_foo() with a
parameter that says "don't wait"...
How about
check_serialising_requests(BdrvTrackedRequest *self, bool wait)
> - waited = wait_serialising_requests(req);
> + waited = wait_serialising_requests(req, flags & BDRV_REQ_ALLOCATE);
> + if (waited && flags & BDRV_REQ_ALLOCATE) {
> + return -EAGAIN;
> + }
I find this more readable (even if not strictly necessary):
if (waited && (flags & BDRV_REQ_ALLOCATE)) {
None of my two comments are blockers, though, so
Reviewed-by: Alberto Garcia <berto@igalia.com>
Berto
On 31/1/2018 6:11 PM, Alberto Garcia wrote:
> On Thu 18 Jan 2018 06:49:02 PM CET, Anton Nefedov wrote:
>
>> -static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>> +static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self,
>> + bool nowait)
>
> It's a bit confusing to have a function called wait_foo() with a
> parameter that says "don't wait"...
>
> How about
>
> check_serialising_requests(BdrvTrackedRequest *self, bool wait)
>
I think it might be more important to emphasize in the name that the
function _might_ wait.
i.e. it feels worse to read
check_serialising_requests(req, true);
when one needs to follow the function to find out that it might yield.
Personally I'd vote for
static int check_or_wait_serialising_requests(
BdrvTrackedRequest *self, bool wait) {}
and maybe even:
static int check_serialising_requests(BdrvTrackedRequest *self) {
return check_or_wait_serialising_requests(self, false);
static int wait_serialising_requests(BdrvTrackedRequest *self) {
return check_or_wait_serialising_requests(self, true);
}
>> - waited = wait_serialising_requests(req);
>> + waited = wait_serialising_requests(req, flags & BDRV_REQ_ALLOCATE);
>> + if (waited && flags & BDRV_REQ_ALLOCATE) {
>> + return -EAGAIN;
>> + }
>
> I find this more readable (even if not strictly necessary):
>
> if (waited && (flags & BDRV_REQ_ALLOCATE)) {
>
Done!
> None of my two comments are blockers, though, so
>
> Reviewed-by: Alberto Garcia <berto@igalia.com>
>
> Berto
>
On Wed 31 Jan 2018 06:11:27 PM CET, Anton Nefedov wrote:
> On 31/1/2018 6:11 PM, Alberto Garcia wrote:
>> On Thu 18 Jan 2018 06:49:02 PM CET, Anton Nefedov wrote:
>>
>>> -static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
>>> +static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self,
>>> + bool nowait)
>>
>> It's a bit confusing to have a function called wait_foo() with a
>> parameter that says "don't wait"...
>>
>> How about
>>
>> check_serialising_requests(BdrvTrackedRequest *self, bool wait)
>>
>
> I think it might be more important to emphasize in the name that the
> function _might_ wait.
>
> i.e. it feels worse to read
> check_serialising_requests(req, true);
> when one needs to follow the function to find out that it might yield.
>
> Personally I'd vote for
>
> static int check_or_wait_serialising_requests(
> BdrvTrackedRequest *self, bool wait) {}
>
> and maybe even:
>
> static int check_serialising_requests(BdrvTrackedRequest *self) {
> return check_or_wait_serialising_requests(self, false);
>
> static int wait_serialising_requests(BdrvTrackedRequest *self) {
> return check_or_wait_serialising_requests(self, true);
> }
You're right. Either approach works for me though, also keeping the
current solution, wait_serialising_requests(req, true).
Berto
© 2016 - 2025 Red Hat, Inc.