[PATCH] block/rbd: implement bdrv_co_block_status

Peter Lieven posted 1 patch 2 years, 9 months ago
Test checkpatch passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20210809134136.23140-1-pl@kamp.de
Maintainers: Ilya Dryomov <idryomov@gmail.com>, Max Reitz <mreitz@redhat.com>, Kevin Wolf <kwolf@redhat.com>, Peter Lieven <pl@kamp.de>
There is a newer version of this series
block/rbd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 119 insertions(+)
[PATCH] block/rbd: implement bdrv_co_block_status
Posted by Peter Lieven 2 years, 9 months ago
Signed-off-by: Peter Lieven <pl@kamp.de>
---
 block/rbd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/block/rbd.c b/block/rbd.c
index dcf82b15b8..ef1eaa6af3 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -88,6 +88,7 @@ typedef struct BDRVRBDState {
     char *namespace;
     uint64_t image_size;
     uint64_t object_size;
+    uint64_t features;
 } BDRVRBDState;
 
 typedef struct RBDTask {
@@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
     s->image_size = info.size;
     s->object_size = info.obj_size;
 
+    r = rbd_get_features(s->image, &s->features);
+    if (r < 0) {
+        error_setg_errno(errp, -r, "error getting image features from %s",
+                         s->image_name);
+        rbd_close(s->image);
+        goto failed_open;
+    }
+
     /* If we are using an rbd snapshot, we must be r/o, otherwise
      * leave as-is */
     if (s->snap != NULL) {
@@ -1259,6 +1268,115 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
     return spec_info;
 }
 
+typedef struct rbd_diff_req {
+    uint64_t offs;
+    uint64_t bytes;
+    int exists;
+} rbd_diff_req;
+
+static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len,
+                                       int exists, void *opaque)
+{
+    struct rbd_diff_req *req = opaque;
+
+    assert(req->offs + req->bytes <= offs);
+    assert(offs >= req->offs + req->bytes);
+
+    if (req->exists && offs > req->offs + req->bytes) {
+        /*
+         * we started in an allocated area and jumped over an unallocated area,
+         * req->bytes contains the length of the allocated area before the
+         * unallocated area. stop further processing.
+         */
+        return -9000;
+    }
+    if (req->exists && !exists) {
+        /*
+         * we started in an allocated area and reached a hole. req->bytes
+         * contains the length of the allocated area before the hole.
+         * stop further processing.
+         */
+        return -9000;
+    }
+    if (!req->exists && exists && offs > req->offs) {
+        /*
+         * we started in an unallocated area and hit the first allocated
+         * block. req->bytes must be set to the length of the unallocated area
+         * before the allocated area. stop further processing.
+         */
+        req->bytes = offs - req->offs;
+        return -9000;
+    }
+
+    /*
+     * assert that we catched all cases above and allocation state has not
+     * changed during callbacks.
+     */
+    assert(exists == req->exists || !req->bytes);
+    req->exists = exists;
+
+    /*
+     * assert that we either return an unallocated block or have got callbacks
+     * for all allocated blocks present.
+     */
+    assert(!req->exists || offs == req->offs + req->bytes);
+    req->bytes = offs + len - req->offs;
+
+    return 0;
+}
+
+static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+                                                 bool want_zero, int64_t offset,
+                                                 int64_t bytes, int64_t *pnum,
+                                                 int64_t *map,
+                                                 BlockDriverState **file)
+{
+    BDRVRBDState *s = bs->opaque;
+    int ret, r;
+    struct rbd_diff_req req = { .offs = offset };
+
+    assert(offset + bytes <= s->image_size);
+
+    /* default to all sectors allocated */
+    ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+    if (map) {
+        *map = offset;
+    }
+    *pnum = bytes;
+
+    /* RBD image does not support fast-diff */
+    if (!(s->features & RBD_FEATURE_FAST_DIFF)) {
+        goto out;
+    }
+
+    r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
+                          qemu_rbd_co_block_status_cb, &req);
+    if (r < 0 && r != -9000) {
+        goto out;
+    }
+    assert(req.bytes <= bytes);
+    if (!req.exists) {
+        if (r == 0 && !req.bytes) {
+            /*
+             * rbd_diff_iterate2 does not invoke callbacks for unallocated areas
+             * except for the case where an overlay has a hole where the parent
+             * has not. This here catches the case where no callback was
+             * invoked at all.
+             */
+            req.bytes = bytes;
+        }
+        ret &= ~BDRV_BLOCK_DATA;
+        ret |= BDRV_BLOCK_ZERO;
+    }
+    *pnum = req.bytes;
+
+out:
+    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) {
+        *file = bs;
+    }
+    return ret;
+}
+
 static int64_t qemu_rbd_getlength(BlockDriverState *bs)
 {
     BDRVRBDState *s = bs->opaque;
@@ -1494,6 +1612,7 @@ static BlockDriver bdrv_rbd = {
 #ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
     .bdrv_co_pwrite_zeroes  = qemu_rbd_co_pwrite_zeroes,
 #endif
+    .bdrv_co_block_status   = qemu_rbd_co_block_status,
 
     .bdrv_snapshot_create   = qemu_rbd_snap_create,
     .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
-- 
2.17.1



Re: [PATCH] block/rbd: implement bdrv_co_block_status
Posted by Stefano Garzarella 2 years, 9 months ago
On Mon, Aug 09, 2021 at 03:41:36PM +0200, Peter Lieven wrote:

Please, can you add a description?
For example also describing what happens if RBD image does not support 
RBD_FEATURE_FAST_DIFF.

>Signed-off-by: Peter Lieven <pl@kamp.de>
>---
> block/rbd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 119 insertions(+)
>
>diff --git a/block/rbd.c b/block/rbd.c
>index dcf82b15b8..ef1eaa6af3 100644
>--- a/block/rbd.c
>+++ b/block/rbd.c
>@@ -88,6 +88,7 @@ typedef struct BDRVRBDState {
>     char *namespace;
>     uint64_t image_size;
>     uint64_t object_size;
>+    uint64_t features;
> } BDRVRBDState;
>
> typedef struct RBDTask {
>@@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
>     s->image_size = info.size;
>     s->object_size = info.obj_size;
>
>+    r = rbd_get_features(s->image, &s->features);
>+    if (r < 0) {
>+        error_setg_errno(errp, -r, "error getting image features from %s",
>+                         s->image_name);
>+        rbd_close(s->image);
>+        goto failed_open;
           ^
You can use `failed_post_open` label here, so you can avoid to call 
rbd_close().

>+    }
>+
>     /* If we are using an rbd snapshot, we must be r/o, otherwise
>      * leave as-is */
>     if (s->snap != NULL) {
>@@ -1259,6 +1268,115 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
>     return spec_info;
> }
>
>+typedef struct rbd_diff_req {
>+    uint64_t offs;
>+    uint64_t bytes;
>+    int exists;
>+} rbd_diff_req;
>+
>+static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len,
>+                                       int exists, void *opaque)
>+{
>+    struct rbd_diff_req *req = opaque;
>+
>+    assert(req->offs + req->bytes <= offs);
>+    assert(offs >= req->offs + req->bytes);

I think just one of the two asserts is enough, isn't that the same 
condition?

>+
>+    if (req->exists && offs > req->offs + req->bytes) {
>+        /*
>+         * we started in an allocated area and jumped over an unallocated area,
>+         * req->bytes contains the length of the allocated area before the
>+         * unallocated area. stop further processing.
>+         */
>+        return -9000;
                  ^
What is this magical value?

Please add a macro (with a comment) and also use it below in other 
places.

>+    }
>+    if (req->exists && !exists) {
>+        /*
>+         * we started in an allocated area and reached a hole.  
>req->bytes
>+         * contains the length of the allocated area before the hole.
>+         * stop further processing.
>+         */
>+        return -9000;
>+    }
>+    if (!req->exists && exists && offs > req->offs) {
>+        /*
>+         * we started in an unallocated area and hit the first allocated
>+         * block. req->bytes must be set to the length of the unallocated area
>+         * before the allocated area. stop further processing.
>+         */
>+        req->bytes = offs - req->offs;
>+        return -9000;
>+    }
>+
>+    /*
>+     * assert that we catched all cases above and allocation state has not
>+     * changed during callbacks.
>+     */
>+    assert(exists == req->exists || !req->bytes);
>+    req->exists = exists;
>+
>+    /*
>+     * assert that we either return an unallocated block or have got callbacks
>+     * for all allocated blocks present.
>+     */
>+    assert(!req->exists || offs == req->offs + req->bytes);
>+    req->bytes = offs + len - req->offs;
>+
>+    return 0;
>+}
>+
>+static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
>+                                                 bool want_zero, int64_t offset,
>+                                                 int64_t bytes, int64_t *pnum,
>+                                                 int64_t *map,
>+                                                 BlockDriverState **file)
>+{
>+    BDRVRBDState *s = bs->opaque;
>+    int ret, r;
>+    struct rbd_diff_req req = { .offs = offset };
>+
>+    assert(offset + bytes <= s->image_size);
>+
>+    /* default to all sectors allocated */
>+    ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
>+    if (map) {
>+        *map = offset;
>+    }
>+    *pnum = bytes;
>+
>+    /* RBD image does not support fast-diff */
>+    if (!(s->features & RBD_FEATURE_FAST_DIFF)) {
>+        goto out;
>+    }
>+
>+    r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
>+                          qemu_rbd_co_block_status_cb, &req);
>+    if (r < 0 && r != -9000) {
>+        goto out;
>+    }
>+    assert(req.bytes <= bytes);
>+    if (!req.exists) {
>+        if (r == 0 && !req.bytes) {
>+            /*
>+             * rbd_diff_iterate2 does not invoke callbacks for 
>unallocated areas
>+             * except for the case where an overlay has a hole where 
>the parent
>+             * has not. This here catches the case where no callback 
>was
>+             * invoked at all.
>+             */
>+            req.bytes = bytes;
>+        }
>+        ret &= ~BDRV_BLOCK_DATA;
>+        ret |= BDRV_BLOCK_ZERO;
>+    }
>+    *pnum = req.bytes;
>+
>+out:
>+    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) {

Can ret be zero at this point?
Doesn't BDRV_BLOCK_OFFSET_VALID always stay set?

Thanks,
Stefano


Re: [PATCH] block/rbd: implement bdrv_co_block_status
Posted by Peter Lieven 2 years, 9 months ago
Am 10.08.21 um 10:51 schrieb Stefano Garzarella:
> On Mon, Aug 09, 2021 at 03:41:36PM +0200, Peter Lieven wrote:
>
> Please, can you add a description?
> For example also describing what happens if RBD image does not support RBD_FEATURE_FAST_DIFF.


Sure.


>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>> block/rbd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 119 insertions(+)
>>
>> diff --git a/block/rbd.c b/block/rbd.c
>> index dcf82b15b8..ef1eaa6af3 100644
>> --- a/block/rbd.c
>> +++ b/block/rbd.c
>> @@ -88,6 +88,7 @@ typedef struct BDRVRBDState {
>>     char *namespace;
>>     uint64_t image_size;
>>     uint64_t object_size;
>> +    uint64_t features;
>> } BDRVRBDState;
>>
>> typedef struct RBDTask {
>> @@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
>>     s->image_size = info.size;
>>     s->object_size = info.obj_size;
>>
>> +    r = rbd_get_features(s->image, &s->features);
>> +    if (r < 0) {
>> +        error_setg_errno(errp, -r, "error getting image features from %s",
>> +                         s->image_name);
>> +        rbd_close(s->image);
>> +        goto failed_open;
>           ^
> You can use `failed_post_open` label here, so you can avoid to call rbd_close().


Bad me, I developed this patch in a Qemu version where failed_post_open wasn't present...


>
>> +    }
>> +
>>     /* If we are using an rbd snapshot, we must be r/o, otherwise
>>      * leave as-is */
>>     if (s->snap != NULL) {
>> @@ -1259,6 +1268,115 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
>>     return spec_info;
>> }
>>
>> +typedef struct rbd_diff_req {
>> +    uint64_t offs;
>> +    uint64_t bytes;
>> +    int exists;
>> +} rbd_diff_req;
>> +
>> +static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len,
>> +                                       int exists, void *opaque)
>> +{
>> +    struct rbd_diff_req *req = opaque;
>> +
>> +    assert(req->offs + req->bytes <= offs);
>> +    assert(offs >= req->offs + req->bytes);
>
> I think just one of the two asserts is enough, isn't that the same condition?


Right.


>
>> +
>> +    if (req->exists && offs > req->offs + req->bytes) {
>> +        /*
>> +         * we started in an allocated area and jumped over an unallocated area,
>> +         * req->bytes contains the length of the allocated area before the
>> +         * unallocated area. stop further processing.
>> +         */
>> +        return -9000;
>                  ^
> What is this magical value?
>
> Please add a macro (with a comment) and also use it below in other places.


Will add in V2.


>
>> +    }
>> +    if (req->exists && !exists) {
>> +        /*
>> +         * we started in an allocated area and reached a hole. req->bytes
>> +         * contains the length of the allocated area before the hole.
>> +         * stop further processing.
>> +         */
>> +        return -9000;
>> +    }
>> +    if (!req->exists && exists && offs > req->offs) {
>> +        /*
>> +         * we started in an unallocated area and hit the first allocated
>> +         * block. req->bytes must be set to the length of the unallocated area
>> +         * before the allocated area. stop further processing.
>> +         */
>> +        req->bytes = offs - req->offs;
>> +        return -9000;
>> +    }
>> +
>> +    /*
>> +     * assert that we catched all cases above and allocation state has not
>> +     * changed during callbacks.
>> +     */
>> +    assert(exists == req->exists || !req->bytes);
>> +    req->exists = exists;
>> +
>> +    /*
>> +     * assert that we either return an unallocated block or have got callbacks
>> +     * for all allocated blocks present.
>> +     */
>> +    assert(!req->exists || offs == req->offs + req->bytes);
>> +    req->bytes = offs + len - req->offs;
>> +
>> +    return 0;
>> +}
>> +
>> +static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
>> +                                                 bool want_zero, int64_t offset,
>> +                                                 int64_t bytes, int64_t *pnum,
>> +                                                 int64_t *map,
>> + BlockDriverState **file)
>> +{
>> +    BDRVRBDState *s = bs->opaque;
>> +    int ret, r;
>> +    struct rbd_diff_req req = { .offs = offset };
>> +
>> +    assert(offset + bytes <= s->image_size);
>> +
>> +    /* default to all sectors allocated */
>> +    ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
>> +    if (map) {
>> +        *map = offset;
>> +    }
>> +    *pnum = bytes;
>> +
>> +    /* RBD image does not support fast-diff */
>> +    if (!(s->features & RBD_FEATURE_FAST_DIFF)) {
>> +        goto out;
>> +    }
>> +
>> +    r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
>> +                          qemu_rbd_co_block_status_cb, &req);
>> +    if (r < 0 && r != -9000) {
>> +        goto out;
>> +    }
>> +    assert(req.bytes <= bytes);
>> +    if (!req.exists) {
>> +        if (r == 0 && !req.bytes) {
>> +            /*
>> +             * rbd_diff_iterate2 does not invoke callbacks for unallocated areas
>> +             * except for the case where an overlay has a hole where the parent
>> +             * has not. This here catches the case where no callback was
>> +             * invoked at all.
>> +             */
>> +            req.bytes = bytes;
>> +        }
>> +        ret &= ~BDRV_BLOCK_DATA;
>> +        ret |= BDRV_BLOCK_ZERO;
>> +    }
>> +    *pnum = req.bytes;
>> +
>> +out:
>> +    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) {
>
> Can ret be zero at this point?
> Doesn't BDRV_BLOCK_OFFSET_VALID always stay set?


Right, I decided to declare any area as allocated if rbd_diff_iterate2 would fail so this

can't happen.


I will send a V2 shortly.


Peter