block/rbd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+)
Signed-off-by: Peter Lieven <pl@kamp.de>
---
block/rbd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 119 insertions(+)
diff --git a/block/rbd.c b/block/rbd.c
index dcf82b15b8..ef1eaa6af3 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -88,6 +88,7 @@ typedef struct BDRVRBDState {
char *namespace;
uint64_t image_size;
uint64_t object_size;
+ uint64_t features;
} BDRVRBDState;
typedef struct RBDTask {
@@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
s->image_size = info.size;
s->object_size = info.obj_size;
+ r = rbd_get_features(s->image, &s->features);
+ if (r < 0) {
+ error_setg_errno(errp, -r, "error getting image features from %s",
+ s->image_name);
+ rbd_close(s->image);
+ goto failed_open;
+ }
+
/* If we are using an rbd snapshot, we must be r/o, otherwise
* leave as-is */
if (s->snap != NULL) {
@@ -1259,6 +1268,115 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
return spec_info;
}
+typedef struct rbd_diff_req {
+ uint64_t offs;
+ uint64_t bytes;
+ int exists;
+} rbd_diff_req;
+
+static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len,
+ int exists, void *opaque)
+{
+ struct rbd_diff_req *req = opaque;
+
+ assert(req->offs + req->bytes <= offs);
+ assert(offs >= req->offs + req->bytes);
+
+ if (req->exists && offs > req->offs + req->bytes) {
+ /*
+ * we started in an allocated area and jumped over an unallocated area,
+ * req->bytes contains the length of the allocated area before the
+ * unallocated area. stop further processing.
+ */
+ return -9000;
+ }
+ if (req->exists && !exists) {
+ /*
+ * we started in an allocated area and reached a hole. req->bytes
+ * contains the length of the allocated area before the hole.
+ * stop further processing.
+ */
+ return -9000;
+ }
+ if (!req->exists && exists && offs > req->offs) {
+ /*
+ * we started in an unallocated area and hit the first allocated
+ * block. req->bytes must be set to the length of the unallocated area
+ * before the allocated area. stop further processing.
+ */
+ req->bytes = offs - req->offs;
+ return -9000;
+ }
+
+ /*
+ * assert that we catched all cases above and allocation state has not
+ * changed during callbacks.
+ */
+ assert(exists == req->exists || !req->bytes);
+ req->exists = exists;
+
+ /*
+ * assert that we either return an unallocated block or have got callbacks
+ * for all allocated blocks present.
+ */
+ assert(!req->exists || offs == req->offs + req->bytes);
+ req->bytes = offs + len - req->offs;
+
+ return 0;
+}
+
+static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset,
+ int64_t bytes, int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file)
+{
+ BDRVRBDState *s = bs->opaque;
+ int ret, r;
+ struct rbd_diff_req req = { .offs = offset };
+
+ assert(offset + bytes <= s->image_size);
+
+ /* default to all sectors allocated */
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+ if (map) {
+ *map = offset;
+ }
+ *pnum = bytes;
+
+ /* RBD image does not support fast-diff */
+ if (!(s->features & RBD_FEATURE_FAST_DIFF)) {
+ goto out;
+ }
+
+ r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
+ qemu_rbd_co_block_status_cb, &req);
+ if (r < 0 && r != -9000) {
+ goto out;
+ }
+ assert(req.bytes <= bytes);
+ if (!req.exists) {
+ if (r == 0 && !req.bytes) {
+ /*
+ * rbd_diff_iterate2 does not invoke callbacks for unallocated areas
+ * except for the case where an overlay has a hole where the parent
+ * has not. This here catches the case where no callback was
+ * invoked at all.
+ */
+ req.bytes = bytes;
+ }
+ ret &= ~BDRV_BLOCK_DATA;
+ ret |= BDRV_BLOCK_ZERO;
+ }
+ *pnum = req.bytes;
+
+out:
+ if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) {
+ *file = bs;
+ }
+ return ret;
+}
+
static int64_t qemu_rbd_getlength(BlockDriverState *bs)
{
BDRVRBDState *s = bs->opaque;
@@ -1494,6 +1612,7 @@ static BlockDriver bdrv_rbd = {
#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
.bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
#endif
+ .bdrv_co_block_status = qemu_rbd_co_block_status,
.bdrv_snapshot_create = qemu_rbd_snap_create,
.bdrv_snapshot_delete = qemu_rbd_snap_remove,
--
2.17.1
On Mon, Aug 09, 2021 at 03:41:36PM +0200, Peter Lieven wrote: Please, can you add a description? For example also describing what happens if RBD image does not support RBD_FEATURE_FAST_DIFF. >Signed-off-by: Peter Lieven <pl@kamp.de> >--- > block/rbd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 119 insertions(+) > >diff --git a/block/rbd.c b/block/rbd.c >index dcf82b15b8..ef1eaa6af3 100644 >--- a/block/rbd.c >+++ b/block/rbd.c >@@ -88,6 +88,7 @@ typedef struct BDRVRBDState { > char *namespace; > uint64_t image_size; > uint64_t object_size; >+ uint64_t features; > } BDRVRBDState; > > typedef struct RBDTask { >@@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, > s->image_size = info.size; > s->object_size = info.obj_size; > >+ r = rbd_get_features(s->image, &s->features); >+ if (r < 0) { >+ error_setg_errno(errp, -r, "error getting image features from %s", >+ s->image_name); >+ rbd_close(s->image); >+ goto failed_open; ^ You can use `failed_post_open` label here, so you can avoid to call rbd_close(). >+ } >+ > /* If we are using an rbd snapshot, we must be r/o, otherwise > * leave as-is */ > if (s->snap != NULL) { >@@ -1259,6 +1268,115 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs, > return spec_info; > } > >+typedef struct rbd_diff_req { >+ uint64_t offs; >+ uint64_t bytes; >+ int exists; >+} rbd_diff_req; >+ >+static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len, >+ int exists, void *opaque) >+{ >+ struct rbd_diff_req *req = opaque; >+ >+ assert(req->offs + req->bytes <= offs); >+ assert(offs >= req->offs + req->bytes); I think just one of the two asserts is enough, isn't that the same condition? >+ >+ if (req->exists && offs > req->offs + req->bytes) { >+ /* >+ * we started in an allocated area and jumped over an unallocated area, >+ * req->bytes contains the length of the allocated area before the >+ * unallocated area. stop further processing. >+ */ >+ return -9000; ^ What is this magical value? Please add a macro (with a comment) and also use it below in other places. >+ } >+ if (req->exists && !exists) { >+ /* >+ * we started in an allocated area and reached a hole. >req->bytes >+ * contains the length of the allocated area before the hole. >+ * stop further processing. >+ */ >+ return -9000; >+ } >+ if (!req->exists && exists && offs > req->offs) { >+ /* >+ * we started in an unallocated area and hit the first allocated >+ * block. req->bytes must be set to the length of the unallocated area >+ * before the allocated area. stop further processing. >+ */ >+ req->bytes = offs - req->offs; >+ return -9000; >+ } >+ >+ /* >+ * assert that we catched all cases above and allocation state has not >+ * changed during callbacks. >+ */ >+ assert(exists == req->exists || !req->bytes); >+ req->exists = exists; >+ >+ /* >+ * assert that we either return an unallocated block or have got callbacks >+ * for all allocated blocks present. >+ */ >+ assert(!req->exists || offs == req->offs + req->bytes); >+ req->bytes = offs + len - req->offs; >+ >+ return 0; >+} >+ >+static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, >+ bool want_zero, int64_t offset, >+ int64_t bytes, int64_t *pnum, >+ int64_t *map, >+ BlockDriverState **file) >+{ >+ BDRVRBDState *s = bs->opaque; >+ int ret, r; >+ struct rbd_diff_req req = { .offs = offset }; >+ >+ assert(offset + bytes <= s->image_size); >+ >+ /* default to all sectors allocated */ >+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; >+ if (map) { >+ *map = offset; >+ } >+ *pnum = bytes; >+ >+ /* RBD image does not support fast-diff */ >+ if (!(s->features & RBD_FEATURE_FAST_DIFF)) { >+ goto out; >+ } >+ >+ r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, >+ qemu_rbd_co_block_status_cb, &req); >+ if (r < 0 && r != -9000) { >+ goto out; >+ } >+ assert(req.bytes <= bytes); >+ if (!req.exists) { >+ if (r == 0 && !req.bytes) { >+ /* >+ * rbd_diff_iterate2 does not invoke callbacks for >unallocated areas >+ * except for the case where an overlay has a hole where >the parent >+ * has not. This here catches the case where no callback >was >+ * invoked at all. >+ */ >+ req.bytes = bytes; >+ } >+ ret &= ~BDRV_BLOCK_DATA; >+ ret |= BDRV_BLOCK_ZERO; >+ } >+ *pnum = req.bytes; >+ >+out: >+ if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) { Can ret be zero at this point? Doesn't BDRV_BLOCK_OFFSET_VALID always stay set? Thanks, Stefano
Am 10.08.21 um 10:51 schrieb Stefano Garzarella: > On Mon, Aug 09, 2021 at 03:41:36PM +0200, Peter Lieven wrote: > > Please, can you add a description? > For example also describing what happens if RBD image does not support RBD_FEATURE_FAST_DIFF. Sure. > >> Signed-off-by: Peter Lieven <pl@kamp.de> >> --- >> block/rbd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 119 insertions(+) >> >> diff --git a/block/rbd.c b/block/rbd.c >> index dcf82b15b8..ef1eaa6af3 100644 >> --- a/block/rbd.c >> +++ b/block/rbd.c >> @@ -88,6 +88,7 @@ typedef struct BDRVRBDState { >> char *namespace; >> uint64_t image_size; >> uint64_t object_size; >> + uint64_t features; >> } BDRVRBDState; >> >> typedef struct RBDTask { >> @@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, >> s->image_size = info.size; >> s->object_size = info.obj_size; >> >> + r = rbd_get_features(s->image, &s->features); >> + if (r < 0) { >> + error_setg_errno(errp, -r, "error getting image features from %s", >> + s->image_name); >> + rbd_close(s->image); >> + goto failed_open; > ^ > You can use `failed_post_open` label here, so you can avoid to call rbd_close(). Bad me, I developed this patch in a Qemu version where failed_post_open wasn't present... > >> + } >> + >> /* If we are using an rbd snapshot, we must be r/o, otherwise >> * leave as-is */ >> if (s->snap != NULL) { >> @@ -1259,6 +1268,115 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs, >> return spec_info; >> } >> >> +typedef struct rbd_diff_req { >> + uint64_t offs; >> + uint64_t bytes; >> + int exists; >> +} rbd_diff_req; >> + >> +static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len, >> + int exists, void *opaque) >> +{ >> + struct rbd_diff_req *req = opaque; >> + >> + assert(req->offs + req->bytes <= offs); >> + assert(offs >= req->offs + req->bytes); > > I think just one of the two asserts is enough, isn't that the same condition? Right. > >> + >> + if (req->exists && offs > req->offs + req->bytes) { >> + /* >> + * we started in an allocated area and jumped over an unallocated area, >> + * req->bytes contains the length of the allocated area before the >> + * unallocated area. stop further processing. >> + */ >> + return -9000; > ^ > What is this magical value? > > Please add a macro (with a comment) and also use it below in other places. Will add in V2. > >> + } >> + if (req->exists && !exists) { >> + /* >> + * we started in an allocated area and reached a hole. req->bytes >> + * contains the length of the allocated area before the hole. >> + * stop further processing. >> + */ >> + return -9000; >> + } >> + if (!req->exists && exists && offs > req->offs) { >> + /* >> + * we started in an unallocated area and hit the first allocated >> + * block. req->bytes must be set to the length of the unallocated area >> + * before the allocated area. stop further processing. >> + */ >> + req->bytes = offs - req->offs; >> + return -9000; >> + } >> + >> + /* >> + * assert that we catched all cases above and allocation state has not >> + * changed during callbacks. >> + */ >> + assert(exists == req->exists || !req->bytes); >> + req->exists = exists; >> + >> + /* >> + * assert that we either return an unallocated block or have got callbacks >> + * for all allocated blocks present. >> + */ >> + assert(!req->exists || offs == req->offs + req->bytes); >> + req->bytes = offs + len - req->offs; >> + >> + return 0; >> +} >> + >> +static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, >> + bool want_zero, int64_t offset, >> + int64_t bytes, int64_t *pnum, >> + int64_t *map, >> + BlockDriverState **file) >> +{ >> + BDRVRBDState *s = bs->opaque; >> + int ret, r; >> + struct rbd_diff_req req = { .offs = offset }; >> + >> + assert(offset + bytes <= s->image_size); >> + >> + /* default to all sectors allocated */ >> + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; >> + if (map) { >> + *map = offset; >> + } >> + *pnum = bytes; >> + >> + /* RBD image does not support fast-diff */ >> + if (!(s->features & RBD_FEATURE_FAST_DIFF)) { >> + goto out; >> + } >> + >> + r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, >> + qemu_rbd_co_block_status_cb, &req); >> + if (r < 0 && r != -9000) { >> + goto out; >> + } >> + assert(req.bytes <= bytes); >> + if (!req.exists) { >> + if (r == 0 && !req.bytes) { >> + /* >> + * rbd_diff_iterate2 does not invoke callbacks for unallocated areas >> + * except for the case where an overlay has a hole where the parent >> + * has not. This here catches the case where no callback was >> + * invoked at all. >> + */ >> + req.bytes = bytes; >> + } >> + ret &= ~BDRV_BLOCK_DATA; >> + ret |= BDRV_BLOCK_ZERO; >> + } >> + *pnum = req.bytes; >> + >> +out: >> + if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) { > > Can ret be zero at this point? > Doesn't BDRV_BLOCK_OFFSET_VALID always stay set? Right, I decided to declare any area as allocated if rbd_diff_iterate2 would fail so this can't happen. I will send a V2 shortly. Peter
© 2016 - 2024 Red Hat, Inc.