[Qemu-devel] [PATCH] block/fleecing-filter: new filter driver for fleecing

Vladimir Sementsov-Ogievskiy posted 1 patch 5 years, 10 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20180621154850.23354-1-vsementsov@virtuozzo.com
Test checkpatch passed
Test docker-mingw@fedora passed
Test docker-quick@centos7 passed
Test s390x passed
qapi/block-core.json    |  9 ++++---
block/fleecing-filter.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++
block/Makefile.objs     |  1 +
3 files changed, 79 insertions(+), 3 deletions(-)
create mode 100644 block/fleecing-filter.c
[Qemu-devel] [PATCH] block/fleecing-filter: new filter driver for fleecing
Posted by Vladimir Sementsov-Ogievskiy 5 years, 10 months ago
We need to synchronize the backup job with reads from the fleecing image,
as is done in block/replication.c.

Otherwise, the following situation is theoretically possible:

1. the client starts reading
2. the client finds that there is no corresponding cluster in the
   fleecing image
3. the client is about to read from the backing file (i.e. the active image)
4. the guest writes to the active image
5. this write is stopped by backup(sync=none) and the cluster is copied to
   the fleecing image
6. the guest write continues...
7. and the client reads _new_ (or partly new) data from the active image

So this fleecing-filter should sit above the fleecing image. The whole
picture of fleecing looks like this:

    +-------+           +------------+
    |       |           |            |
    | guest |           | NBD client +<------+
    |       |           |            |       |
    ++-----++           +------------+       |only read
     |     ^                                 |
     | IO  |                                 |
     v     |                           +-----+------+
    ++-----+---------+                 |            |
    |                |                 |  internal  |
    |  active image  +----+            | NBD server |
    |                |    |            |            |
    +-+--------------+    |backup      +-+----------+
      ^                   |sync=none     ^
      |backing            |              |only read
      |                   |              |
    +-+--------------+    |       +------+----------+
    |                |    |       |                 |
    | fleecing image +<---+       | fleecing filter |
    |                |            |                 |
    +--------+-------+            +-----+-----------+
             ^                          |
             |                          |
             +--------------------------+
                       file

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 qapi/block-core.json    |  9 ++++---
 block/fleecing-filter.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++
 block/Makefile.objs     |  1 +
 3 files changed, 79 insertions(+), 3 deletions(-)
 create mode 100644 block/fleecing-filter.c

diff --git a/qapi/block-core.json b/qapi/block-core.json
index dfaa050651..b21bc3b693 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2543,7 +2543,8 @@
             'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
             'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
             'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
-            'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
+            'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs',
+            'fleecing-filter' ] }
 
 ##
 # @BlockdevOptionsFile:
@@ -3607,7 +3608,8 @@
       'vmdk':       'BlockdevOptionsGenericCOWFormat',
       'vpc':        'BlockdevOptionsGenericFormat',
       'vvfat':      'BlockdevOptionsVVFAT',
-      'vxhs':       'BlockdevOptionsVxHS'
+      'vxhs':       'BlockdevOptionsVxHS',
+      'fleecing-filter': 'BlockdevOptionsGenericFormat'
   } }
 
 ##
@@ -4135,7 +4137,8 @@
       'vmdk':           'BlockdevCreateNotSupported',
       'vpc':            'BlockdevCreateOptionsVpc',
       'vvfat':          'BlockdevCreateNotSupported',
-      'vxhs':           'BlockdevCreateNotSupported'
+      'vxhs':           'BlockdevCreateNotSupported',
+      'fleecing-filter': 'BlockdevCreateNotSupported'
   } }
 
 ##
diff --git a/block/fleecing-filter.c b/block/fleecing-filter.c
new file mode 100644
index 0000000000..60bac9c0b4
--- /dev/null
+++ b/block/fleecing-filter.c
@@ -0,0 +1,72 @@
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "block/blockjob.h"
+#include "block/block_int.h"
+#include "block/block_backup.h"
+
+static int64_t fleecing_getlength(BlockDriverState *bs)
+{
+    return bdrv_getlength(bs->file->bs);
+}
+
+static coroutine_fn int fleecing_co_preadv(BlockDriverState *bs,
+                                           uint64_t offset, uint64_t bytes,
+                                           QEMUIOVector *qiov, int flags)
+{
+    int ret;
+    BlockJob *job = bs->file->bs->backing->bs->job;
+    CowRequest req;
+
+    backup_wait_for_overlapping_requests(job, offset, bytes);
+    backup_cow_request_begin(&req, job, offset, bytes);
+
+    ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+
+    backup_cow_request_end(&req);
+
+    return ret;
+}
+
+static coroutine_fn int fleecing_co_pwritev(BlockDriverState *bs,
+                                            uint64_t offset, uint64_t bytes,
+                                            QEMUIOVector *qiov, int flags)
+{
+    return -EINVAL;
+}
+
+static bool fleecing_recurse_is_first_non_filter(BlockDriverState *bs,
+                                                 BlockDriverState *candidate)
+{
+    return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
+}
+
+static int fleecing_open(BlockDriverState *bs, QDict *options,
+                         int flags, Error **errp)
+{
+    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false,
+                               errp);
+
+    return bs->file ? 0 : -EINVAL;
+}
+
+BlockDriver bdrv_fleecing_filter = {
+    .format_name = "fleecing-filter",
+    .protocol_name = "fleecing-filter",
+    .instance_size = 0,
+
+    .bdrv_open = fleecing_open,
+
+    .bdrv_getlength = fleecing_getlength,
+    .bdrv_co_preadv = fleecing_co_preadv,
+    .bdrv_co_pwritev = fleecing_co_pwritev,
+
+    .is_filter = true,
+    .bdrv_recurse_is_first_non_filter = fleecing_recurse_is_first_non_filter,
+};
+
+static void bdrv_fleecing_init(void)
+{
+    bdrv_register(&bdrv_fleecing_filter);
+}
+
+block_init(bdrv_fleecing_init);
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 899bfb5e2c..aa0a6dd971 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -27,6 +27,7 @@ block-obj-y += write-threshold.o
 block-obj-y += backup.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
 block-obj-y += throttle.o copy-on-read.o
+block-obj-y += fleecing-filter.o
 
 block-obj-y += crypto.o
 
-- 
2.11.1


Re: [Qemu-devel] [PATCH] block/fleecing-filter: new filter driver for fleecing
Posted by John Snow 5 years, 9 months ago

On 06/21/2018 11:48 AM, Vladimir Sementsov-Ogievskiy wrote:
> We need to synchronize backup job with reading from fleecing image
> like it was done in block/replication.c.
> 
> Otherwise, the following situation is theoretically possible:
> 
> 1. client start reading
> 2. client understand, that there is no corresponding cluster in
>    fleecing image
> 3. client is going to read from backing file (i.e. active image)
> 4. guest writes to active image
> 5. this write is stopped by backup(sync=none) and cluster is copied to
>    fleecing image
> 6. guest write continues...
> 7. and client reads _new_ (or partly new) date from active image
> 
> So, this fleecing-filter should be above fleecing image, the whole
> picture of fleecing looks like this:
> 
>     +-------+           +------------+
>     |       |           |            |
>     | guest |           | NBD client +<------+
>     |       |           |            |       |
>     ++-----++           +------------+       |only read
>      |     ^                                 |
>      | IO  |                                 |
>      v     |                           +-----+------+
>     ++-----+---------+                 |            |
>     |                |                 |  internal  |
>     |  active image  +----+            | NBD server |
>     |                |    |            |            |
>     +-+--------------+    |backup      +-+----------+
>       ^                   |sync=none     ^
>       |backing            |              |only read
>       |                   |              |
>     +-+--------------+    |       +------+----------+
>     |                |    |       |                 |
>     | fleecing image +<---+       | fleecing filter |
>     |                |            |                 |
>     +--------+-------+            +-----+-----------+
>              ^                          |
>              |                          |
>              +--------------------------+
>                        file
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  qapi/block-core.json    |  9 ++++---
>  block/fleecing-filter.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++
>  block/Makefile.objs     |  1 +
>  3 files changed, 79 insertions(+), 3 deletions(-)
>  create mode 100644 block/fleecing-filter.c
> 
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index dfaa050651..b21bc3b693 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -2543,7 +2543,8 @@
>              'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
>              'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
>              'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
> -            'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
> +            'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs',
> +            'fleecing-filter' ] }
>  
>  ##
>  # @BlockdevOptionsFile:
> @@ -3607,7 +3608,8 @@
>        'vmdk':       'BlockdevOptionsGenericCOWFormat',
>        'vpc':        'BlockdevOptionsGenericFormat',
>        'vvfat':      'BlockdevOptionsVVFAT',
> -      'vxhs':       'BlockdevOptionsVxHS'
> +      'vxhs':       'BlockdevOptionsVxHS',
> +      'fleecing-filter': 'BlockdevOptionsGenericFormat'
>    } }
>  
>  ##
> @@ -4135,7 +4137,8 @@
>        'vmdk':           'BlockdevCreateNotSupported',
>        'vpc':            'BlockdevCreateOptionsVpc',
>        'vvfat':          'BlockdevCreateNotSupported',
> -      'vxhs':           'BlockdevCreateNotSupported'
> +      'vxhs':           'BlockdevCreateNotSupported',
> +      'fleecing-filter': 'BlockdevCreateNotSupported'
>    } }
>  
>  ##
> diff --git a/block/fleecing-filter.c b/block/fleecing-filter.c
> new file mode 100644
> index 0000000000..60bac9c0b4
> --- /dev/null
> +++ b/block/fleecing-filter.c
> @@ -0,0 +1,72 @@
> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +#include "block/blockjob.h"
> +#include "block/block_int.h"
> +#include "block/block_backup.h"
> +
> +static int64_t fleecing_getlength(BlockDriverState *bs)
> +{
> +    return bdrv_getlength(bs->file->bs);
> +}
> +
> +static coroutine_fn int fleecing_co_preadv(BlockDriverState *bs,
> +                                           uint64_t offset, uint64_t bytes,
> +                                           QEMUIOVector *qiov, int flags)
> +{
> +    int ret;
> +    BlockJob *job = bs->file->bs->backing->bs->job;
> +    CowRequest req;
> +
> +    backup_wait_for_overlapping_requests(job, offset, bytes);
> +    backup_cow_request_begin(&req, job, offset, bytes);
> +
> +    ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
> +
> +    backup_cow_request_end(&req);
> +
> +    return ret;
> +}
> +
> +static coroutine_fn int fleecing_co_pwritev(BlockDriverState *bs,
> +                                            uint64_t offset, uint64_t bytes,
> +                                            QEMUIOVector *qiov, int flags)
> +{
> +    return -EINVAL;
> +}
> +
> +static bool fleecing_recurse_is_first_non_filter(BlockDriverState *bs,
> +                                                 BlockDriverState *candidate)
> +{
> +    return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
> +}
> +
> +static int fleecing_open(BlockDriverState *bs, QDict *options,
> +                         int flags, Error **errp)
> +{
> +    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false,
> +                               errp);
> +
> +    return bs->file ? 0 : -EINVAL;
> +}
> +
> +BlockDriver bdrv_fleecing_filter = {
> +    .format_name = "fleecing-filter",
> +    .protocol_name = "fleecing-filter",
> +    .instance_size = 0,
> +
> +    .bdrv_open = fleecing_open,
> +
> +    .bdrv_getlength = fleecing_getlength,
> +    .bdrv_co_preadv = fleecing_co_preadv,
> +    .bdrv_co_pwritev = fleecing_co_pwritev,
> +
> +    .is_filter = true,
> +    .bdrv_recurse_is_first_non_filter = fleecing_recurse_is_first_non_filter,
> +};
> +
> +static void bdrv_fleecing_init(void)
> +{
> +    bdrv_register(&bdrv_fleecing_filter);
> +}
> +
> +block_init(bdrv_fleecing_init);
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 899bfb5e2c..aa0a6dd971 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -27,6 +27,7 @@ block-obj-y += write-threshold.o
>  block-obj-y += backup.o
>  block-obj-$(CONFIG_REPLICATION) += replication.o
>  block-obj-y += throttle.o copy-on-read.o
> +block-obj-y += fleecing-filter.o
>  
>  block-obj-y += crypto.o
>  
> 

Dropping this one from review queue in favor of the fleecing discussion
on "[Qemu-devel] [PATCH v2 0/3] image fleecing"

--js