[PATCH v11 02/13] copy-on-read: add filter append/drop functions

Andrey Shinkevich via posted 13 patches 5 years, 1 month ago
Maintainers: Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>, John Snow <jsnow@redhat.com>, Fam Zheng <fam@euphon.net>, Kevin Wolf <kwolf@redhat.com>, Max Reitz <mreitz@redhat.com>
There is a newer version of this series
[PATCH v11 02/13] copy-on-read: add filter append/drop functions
Posted by Andrey Shinkevich via 5 years, 1 month ago
Provide API for the COR-filter insertion/removal.
Also, drop the filter child permissions for an inactive state when the
filter node is being removed.

Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/copy-on-read.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 block/copy-on-read.h | 35 +++++++++++++++++++++
 2 files changed, 123 insertions(+)
 create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..bcccf0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,21 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+    bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
+    BDRVStateCOR *state = bs->opaque;
+
     bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
                                BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
                                false, errp);
@@ -42,6 +52,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, int flags,
         ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
             bs->file->bs->supported_zero_flags);
 
+    state->active = true;
+
+    /*
+     * We don't need to call bdrv_child_refresh_perms() now as the permissions
+     * will be updated later when the filter node gets its parent.
+     */
+
     return 0;
 }
 
@@ -57,6 +74,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild *c,
                            uint64_t perm, uint64_t shared,
                            uint64_t *nperm, uint64_t *nshared)
 {
+    BDRVStateCOR *s = bs->opaque;
+
+    if (!s->active) {
+        /*
+         * While the filter is being removed
+         */
+        *nperm = 0;
+        *nshared = BLK_PERM_ALL;
+        return;
+    }
+
     *nperm = perm & PERM_PASSTHROUGH;
     *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +163,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool locked)
 
 static BlockDriver bdrv_copy_on_read = {
     .format_name                        = "copy-on-read",
+    .instance_size                      = sizeof(BDRVStateCOR),
 
     .bdrv_open                          = cor_open,
     .bdrv_child_perm                    = cor_child_perm,
@@ -159,4 +188,63 @@ static void bdrv_copy_on_read_init(void)
     bdrv_register(&bdrv_copy_on_read);
 }
 
+
+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+                                         QDict *node_options,
+                                         int flags, Error **errp)
+{
+    BlockDriverState *cor_filter_bs;
+    Error *local_err = NULL;
+
+    cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+    if (cor_filter_bs == NULL) {
+        error_prepend(errp, "Could not create COR-filter node: ");
+        return NULL;
+    }
+
+    if (!qdict_get_try_str(node_options, "node-name")) {
+        cor_filter_bs->implicit = true;
+    }
+
+    bdrv_drained_begin(bs);
+    bdrv_replace_node(bs, cor_filter_bs, &local_err);
+    bdrv_drained_end(bs);
+
+    if (local_err) {
+        bdrv_unref(cor_filter_bs);
+        error_propagate(errp, local_err);
+        return NULL;
+    }
+
+    return cor_filter_bs;
+}
+
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+    BdrvChild *child;
+    BlockDriverState *bs;
+    BDRVStateCOR *s = cor_filter_bs->opaque;
+
+    child = bdrv_filter_child(cor_filter_bs);
+    if (!child) {
+        return;
+    }
+    bs = child->bs;
+
+    /* Retain the BDS until we complete the graph change. */
+    bdrv_ref(bs);
+    /* Hold a guest back from writing while permissions are being reset. */
+    bdrv_drained_begin(bs);
+    /* Drop permissions before the graph change. */
+    s->active = false;
+    bdrv_child_refresh_perms(cor_filter_bs, child, &error_abort);
+    bdrv_replace_node(cor_filter_bs, bs, &error_abort);
+
+    bdrv_drained_end(bs);
+    bdrv_unref(bs);
+    bdrv_unref(cor_filter_bs);
+}
+
+
 block_init(bdrv_copy_on_read_init);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644
index 0000000..d6f2422
--- /dev/null
+++ b/block/copy-on-read.h
@@ -0,0 +1,35 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 2018-2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *   Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BLOCK_COPY_ON_READ
+#define BLOCK_COPY_ON_READ
+
+#include "block/block_int.h"
+
+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+                                         QDict *node_options,
+                                         int flags, Error **errp);
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs);
+
+#endif /* BLOCK_COPY_ON_READ */
-- 
1.8.3.1


Re: [PATCH v11 02/13] copy-on-read: add filter append/drop functions
Posted by Max Reitz 5 years, 1 month ago
On 12.10.20 19:43, Andrey Shinkevich wrote:
> Provide API for the COR-filter insertion/removal.
> Also, drop the filter child permissions for an inactive state when the
> filter node is being removed.
> 
> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  block/copy-on-read.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/copy-on-read.h | 35 +++++++++++++++++++++
>  2 files changed, 123 insertions(+)
>  create mode 100644 block/copy-on-read.h
> 
> diff --git a/block/copy-on-read.c b/block/copy-on-read.c
> index cb03e0f..bcccf0f 100644
> --- a/block/copy-on-read.c
> +++ b/block/copy-on-read.c

[...]

> @@ -159,4 +188,63 @@ static void bdrv_copy_on_read_init(void)
>      bdrv_register(&bdrv_copy_on_read);
>  }
>  
> +
> +BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
> +                                         QDict *node_options,
> +                                         int flags, Error **errp)

I had hoped you could make this a generic block layer function. :(

(Because it really is rather generic)

*shrug*

Reviewed-by: Max Reitz <mreitz@redhat.com>

> +{
> +    BlockDriverState *cor_filter_bs;
> +    Error *local_err = NULL;
> +
> +    cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
> +    if (cor_filter_bs == NULL) {
> +        error_prepend(errp, "Could not create COR-filter node: ");
> +        return NULL;
> +    }
> +
> +    if (!qdict_get_try_str(node_options, "node-name")) {
> +        cor_filter_bs->implicit = true;
> +    }
> +
> +    bdrv_drained_begin(bs);
> +    bdrv_replace_node(bs, cor_filter_bs, &local_err);
> +    bdrv_drained_end(bs);
> +
> +    if (local_err) {
> +        bdrv_unref(cor_filter_bs);
> +        error_propagate(errp, local_err);
> +        return NULL;
> +    }
> +
> +    return cor_filter_bs;
> +}

Re: [PATCH v11 02/13] copy-on-read: add filter append/drop functions
Posted by Andrey Shinkevich 5 years, 1 month ago
On 14.10.2020 13:44, Max Reitz wrote:
> On 12.10.20 19:43, Andrey Shinkevich wrote:
>> Provide API for the COR-filter insertion/removal.
>> Also, drop the filter child permissions for an inactive state when the
>> filter node is being removed.
>>
>> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
>> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>>   block/copy-on-read.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   block/copy-on-read.h | 35 +++++++++++++++++++++
>>   2 files changed, 123 insertions(+)
>>   create mode 100644 block/copy-on-read.h
>>
>> diff --git a/block/copy-on-read.c b/block/copy-on-read.c
>> index cb03e0f..bcccf0f 100644
>> --- a/block/copy-on-read.c
>> +++ b/block/copy-on-read.c
> 
> [...]
> 
>> @@ -159,4 +188,63 @@ static void bdrv_copy_on_read_init(void)
>>       bdrv_register(&bdrv_copy_on_read);
>>   }
>>   
>> +
>> +BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
>> +                                         QDict *node_options,
>> +                                         int flags, Error **errp)
> 
> I had hoped you could make this a generic block layer function. :(
> 
> (Because it really is rather generic)
> 
> *shrug*

Actually, I did (and still can do) that for the 'append node' function 
only but not for the 'drop node' one so far...

diff --git a/block.c b/block.c
index 11ab55f..f41e876 100644
--- a/block.c
+++ b/block.c
@@ -4669,6 +4669,55 @@ static void bdrv_delete(BlockDriverState *bs)
      g_free(bs);
  }

+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict 
*node_options,
+                                   int flags, Error **errp)
+{
+    BlockDriverState *new_node_bs;
+    Error *local_err = NULL;
+
+    new_node_bs =  bdrv_open(NULL, NULL, node_options, flags, errp);
+    if (new_node_bs == NULL) {
+        error_prepend(errp, "Could not create node: ");
+        return NULL;
+    }
+
+    bdrv_drained_begin(bs);
+    bdrv_replace_node(bs, new_node_bs, &local_err);
+    bdrv_drained_end(bs);
+
+    if (local_err) {
+        bdrv_unref(new_node_bs);
+        error_propagate(errp, local_err);
+        return NULL;
+    }
+
+    return new_node_bs;
+}
+
+void bdrv_remove_node(BlockDriverState *bs)
+{
+    BdrvChild *child;
+    BlockDriverState *inferior_bs;
+
+    child = bdrv_filter_or_cow_child(bs);
+    if (!child) {
+        return;
+    }
+    inferior_bs = child->bs;
+
+    /* Retain the BDS until we complete the graph change. */
+    bdrv_ref(inferior_bs);
+    /* Hold a guest back from writing while permissions are being reset. */
+    bdrv_drained_begin(inferior_bs);
+    /* Refresh permissions before the graph change. */
+    bdrv_child_refresh_perms(bs, child, &error_abort);
+    bdrv_replace_node(bs, inferior_bs, &error_abort);
+
+    bdrv_drained_end(inferior_bs);
+    bdrv_unref(inferior_bs);
+    bdrv_unref(bs);
+}

So, it is an intermediate solution in this patch of the series. I am 
going to make both functions generic once Vladimir overhauls the QEMU 
permission update system. Otherwise, the COR-filter node cannot be 
removed from the backing chain gracefully.

Thank you for your r-b. If the next version comes, I can move the 
'append node' function only to the generic layer.

Andrey

> 
> Reviewed-by: Max Reitz <mreitz@redhat.com>
> 
>> +{
>> +    BlockDriverState *cor_filter_bs;
>> +    Error *local_err = NULL;
>> +
>> +    cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
>> +    if (cor_filter_bs == NULL) {
>> +        error_prepend(errp, "Could not create COR-filter node: ");
>> +        return NULL;
>> +    }
>> +
>> +    if (!qdict_get_try_str(node_options, "node-name")) {
>> +        cor_filter_bs->implicit = true;
>> +    }
>> +
>> +    bdrv_drained_begin(bs);
>> +    bdrv_replace_node(bs, cor_filter_bs, &local_err);
>> +    bdrv_drained_end(bs);
>> +
>> +    if (local_err) {
>> +        bdrv_unref(cor_filter_bs);
>> +        error_propagate(errp, local_err);
>> +        return NULL;
>> +    }
>> +
>> +    return cor_filter_bs;
>> +}
> 

Re: [PATCH v11 02/13] copy-on-read: add filter append/drop functions
Posted by Max Reitz 5 years, 1 month ago
On 14.10.20 16:28, Andrey Shinkevich wrote:
> On 14.10.2020 13:44, Max Reitz wrote:
>> On 12.10.20 19:43, Andrey Shinkevich wrote:
>>> Provide API for the COR-filter insertion/removal.
>>> Also, drop the filter child permissions for an inactive state when the
>>> filter node is being removed.
>>>
>>> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
>>> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>> ---
>>>   block/copy-on-read.c | 88
>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>   block/copy-on-read.h | 35 +++++++++++++++++++++
>>>   2 files changed, 123 insertions(+)
>>>   create mode 100644 block/copy-on-read.h
>>>
>>> diff --git a/block/copy-on-read.c b/block/copy-on-read.c
>>> index cb03e0f..bcccf0f 100644
>>> --- a/block/copy-on-read.c
>>> +++ b/block/copy-on-read.c
>>
>> [...]
>>
>>> @@ -159,4 +188,63 @@ static void bdrv_copy_on_read_init(void)
>>>       bdrv_register(&bdrv_copy_on_read);
>>>   }
>>>   +
>>> +BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
>>> +                                         QDict *node_options,
>>> +                                         int flags, Error **errp)
>>
>> I had hoped you could make this a generic block layer function. :(
>>
>> (Because it really is rather generic)
>>
>> *shrug*
> 
> Actually, I did (and still can do) that for the 'append node' function
> only but not for the 'drop node' one so far...

Ah, yeah.

> diff --git a/block.c b/block.c
> index 11ab55f..f41e876 100644
> --- a/block.c
> +++ b/block.c
> @@ -4669,6 +4669,55 @@ static void bdrv_delete(BlockDriverState *bs)
>      g_free(bs);
>  }
> 
> +BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict
> *node_options,
> +                                   int flags, Error **errp)
> +{
> +    BlockDriverState *new_node_bs;
> +    Error *local_err = NULL;
> +
> +    new_node_bs =  bdrv_open(NULL, NULL, node_options, flags, errp);
> +    if (new_node_bs == NULL) {
> +        error_prepend(errp, "Could not create node: ");
> +        return NULL;
> +    }
> +
> +    bdrv_drained_begin(bs);
> +    bdrv_replace_node(bs, new_node_bs, &local_err);
> +    bdrv_drained_end(bs);
> +
> +    if (local_err) {
> +        bdrv_unref(new_node_bs);
> +        error_propagate(errp, local_err);
> +        return NULL;
> +    }
> +
> +    return new_node_bs;
> +}
> +
> +void bdrv_remove_node(BlockDriverState *bs)
> +{
> +    BdrvChild *child;
> +    BlockDriverState *inferior_bs;
> +
> +    child = bdrv_filter_or_cow_child(bs);
> +    if (!child) {
> +        return;
> +    }
> +    inferior_bs = child->bs;
> +
> +    /* Retain the BDS until we complete the graph change. */
> +    bdrv_ref(inferior_bs);
> +    /* Hold a guest back from writing while permissions are being
> reset. */
> +    bdrv_drained_begin(inferior_bs);
> +    /* Refresh permissions before the graph change. */
> +    bdrv_child_refresh_perms(bs, child, &error_abort);
> +    bdrv_replace_node(bs, inferior_bs, &error_abort);
> +
> +    bdrv_drained_end(inferior_bs);
> +    bdrv_unref(inferior_bs);
> +    bdrv_unref(bs);
> +}
> 
> So, it is an intermediate solution in this patch of the series. I am
> going to make both functions generic once Vladimir overhauls the QEMU
> permission update system. Otherwise, the COR-filter node cannot be
> removed from the backing chain gracefully.

True.

> Thank you for your r-b. If the next version comes, I can move the
> 'append node' function only to the generic layer.

Thanks!  Even if you decide against it, I’m happy as long as there are
plans to perhaps make it generic at some point.

(I’m just afraid this function might be useful in some other case, too,
and we forget that it exists here already, and then end up
reimplementing it.)

Max