[PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI

Fiona Ebner posted 2 patches 7 months ago
Maintainers: Ilya Dryomov <idryomov@gmail.com>, Peter Lieven <pl@dlhnet.de>, Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>
[PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI
Posted by Fiona Ebner 7 months ago
Currently, most Ceph configuration options are not exposed via QAPI.
While it is possible to specify a dedicated Ceph configuration file,
specialized options are often only required for a selection of images
on the RBD storage, not all of them. To avoid the need to generate a
dedicated Ceph configuration file for each image (or for each required
combination of options), support a selection of key-value pairs via
QAPI.

Initially, this is just 'rbd_cache_policy'. For example, this is
useful with small images used as a pflash for EFI variables. Setting
the 'rbd_cache_policy' to 'writeback' yields a substantial improvement
there [0].

The function qemu_rbd_extract_key_value_pairs() was copied/adapted
from the existing qemu_rbd_extract_encryption_create_options().

[0]: https://bugzilla.proxmox.com/show_bug.cgi?id=3329#c9

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 block/rbd.c          | 73 ++++++++++++++++++++++++++++++++++++++++++++
 qapi/block-core.json | 37 ++++++++++++++++++++++
 2 files changed, 110 insertions(+)

diff --git a/block/rbd.c b/block/rbd.c
index 7446e66659..2924f23093 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -298,6 +298,27 @@ static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts,
     return 0;
 }
 
+static int qemu_rbd_set_key_value_pairs(rados_t cluster,
+                                        RbdKeyValuePairs *key_value_pairs,
+                                        Error **errp)
+{
+    if (!key_value_pairs) {
+        return 0;
+    }
+
+    if (key_value_pairs->has_rbd_cache_policy) {
+        RbdCachePolicy value = key_value_pairs->rbd_cache_policy;
+        int r = rados_conf_set(cluster, "rbd_cache_policy",
+                               RbdCachePolicy_str(value));
+        if (r < 0) {
+            error_setg_errno(errp, -r, "could not set 'rbd_cache_policy'");
+            return -EINVAL;
+        }
+    }
+
+    return 0;
+}
+
 static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
                                  Error **errp)
 {
@@ -791,6 +812,44 @@ exit:
     return ret;
 }
 
+static int qemu_rbd_extract_key_value_pairs(
+        QemuOpts *opts,
+        RbdKeyValuePairs **key_value_pairs,
+        Error **errp)
+{
+    QDict *opts_qdict;
+    QDict *key_value_pairs_qdict;
+    Visitor *v;
+    int ret = 0;
+
+    opts_qdict = qemu_opts_to_qdict(opts, NULL);
+    qdict_extract_subqdict(opts_qdict, &key_value_pairs_qdict,
+                           "key-value-pairs.");
+    qobject_unref(opts_qdict);
+    if (!qdict_size(key_value_pairs_qdict)) {
+        *key_value_pairs = NULL;
+        goto exit;
+    }
+
+    /* Convert options into a QAPI object */
+    v = qobject_input_visitor_new_flat_confused(key_value_pairs_qdict, errp);
+    if (!v) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+    visit_type_RbdKeyValuePairs(v, NULL, key_value_pairs, errp);
+    visit_free(v);
+    if (!*key_value_pairs) {
+        ret = -EINVAL;
+        goto exit;
+    }
+
+exit:
+    qobject_unref(key_value_pairs_qdict);
+    return ret;
+}
+
 static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv,
                                                 const char *filename,
                                                 QemuOpts *opts,
@@ -800,6 +859,7 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv,
     BlockdevCreateOptionsRbd *rbd_opts;
     BlockdevOptionsRbd *loc;
     RbdEncryptionCreateOptions *encrypt = NULL;
+    RbdKeyValuePairs *key_value_pairs = NULL;
     Error *local_err = NULL;
     const char *keypairs, *password_secret;
     QDict *options = NULL;
@@ -848,6 +908,13 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv,
     loc->image       = g_strdup(qdict_get_try_str(options, "image"));
     keypairs         = qdict_get_try_str(options, "=keyvalue-pairs");
 
+    /* These are the key-value pairs coming in via the QAPI. */
+    ret = qemu_rbd_extract_key_value_pairs(opts, &key_value_pairs, errp);
+    if (ret < 0) {
+        goto exit;
+    }
+    loc->key_value_pairs = key_value_pairs;
+
     ret = qemu_rbd_do_create(create_options, keypairs, password_secret, errp);
     if (ret < 0) {
         goto exit;
@@ -937,6 +1004,12 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
         goto failed_shutdown;
     }
 
+    /* For the key-value pairs coming via QAPI. */
+    r = qemu_rbd_set_key_value_pairs(*cluster, opts->key_value_pairs, errp);
+    if (r < 0) {
+        goto failed_shutdown;
+    }
+
     if (mon_host) {
         r = rados_conf_set(*cluster, "mon_host", mon_host);
         if (r < 0) {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 91c70e24a7..4666765e66 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4301,6 +4301,39 @@
   'data': { 'luks': 'RbdEncryptionCreateOptionsLUKS',
             'luks2': 'RbdEncryptionCreateOptionsLUKS2' } }
 
+##
+# @RbdCachePolicy:
+#
+# An enumeration of values for the 'rbd_cache_policy' Ceph
+# configuration setting.  See the Ceph documentation for details.
+#
+# @writearound: cachable writes return immediately, reads are not
+#     served from the cache.
+#
+# @writeback: cachable writes return immediately, reads are served
+#     from the cache.
+#
+# @writethrough: writes return only when the data is on disk for all
+#     replicas, reads are served from the cache.
+#
+# Since 10.1
+##
+{ 'enum' : 'RbdCachePolicy',
+  'data' : [ 'writearound', 'writeback', 'writethrough' ] }
+
+
+##
+# @RbdKeyValuePairs:
+#
+# Key-value pairs for Ceph configuration.
+#
+# @rbd-cache-policy: Ceph configuration option 'rbd_cache_policy'.
+#
+# Since 10.1
+##
+{ 'struct': 'RbdKeyValuePairs',
+  'data': { '*rbd-cache-policy': 'RbdCachePolicy' } }
+
 ##
 # @BlockdevOptionsRbd:
 #
@@ -4327,6 +4360,9 @@
 #     authentication.  This maps to Ceph configuration option "key".
 #     (Since 3.0)
 #
+# @key-value-pairs: Key-value pairs for additional Ceph configuraton.
+#     (Since 10.1)
+#
 # @server: Monitor host address and port.  This maps to the "mon_host"
 #     Ceph option.
 #
@@ -4342,6 +4378,7 @@
             '*user': 'str',
             '*auth-client-required': ['RbdAuthMode'],
             '*key-secret': 'str',
+            '*key-value-pairs' : 'RbdKeyValuePairs',
             '*server': ['InetSocketAddressBase'] } }
 
 ##
-- 
2.39.5
Re: [PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI
Posted by Daniel P. Berrangé 6 months ago
On Thu, May 15, 2025 at 01:29:07PM +0200, Fiona Ebner wrote:
> Currently, most Ceph configuration options are not exposed via QAPI.
> While it is possible to specify a dedicated Ceph configuration file,
> specialized options are often only required for a selection of images
> on the RBD storage, not all of them. To avoid the need to generate a
> dedicated Ceph configuration file for each image (or for each required
> combination of options), support a selection of key-value pairs via
> QAPI.
> 
> Initially, this is just 'rbd_cache_policy'. For example, this is
> useful with small images used as a pflash for EFI variables. Setting
> the 'rbd_cache_policy' to 'writeback' yields a substantial improvement
> there [0].
> 
> The function qemu_rbd_extract_key_value_pairs() was copied/adapted
> from the existing qemu_rbd_extract_encryption_create_options().
> 
> [0]: https://bugzilla.proxmox.com/show_bug.cgi?id=3329#c9
> 
> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
> ---
>  block/rbd.c          | 73 ++++++++++++++++++++++++++++++++++++++++++++
>  qapi/block-core.json | 37 ++++++++++++++++++++++
>  2 files changed, 110 insertions(+)


> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 91c70e24a7..4666765e66 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -4301,6 +4301,39 @@
>    'data': { 'luks': 'RbdEncryptionCreateOptionsLUKS',
>              'luks2': 'RbdEncryptionCreateOptionsLUKS2' } }
>  
> +##
> +# @RbdCachePolicy:
> +#
> +# An enumeration of values for the 'rbd_cache_policy' Ceph
> +# configuration setting.  See the Ceph documentation for details.
> +#
> +# @writearound: cachable writes return immediately, reads are not
> +#     served from the cache.
> +#
> +# @writeback: cachable writes return immediately, reads are served
> +#     from the cache.
> +#
> +# @writethrough: writes return only when the data is on disk for all
> +#     replicas, reads are served from the cache.
> +#
> +# Since 10.1
> +##
> +{ 'enum' : 'RbdCachePolicy',
> +  'data' : [ 'writearound', 'writeback', 'writethrough' ] }
> +
> +
> +##
> +# @RbdKeyValuePairs:
> +#
> +# Key-value pairs for Ceph configuration.
> +#
> +# @rbd-cache-policy: Ceph configuration option 'rbd_cache_policy'.
> +#
> +# Since 10.1
> +##
> +{ 'struct': 'RbdKeyValuePairs',
> +  'data': { '*rbd-cache-policy': 'RbdCachePolicy' } }
> +
>  ##
>  # @BlockdevOptionsRbd:
>  #
> @@ -4327,6 +4360,9 @@
>  #     authentication.  This maps to Ceph configuration option "key".
>  #     (Since 3.0)
>  #
> +# @key-value-pairs: Key-value pairs for additional Ceph configuraton.
> +#     (Since 10.1)
> +#
>  # @server: Monitor host address and port.  This maps to the "mon_host"
>  #     Ceph option.
>  #
> @@ -4342,6 +4378,7 @@
>              '*user': 'str',
>              '*auth-client-required': ['RbdAuthMode'],
>              '*key-secret': 'str',
> +            '*key-value-pairs' : 'RbdKeyValuePairs',

I'm not seeing any point in this 'RbdKeyValuePairs' struct. Why isn't
the 'rbd-cache-policy' field just directly part of the BlockdevOptionsRbd
struct like all the other options are ?

Also, 'rbd-' as a prefix in the field name is redundant when this is
already in an RBD specific struct.

>              '*server': ['InetSocketAddressBase'] } }
>  

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Re: [PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI
Posted by Ilya Dryomov 6 months ago
On Thu, May 15, 2025 at 1:29 PM Fiona Ebner <f.ebner@proxmox.com> wrote:
>
> Currently, most Ceph configuration options are not exposed via QAPI.
> While it is possible to specify a dedicated Ceph configuration file,
> specialized options are often only required for a selection of images
> on the RBD storage, not all of them. To avoid the need to generate a
> dedicated Ceph configuration file for each image (or for each required
> combination of options), support a selection of key-value pairs via
> QAPI.
>
> Initially, this is just 'rbd_cache_policy'. For example, this is
> useful with small images used as a pflash for EFI variables. Setting
> the 'rbd_cache_policy' to 'writeback' yields a substantial improvement
> there [0].
>
> The function qemu_rbd_extract_key_value_pairs() was copied/adapted
> from the existing qemu_rbd_extract_encryption_create_options().
>
> [0]: https://bugzilla.proxmox.com/show_bug.cgi?id=3329#c9
>
> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
> ---
>  block/rbd.c          | 73 ++++++++++++++++++++++++++++++++++++++++++++
>  qapi/block-core.json | 37 ++++++++++++++++++++++
>  2 files changed, 110 insertions(+)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index 7446e66659..2924f23093 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -298,6 +298,27 @@ static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts,
>      return 0;
>  }
>
> +static int qemu_rbd_set_key_value_pairs(rados_t cluster,
> +                                        RbdKeyValuePairs *key_value_pairs,
> +                                        Error **errp)
> +{
> +    if (!key_value_pairs) {
> +        return 0;
> +    }
> +
> +    if (key_value_pairs->has_rbd_cache_policy) {
> +        RbdCachePolicy value = key_value_pairs->rbd_cache_policy;
> +        int r = rados_conf_set(cluster, "rbd_cache_policy",
> +                               RbdCachePolicy_str(value));
> +        if (r < 0) {
> +            error_setg_errno(errp, -r, "could not set 'rbd_cache_policy'");
> +            return -EINVAL;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
>                                   Error **errp)
>  {
> @@ -791,6 +812,44 @@ exit:
>      return ret;
>  }
>
> +static int qemu_rbd_extract_key_value_pairs(
> +        QemuOpts *opts,
> +        RbdKeyValuePairs **key_value_pairs,
> +        Error **errp)
> +{
> +    QDict *opts_qdict;
> +    QDict *key_value_pairs_qdict;
> +    Visitor *v;
> +    int ret = 0;
> +
> +    opts_qdict = qemu_opts_to_qdict(opts, NULL);
> +    qdict_extract_subqdict(opts_qdict, &key_value_pairs_qdict,
> +                           "key-value-pairs.");
> +    qobject_unref(opts_qdict);
> +    if (!qdict_size(key_value_pairs_qdict)) {
> +        *key_value_pairs = NULL;
> +        goto exit;
> +    }
> +
> +    /* Convert options into a QAPI object */
> +    v = qobject_input_visitor_new_flat_confused(key_value_pairs_qdict, errp);
> +    if (!v) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    visit_type_RbdKeyValuePairs(v, NULL, key_value_pairs, errp);
> +    visit_free(v);
> +    if (!*key_value_pairs) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +exit:
> +    qobject_unref(key_value_pairs_qdict);
> +    return ret;
> +}
> +
>  static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv,
>                                                  const char *filename,
>                                                  QemuOpts *opts,
> @@ -800,6 +859,7 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv,
>      BlockdevCreateOptionsRbd *rbd_opts;
>      BlockdevOptionsRbd *loc;
>      RbdEncryptionCreateOptions *encrypt = NULL;
> +    RbdKeyValuePairs *key_value_pairs = NULL;
>      Error *local_err = NULL;
>      const char *keypairs, *password_secret;
>      QDict *options = NULL;
> @@ -848,6 +908,13 @@ static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv,
>      loc->image       = g_strdup(qdict_get_try_str(options, "image"));
>      keypairs         = qdict_get_try_str(options, "=keyvalue-pairs");
>
> +    /* These are the key-value pairs coming in via the QAPI. */
> +    ret = qemu_rbd_extract_key_value_pairs(opts, &key_value_pairs, errp);
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +    loc->key_value_pairs = key_value_pairs;
> +
>      ret = qemu_rbd_do_create(create_options, keypairs, password_secret, errp);
>      if (ret < 0) {
>          goto exit;
> @@ -937,6 +1004,12 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
>          goto failed_shutdown;
>      }
>
> +    /* For the key-value pairs coming via QAPI. */
> +    r = qemu_rbd_set_key_value_pairs(*cluster, opts->key_value_pairs, errp);
> +    if (r < 0) {
> +        goto failed_shutdown;
> +    }
> +
>      if (mon_host) {
>          r = rados_conf_set(*cluster, "mon_host", mon_host);
>          if (r < 0) {
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 91c70e24a7..4666765e66 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -4301,6 +4301,39 @@
>    'data': { 'luks': 'RbdEncryptionCreateOptionsLUKS',
>              'luks2': 'RbdEncryptionCreateOptionsLUKS2' } }
>
> +##
> +# @RbdCachePolicy:
> +#
> +# An enumeration of values for the 'rbd_cache_policy' Ceph
> +# configuration setting.  See the Ceph documentation for details.
> +#
> +# @writearound: cachable writes return immediately, reads are not
> +#     served from the cache.
> +#
> +# @writeback: cachable writes return immediately, reads are served
> +#     from the cache.
> +#
> +# @writethrough: writes return only when the data is on disk for all
> +#     replicas, reads are served from the cache.
> +#
> +# Since 10.1
> +##
> +{ 'enum' : 'RbdCachePolicy',
> +  'data' : [ 'writearound', 'writeback', 'writethrough' ] }
> +
> +
> +##
> +# @RbdKeyValuePairs:
> +#
> +# Key-value pairs for Ceph configuration.
> +#
> +# @rbd-cache-policy: Ceph configuration option 'rbd_cache_policy'.
> +#
> +# Since 10.1
> +##
> +{ 'struct': 'RbdKeyValuePairs',
> +  'data': { '*rbd-cache-policy': 'RbdCachePolicy' } }

Hi Fiona,

I'm not following the rationale for introducing RbdKeyValuePairs
struct.  If there is a desire to expose rbd_cache_policy option this
way, couldn't it just be added to BlockdevOptionsRbd struct?  The
existing auth_client_required option has a very similar pattern.

If exposing rbd_cache_policy option, rbd_cache option (enabled/disabled
bool) should likely be exposed as well.  It doesn't make much sense to
provide a built-in way to adjust the cache policy without also providing
a built-in way to disable the cache entirely.  Then the question of what
would be better from the QAPI perspective arises: a bool option to map
to Ceph as close as possible or perhaps an additional 'disabled' value
in RbdCachePolicy enum?  And regardless of that, the need to remember
to update QEMU if a new cache policy is ever added because even though
these are just strings, QEMU is going to be validating them...

> +
>  ##
>  # @BlockdevOptionsRbd:
>  #
> @@ -4327,6 +4360,9 @@
>  #     authentication.  This maps to Ceph configuration option "key".
>  #     (Since 3.0)
>  #
> +# @key-value-pairs: Key-value pairs for additional Ceph configuraton.
> +#     (Since 10.1)
> +#
>  # @server: Monitor host address and port.  This maps to the "mon_host"
>  #     Ceph option.
>  #
> @@ -4342,6 +4378,7 @@
>              '*user': 'str',
>              '*auth-client-required': ['RbdAuthMode'],
>              '*key-secret': 'str',
> +            '*key-value-pairs' : 'RbdKeyValuePairs',

To side-step all of the above, have you considered implementing
a straightforward passthrough to Ceph instead?  Something like

  '*key-value-pairs': ['RbdKeyValuePair']

where RbdKeyValuePair is just a pair arbitrary strings (and
key-value-pairs is thus an optional list of those).  rados_conf_set()
would be called just the same but the user would be able to override
any Ceph option they wish, not just a few that we thought of here.

Thanks,

                Ilya
Re: [PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI
Posted by Fiona Ebner 6 months ago
Am 16.06.25 um 11:41 schrieb Daniel P. Berrangé:
> On Thu, May 15, 2025 at 01:29:07PM +0200, Fiona Ebner wrote:
>> @@ -4327,6 +4360,9 @@
>>  #     authentication.  This maps to Ceph configuration option "key".
>>  #     (Since 3.0)
>>  #
>> +# @key-value-pairs: Key-value pairs for additional Ceph configuraton.
>> +#     (Since 10.1)
>> +#
>>  # @server: Monitor host address and port.  This maps to the "mon_host"
>>  #     Ceph option.
>>  #
>> @@ -4342,6 +4378,7 @@
>>              '*user': 'str',
>>              '*auth-client-required': ['RbdAuthMode'],
>>              '*key-secret': 'str',
>> +            '*key-value-pairs' : 'RbdKeyValuePairs',
>
> I'm not seeing any point in this 'RbdKeyValuePairs' struct. Why isn't
> the 'rbd-cache-policy' field just directly part of the BlockdevOptionsRbd
> struct like all the other options are ?
>
> Also, 'rbd-' as a prefix in the field name is redundant when this is
> already in an RBD specific struct.

Am 16.06.25 um 11:25 schrieb Ilya Dryomov:
> Hi Fiona,
> 
> I'm not following the rationale for introducing RbdKeyValuePairs
> struct.  If there is a desire to expose rbd_cache_policy option this
> way, couldn't it just be added to BlockdevOptionsRbd struct?  The
> existing auth_client_required option has a very similar pattern.

Hi Ilya and Daniel,

my intention was to not "pollute" the top-level struct with rather
uncommon options, but collect them separately and also make it explicit
that those are the key-value pairs passed along to Ceph.

> If exposing rbd_cache_policy option, rbd_cache option (enabled/disabled
> bool) should likely be exposed as well.  It doesn't make much sense to
> provide a built-in way to adjust the cache policy without also providing
> a built-in way to disable the cache entirely.  Then the question of what
> would be better from the QAPI perspective arises: a bool option to map
> to Ceph as close as possible or perhaps an additional 'disabled' value
> in RbdCachePolicy enum?  And regardless of that, the need to remember
> to update QEMU if a new cache policy is ever added because even though
> these are just strings, QEMU is going to be validating them...

Good point! If going with the key-value-pairs approach, it would be
nicer to go with a direct mapping IMHO. If adding it to the top-level,
I'd be in favor of the 'disabled' value.

Best Regards,
Fiona


Re: [PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI
Posted by Daniel P. Berrangé 6 months ago
On Mon, Jun 16, 2025 at 02:29:56PM +0200, Fiona Ebner wrote:
> Am 16.06.25 um 11:41 schrieb Daniel P. Berrangé:
> > On Thu, May 15, 2025 at 01:29:07PM +0200, Fiona Ebner wrote:
> >> @@ -4327,6 +4360,9 @@
> >>  #     authentication.  This maps to Ceph configuration option "key".
> >>  #     (Since 3.0)
> >>  #
> >> +# @key-value-pairs: Key-value pairs for additional Ceph configuraton.
> >> +#     (Since 10.1)
> >> +#
> >>  # @server: Monitor host address and port.  This maps to the "mon_host"
> >>  #     Ceph option.
> >>  #
> >> @@ -4342,6 +4378,7 @@
> >>              '*user': 'str',
> >>              '*auth-client-required': ['RbdAuthMode'],
> >>              '*key-secret': 'str',
> >> +            '*key-value-pairs' : 'RbdKeyValuePairs',
> >
> > I'm not seeing any point in this 'RbdKeyValuePairs' struct. Why isn't
> > the 'rbd-cache-policy' field just directly part of the BlockdevOptionsRbd
> > struct like all the other options are ?
> >
> > Also, 'rbd-' as a prefix in the field name is redundant when this is
> > already in an RBD specific struct.
> 
> Am 16.06.25 um 11:25 schrieb Ilya Dryomov:
> > Hi Fiona,
> > 
> > I'm not following the rationale for introducing RbdKeyValuePairs
> > struct.  If there is a desire to expose rbd_cache_policy option this
> > way, couldn't it just be added to BlockdevOptionsRbd struct?  The
> > existing auth_client_required option has a very similar pattern.
> 
> Hi Ilya and Daniel,
> 
> my intention was to not "pollute" the top-level struct with rather
> uncommon options, but collect them separately and also make it explicit
> that those are the key-value pairs passed along to Ceph.

If these are useful things that users of ceph need to be able to
control for their QEMU VMs, then this is not "pollution".

> > If exposing rbd_cache_policy option, rbd_cache option (enabled/disabled
> > bool) should likely be exposed as well.  It doesn't make much sense to
> > provide a built-in way to adjust the cache policy without also providing
> > a built-in way to disable the cache entirely.  Then the question of what
> > would be better from the QAPI perspective arises: a bool option to map
> > to Ceph as close as possible or perhaps an additional 'disabled' value
> > in RbdCachePolicy enum?  And regardless of that, the need to remember
> > to update QEMU if a new cache policy is ever added because even though
> > these are just strings, QEMU is going to be validating them...
> 
> Good point! If going with the key-value-pairs approach, it would be
> nicer to go with a direct mapping IMHO. If adding it to the top-level,
> I'd be in favor of the 'disabled' value.

If this is stuff that users expect/need to be able to configure for
their VMs, that would bias towards formal modelling in QAPI, not plain
passthrough.

NB, any features intended to be configurable via libvirt would need
to be formalling modelled, as libvirt won't introduce a dependency
on things that QEMU declares "unstable" in QAPI.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|


Re: [PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI
Posted by Daniel P. Berrangé 6 months ago
On Mon, Jun 16, 2025 at 11:25:54AM +0200, Ilya Dryomov wrote:
> On Thu, May 15, 2025 at 1:29 PM Fiona Ebner <f.ebner@proxmox.com> wrote:
> >
> > Currently, most Ceph configuration options are not exposed via QAPI.
> > While it is possible to specify a dedicated Ceph configuration file,
> > specialized options are often only required for a selection of images
> > on the RBD storage, not all of them. To avoid the need to generate a
> > dedicated Ceph configuration file for each image (or for each required
> > combination of options), support a selection of key-value pairs via
> > QAPI.
> >
> > Initially, this is just 'rbd_cache_policy'. For example, this is
> > useful with small images used as a pflash for EFI variables. Setting
> > the 'rbd_cache_policy' to 'writeback' yields a substantial improvement
> > there [0].
> >
> > The function qemu_rbd_extract_key_value_pairs() was copied/adapted
> > from the existing qemu_rbd_extract_encryption_create_options().
> >
> > [0]: https://bugzilla.proxmox.com/show_bug.cgi?id=3329#c9
> >
> > Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>

snip

> >  ##
> >  # @BlockdevOptionsRbd:
> >  #
> > @@ -4327,6 +4360,9 @@
> >  #     authentication.  This maps to Ceph configuration option "key".
> >  #     (Since 3.0)
> >  #
> > +# @key-value-pairs: Key-value pairs for additional Ceph configuraton.
> > +#     (Since 10.1)
> > +#
> >  # @server: Monitor host address and port.  This maps to the "mon_host"
> >  #     Ceph option.
> >  #
> > @@ -4342,6 +4378,7 @@
> >              '*user': 'str',
> >              '*auth-client-required': ['RbdAuthMode'],
> >              '*key-secret': 'str',
> > +            '*key-value-pairs' : 'RbdKeyValuePairs',
> 
> To side-step all of the above, have you considered implementing
> a straightforward passthrough to Ceph instead?  Something like
> 
>   '*key-value-pairs': ['RbdKeyValuePair']
> 
> where RbdKeyValuePair is just a pair arbitrary strings (and
> key-value-pairs is thus an optional list of those).  rados_conf_set()
> would be called just the same but the user would be able to override
> any Ceph option they wish, not just a few that we thought of here.

Passing through arbitrary key/value pairs as strings is essentially
abdicating our design responsibility in QAPI. enums would no longer
be introspectable. Integers / booleans would require abnormal formatting
by clients. API stability / deprecation promises can no longer be made.
and more besides.

Given that limitation, if we did go the string pairs route, I would
expect it to be marked as "unstable" in the QAPI schema, so apps have
a suitable warning NOT to rely on this.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|


Re: [PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI
Posted by Ilya Dryomov 6 months ago
On Mon, Jun 16, 2025 at 11:52 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Mon, Jun 16, 2025 at 11:25:54AM +0200, Ilya Dryomov wrote:
> > On Thu, May 15, 2025 at 1:29 PM Fiona Ebner <f.ebner@proxmox.com> wrote:
> > >
> > > Currently, most Ceph configuration options are not exposed via QAPI.
> > > While it is possible to specify a dedicated Ceph configuration file,
> > > specialized options are often only required for a selection of images
> > > on the RBD storage, not all of them. To avoid the need to generate a
> > > dedicated Ceph configuration file for each image (or for each required
> > > combination of options), support a selection of key-value pairs via
> > > QAPI.
> > >
> > > Initially, this is just 'rbd_cache_policy'. For example, this is
> > > useful with small images used as a pflash for EFI variables. Setting
> > > the 'rbd_cache_policy' to 'writeback' yields a substantial improvement
> > > there [0].
> > >
> > > The function qemu_rbd_extract_key_value_pairs() was copied/adapted
> > > from the existing qemu_rbd_extract_encryption_create_options().
> > >
> > > [0]: https://bugzilla.proxmox.com/show_bug.cgi?id=3329#c9
> > >
> > > Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
>
> snip
>
> > >  ##
> > >  # @BlockdevOptionsRbd:
> > >  #
> > > @@ -4327,6 +4360,9 @@
> > >  #     authentication.  This maps to Ceph configuration option "key".
> > >  #     (Since 3.0)
> > >  #
> > > +# @key-value-pairs: Key-value pairs for additional Ceph configuraton.
> > > +#     (Since 10.1)
> > > +#
> > >  # @server: Monitor host address and port.  This maps to the "mon_host"
> > >  #     Ceph option.
> > >  #
> > > @@ -4342,6 +4378,7 @@
> > >              '*user': 'str',
> > >              '*auth-client-required': ['RbdAuthMode'],
> > >              '*key-secret': 'str',
> > > +            '*key-value-pairs' : 'RbdKeyValuePairs',
> >
> > To side-step all of the above, have you considered implementing
> > a straightforward passthrough to Ceph instead?  Something like
> >
> >   '*key-value-pairs': ['RbdKeyValuePair']
> >
> > where RbdKeyValuePair is just a pair arbitrary strings (and
> > key-value-pairs is thus an optional list of those).  rados_conf_set()
> > would be called just the same but the user would be able to override
> > any Ceph option they wish, not just a few that we thought of here.
>
> Passing through arbitrary key/value pairs as strings is essentially
> abdicating our design responsibility in QAPI. enums would no longer
> be introspectable. Integers / booleans would require abnormal formatting
> by clients. API stability / deprecation promises can no longer be made.
> and more besides.
>
> Given that limitation, if we did go the string pairs route, I would
> expect it to be marked as "unstable" in the QAPI schema, so apps have
> a suitable warning NOT to rely on this.

This sounds sensible to me.  We can continue exposing the most common
Ceph options through a proper QAPI schema but add key-value-pairs as an
alternative low-level route for those who want to avoid dealing with
physical configuration files.

Thanks,

                Ilya
Re: [PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI
Posted by Fiona Ebner 6 months ago
Am 16.06.25 um 12:28 schrieb Ilya Dryomov:
> On Mon, Jun 16, 2025 at 11:52 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
>> On Mon, Jun 16, 2025 at 11:25:54AM +0200, Ilya Dryomov wrote:
>>> On Thu, May 15, 2025 at 1:29 PM Fiona Ebner <f.ebner@proxmox.com> wrote:
>>>>  ##
>>>>  # @BlockdevOptionsRbd:
>>>>  #
>>>> @@ -4327,6 +4360,9 @@
>>>>  #     authentication.  This maps to Ceph configuration option "key".
>>>>  #     (Since 3.0)
>>>>  #
>>>> +# @key-value-pairs: Key-value pairs for additional Ceph configuraton.
>>>> +#     (Since 10.1)
>>>> +#
>>>>  # @server: Monitor host address and port.  This maps to the "mon_host"
>>>>  #     Ceph option.
>>>>  #
>>>> @@ -4342,6 +4378,7 @@
>>>>              '*user': 'str',
>>>>              '*auth-client-required': ['RbdAuthMode'],
>>>>              '*key-secret': 'str',
>>>> +            '*key-value-pairs' : 'RbdKeyValuePairs',
>>>
>>> To side-step all of the above, have you considered implementing
>>> a straightforward passthrough to Ceph instead?  Something like
>>>
>>>   '*key-value-pairs': ['RbdKeyValuePair']
>>>
>>> where RbdKeyValuePair is just a pair arbitrary strings (and
>>> key-value-pairs is thus an optional list of those).  rados_conf_set()
>>> would be called just the same but the user would be able to override
>>> any Ceph option they wish, not just a few that we thought of here.
>>
>> Passing through arbitrary key/value pairs as strings is essentially
>> abdicating our design responsibility in QAPI. enums would no longer
>> be introspectable. Integers / booleans would require abnormal formatting
>> by clients. API stability / deprecation promises can no longer be made.
>> and more besides.

Yes, and I also was under the impression that there is no desire to
re-introduce arbitrary key-value pairs with QMP/blockdev options.

>> Given that limitation, if we did go the string pairs route, I would
>> expect it to be marked as "unstable" in the QAPI schema, so apps have
>> a suitable warning NOT to rely on this.
> 
> This sounds sensible to me.  We can continue exposing the most common
> Ceph options through a proper QAPI schema but add key-value-pairs as an
> alternative low-level route for those who want to avoid dealing with
> physical configuration files.

As written in the commit message, the cache option should not apply to
all volumes, so using configuration files is rather impractical there.

I'd prefer defining the cache option(s) explicitly, and have people add
additional key-value pairs they require explicitly going forward. But if
you really don't want me to, I can still go with the unstable, arbitrary
strings approach instead.

Best Regards,
Fiona


Re: [PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI
Posted by Ilya Dryomov 6 months ago
On Mon, Jun 16, 2025 at 2:38 PM Fiona Ebner <f.ebner@proxmox.com> wrote:
>
> Am 16.06.25 um 12:28 schrieb Ilya Dryomov:
> > On Mon, Jun 16, 2025 at 11:52 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
> >> On Mon, Jun 16, 2025 at 11:25:54AM +0200, Ilya Dryomov wrote:
> >>> On Thu, May 15, 2025 at 1:29 PM Fiona Ebner <f.ebner@proxmox.com> wrote:
> >>>>  ##
> >>>>  # @BlockdevOptionsRbd:
> >>>>  #
> >>>> @@ -4327,6 +4360,9 @@
> >>>>  #     authentication.  This maps to Ceph configuration option "key".
> >>>>  #     (Since 3.0)
> >>>>  #
> >>>> +# @key-value-pairs: Key-value pairs for additional Ceph configuraton.
> >>>> +#     (Since 10.1)
> >>>> +#
> >>>>  # @server: Monitor host address and port.  This maps to the "mon_host"
> >>>>  #     Ceph option.
> >>>>  #
> >>>> @@ -4342,6 +4378,7 @@
> >>>>              '*user': 'str',
> >>>>              '*auth-client-required': ['RbdAuthMode'],
> >>>>              '*key-secret': 'str',
> >>>> +            '*key-value-pairs' : 'RbdKeyValuePairs',
> >>>
> >>> To side-step all of the above, have you considered implementing
> >>> a straightforward passthrough to Ceph instead?  Something like
> >>>
> >>>   '*key-value-pairs': ['RbdKeyValuePair']
> >>>
> >>> where RbdKeyValuePair is just a pair arbitrary strings (and
> >>> key-value-pairs is thus an optional list of those).  rados_conf_set()
> >>> would be called just the same but the user would be able to override
> >>> any Ceph option they wish, not just a few that we thought of here.
> >>
> >> Passing through arbitrary key/value pairs as strings is essentially
> >> abdicating our design responsibility in QAPI. enums would no longer
> >> be introspectable. Integers / booleans would require abnormal formatting
> >> by clients. API stability / deprecation promises can no longer be made.
> >> and more besides.
>
> Yes, and I also was under the impression that there is no desire to
> re-introduce arbitrary key-value pairs with QMP/blockdev options.

Hi Fiona,

What do you mean by re-introduce?

>
> >> Given that limitation, if we did go the string pairs route, I would
> >> expect it to be marked as "unstable" in the QAPI schema, so apps have
> >> a suitable warning NOT to rely on this.
> >
> > This sounds sensible to me.  We can continue exposing the most common
> > Ceph options through a proper QAPI schema but add key-value-pairs as an
> > alternative low-level route for those who want to avoid dealing with
> > physical configuration files.
>
> As written in the commit message, the cache option should not apply to
> all volumes, so using configuration files is rather impractical there.
>
> I'd prefer defining the cache option(s) explicitly, and have people add
> additional key-value pairs they require explicitly going forward. But if
> you really don't want me to, I can still go with the unstable, arbitrary
> strings approach instead.

The RBD cache policy option would definitely count as one of the most
common, so I don't have an objection to it being added in an explicit
form.  I'm also fine with the "disabled" enum value that you expressed
a preference for in another email.

Thanks,

                Ilya
Re: [PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI
Posted by Ilya Dryomov 6 months ago
On Thu, Jun 19, 2025 at 8:38 PM Ilya Dryomov <idryomov@gmail.com> wrote:
>
> On Mon, Jun 16, 2025 at 2:38 PM Fiona Ebner <f.ebner@proxmox.com> wrote:
> >
> > Am 16.06.25 um 12:28 schrieb Ilya Dryomov:
> > > On Mon, Jun 16, 2025 at 11:52 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > >> On Mon, Jun 16, 2025 at 11:25:54AM +0200, Ilya Dryomov wrote:
> > >>> On Thu, May 15, 2025 at 1:29 PM Fiona Ebner <f.ebner@proxmox.com> wrote:
> > >>>>  ##
> > >>>>  # @BlockdevOptionsRbd:
> > >>>>  #
> > >>>> @@ -4327,6 +4360,9 @@
> > >>>>  #     authentication.  This maps to Ceph configuration option "key".
> > >>>>  #     (Since 3.0)
> > >>>>  #
> > >>>> +# @key-value-pairs: Key-value pairs for additional Ceph configuraton.
> > >>>> +#     (Since 10.1)
> > >>>> +#
> > >>>>  # @server: Monitor host address and port.  This maps to the "mon_host"
> > >>>>  #     Ceph option.
> > >>>>  #
> > >>>> @@ -4342,6 +4378,7 @@
> > >>>>              '*user': 'str',
> > >>>>              '*auth-client-required': ['RbdAuthMode'],
> > >>>>              '*key-secret': 'str',
> > >>>> +            '*key-value-pairs' : 'RbdKeyValuePairs',
> > >>>
> > >>> To side-step all of the above, have you considered implementing
> > >>> a straightforward passthrough to Ceph instead?  Something like
> > >>>
> > >>>   '*key-value-pairs': ['RbdKeyValuePair']
> > >>>
> > >>> where RbdKeyValuePair is just a pair arbitrary strings (and
> > >>> key-value-pairs is thus an optional list of those).  rados_conf_set()
> > >>> would be called just the same but the user would be able to override
> > >>> any Ceph option they wish, not just a few that we thought of here.
> > >>
> > >> Passing through arbitrary key/value pairs as strings is essentially
> > >> abdicating our design responsibility in QAPI. enums would no longer
> > >> be introspectable. Integers / booleans would require abnormal formatting
> > >> by clients. API stability / deprecation promises can no longer be made.
> > >> and more besides.
> >
> > Yes, and I also was under the impression that there is no desire to
> > re-introduce arbitrary key-value pairs with QMP/blockdev options.
>
> Hi Fiona,
>
> What do you mean by re-introduce?
>
> >
> > >> Given that limitation, if we did go the string pairs route, I would
> > >> expect it to be marked as "unstable" in the QAPI schema, so apps have
> > >> a suitable warning NOT to rely on this.
> > >
> > > This sounds sensible to me.  We can continue exposing the most common
> > > Ceph options through a proper QAPI schema but add key-value-pairs as an
> > > alternative low-level route for those who want to avoid dealing with
> > > physical configuration files.
> >
> > As written in the commit message, the cache option should not apply to
> > all volumes, so using configuration files is rather impractical there.
> >
> > I'd prefer defining the cache option(s) explicitly, and have people add
> > additional key-value pairs they require explicitly going forward. But if
> > you really don't want me to, I can still go with the unstable, arbitrary
> > strings approach instead.
>
> The RBD cache policy option would definitely count as one of the most
> common, so I don't have an objection to it being added in an explicit
> form.  I'm also fine with the "disabled" enum value that you expressed
> a preference for in another email.

The QEMU block layer-wide "cache" option is kind of in the way though:
if it's set to "off"/"none" or the more specific "cache.direct" option
is set to "on", we disable the RBD cache.  So there is an existing way
to control that, but it's at another level and QEMU cache modes aren't
distinguished (i.e. there is no mapping to RBD which means that one can
have "cache=writeback" set in QEMU but still get e.g. "writethrough"
RBD cache policy come from the ceph.conf file).  An extra enum value
for disabling the RBD cache might muddy the waters further.

Another thing that comes to mind is that if you need to control the
cache policy (or any other RBD option) on a per-image basis as opposed
to per-user basis, you can employ image-level configuration overrides
on the RBD side -- see "rbd config image get/set/ls/rm" commands.

Thanks,

                Ilya
Re: [PATCH 1/2] block/rbd: support selected key-value-pairs via QAPI
Posted by Fiona Ebner 5 months, 4 weeks ago
Am 19.06.25 um 23:20 schrieb Ilya Dryomov:
> On Thu, Jun 19, 2025 at 8:38 PM Ilya Dryomov <idryomov@gmail.com> wrote:
>>
>> On Mon, Jun 16, 2025 at 2:38 PM Fiona Ebner <f.ebner@proxmox.com> wrote:
>>>
>>> Am 16.06.25 um 12:28 schrieb Ilya Dryomov:
>>>> On Mon, Jun 16, 2025 at 11:52 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
>>>>> On Mon, Jun 16, 2025 at 11:25:54AM +0200, Ilya Dryomov wrote:
>>>>>> On Thu, May 15, 2025 at 1:29 PM Fiona Ebner <f.ebner@proxmox.com> wrote:
>>>>>>>  ##
>>>>>>>  # @BlockdevOptionsRbd:
>>>>>>>  #
>>>>>>> @@ -4327,6 +4360,9 @@
>>>>>>>  #     authentication.  This maps to Ceph configuration option "key".
>>>>>>>  #     (Since 3.0)
>>>>>>>  #
>>>>>>> +# @key-value-pairs: Key-value pairs for additional Ceph configuraton.
>>>>>>> +#     (Since 10.1)
>>>>>>> +#
>>>>>>>  # @server: Monitor host address and port.  This maps to the "mon_host"
>>>>>>>  #     Ceph option.
>>>>>>>  #
>>>>>>> @@ -4342,6 +4378,7 @@
>>>>>>>              '*user': 'str',
>>>>>>>              '*auth-client-required': ['RbdAuthMode'],
>>>>>>>              '*key-secret': 'str',
>>>>>>> +            '*key-value-pairs' : 'RbdKeyValuePairs',
>>>>>>
>>>>>> To side-step all of the above, have you considered implementing
>>>>>> a straightforward passthrough to Ceph instead?  Something like
>>>>>>
>>>>>>   '*key-value-pairs': ['RbdKeyValuePair']
>>>>>>
>>>>>> where RbdKeyValuePair is just a pair arbitrary strings (and
>>>>>> key-value-pairs is thus an optional list of those).  rados_conf_set()
>>>>>> would be called just the same but the user would be able to override
>>>>>> any Ceph option they wish, not just a few that we thought of here.
>>>>>
>>>>> Passing through arbitrary key/value pairs as strings is essentially
>>>>> abdicating our design responsibility in QAPI. enums would no longer
>>>>> be introspectable. Integers / booleans would require abnormal formatting
>>>>> by clients. API stability / deprecation promises can no longer be made.
>>>>> and more besides.
>>>
>>> Yes, and I also was under the impression that there is no desire to
>>> re-introduce arbitrary key-value pairs with QMP/blockdev options.
>>
>> Hi Fiona,
>>
>> What do you mean by re-introduce?

Sorry, should've stated this better. It is possible to specify arbitrary
key-value pairs with '-drive', and by "re-introduce" I mean allowing the
same for '-blockdev'.

>>>>> Given that limitation, if we did go the string pairs route, I would
>>>>> expect it to be marked as "unstable" in the QAPI schema, so apps have
>>>>> a suitable warning NOT to rely on this.
>>>>
>>>> This sounds sensible to me.  We can continue exposing the most common
>>>> Ceph options through a proper QAPI schema but add key-value-pairs as an
>>>> alternative low-level route for those who want to avoid dealing with
>>>> physical configuration files.
>>>
>>> As written in the commit message, the cache option should not apply to
>>> all volumes, so using configuration files is rather impractical there.
>>>
>>> I'd prefer defining the cache option(s) explicitly, and have people add
>>> additional key-value pairs they require explicitly going forward. But if
>>> you really don't want me to, I can still go with the unstable, arbitrary
>>> strings approach instead.
>>
>> The RBD cache policy option would definitely count as one of the most
>> common, so I don't have an objection to it being added in an explicit
>> form.  I'm also fine with the "disabled" enum value that you expressed
>> a preference for in another email.
> 
> The QEMU block layer-wide "cache" option is kind of in the way though:
> if it's set to "off"/"none" or the more specific "cache.direct" option
> is set to "on", we disable the RBD cache.  So there is an existing way
> to control that, but it's at another level and QEMU cache modes aren't
> distinguished (i.e. there is no mapping to RBD which means that one can
> have "cache=writeback" set in QEMU but still get e.g. "writethrough"
> RBD cache policy come from the ceph.conf file).  An extra enum value
> for disabling the RBD cache might muddy the waters further.

Even with "cache=writeback" in QEMU, the RBD cache policy will default
to "writearound". And we do need "writeback" on the RBD side too:
https://git.proxmox.com/?p=qemu-server.git;a=commitdiff;h=738dc81cbae03ff03592c30a82c7e4d0d39a54ba;hp=e43b19109e57e6e7654c4fa6e2ed14a39b4a6fe2

> Another thing that comes to mind is that if you need to control the
> cache policy (or any other RBD option) on a per-image basis as opposed
> to per-user basis, you can employ image-level configuration overrides
> on the RBD side -- see "rbd config image get/set/ls/rm" commands.

Oh, that sounds like a good alternative :)

Best Regards,
Fiona