[PATCH v4 2/4] qcow2: add configurations for zoned format extension

Sam Li posted 4 patches 11 months, 3 weeks ago
Maintainers: Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>
There is a newer version of this series
[PATCH v4 2/4] qcow2: add configurations for zoned format extension
Posted by Sam Li 11 months, 3 weeks ago
To configure the zoned format feature on the qcow2 driver, it
requires settings as: the device size, zone model, zone size,
zone capacity, number of conventional zones, limits on zone
resources (max append sectors, max open zones, and max_active_zones).

To create a qcow2 file with zoned format, use command like this:
$ qemu-img create -f qcow2 test.qcow2 -o size=768M -o
zone_size=64M -o zone_capacity=64M -o nr_conv_zones=0 -o
max_append_sectors=512 -o max_open_zones=0 -o max_active_zones=0
-o zone_model=1

Signed-off-by: Sam Li <faithilikerun@gmail.com>
---
 block/qcow2.c                    | 186 ++++++++++++++++++++++++++++++-
 block/qcow2.h                    |  28 +++++
 docs/interop/qcow2.txt           |  36 ++++++
 include/block/block_int-common.h |  13 +++
 qapi/block-core.json             |  30 ++++-
 5 files changed, 291 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index b48cd9ce63..521276fc51 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -73,6 +73,7 @@ typedef struct {
 #define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
 #define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
 #define  QCOW2_EXT_MAGIC_DATA_FILE 0x44415441
+#define  QCOW2_EXT_MAGIC_ZONED_FORMAT 0x7a6264
 
 static int coroutine_fn
 qcow2_co_preadv_compressed(BlockDriverState *bs,
@@ -210,6 +211,7 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
     uint64_t offset;
     int ret;
     Qcow2BitmapHeaderExt bitmaps_ext;
+    Qcow2ZonedHeaderExtension zoned_ext;
 
     if (need_update_header != NULL) {
         *need_update_header = false;
@@ -431,6 +433,55 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
             break;
         }
 
+        case QCOW2_EXT_MAGIC_ZONED_FORMAT:
+        {
+            if (ext.len != sizeof(zoned_ext)) {
+                error_setg(errp, "zoned_ext: Invalid extension length");
+                return -EINVAL;
+            }
+            ret = bdrv_pread(bs->file, offset, ext.len, &zoned_ext, 0);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret, "zoned_ext: "
+                                             "Could not read ext header");
+                return ret;
+            }
+
+            zoned_ext.zone_size = be32_to_cpu(zoned_ext.zone_size);
+            zoned_ext.zone_capacity = be32_to_cpu(zoned_ext.zone_capacity);
+            zoned_ext.nr_conv_zones = be32_to_cpu(zoned_ext.nr_conv_zones);
+            zoned_ext.nr_zones = be32_to_cpu(zoned_ext.nr_zones);
+            zoned_ext.max_open_zones = be32_to_cpu(zoned_ext.max_open_zones);
+            zoned_ext.max_active_zones =
+                be32_to_cpu(zoned_ext.max_active_zones);
+            zoned_ext.max_append_sectors =
+                be32_to_cpu(zoned_ext.max_append_sectors);
+            s->zoned_header = zoned_ext;
+
+            /* refuse to open broken images */
+            if (zoned_ext.zone_size == 0) {
+                error_setg(errp, "Zoned extension header zone_size field "
+                                 "can not be 0");
+                return -EINVAL;
+            }
+            if (zoned_ext.zone_capacity > zoned_ext.zone_size) {
+                error_setg(errp, "Zoned extension header zone_capacity field "
+                                 "can not be larger that zone_size field");
+                return -EINVAL;
+            }
+            if (zoned_ext.nr_zones != DIV_ROUND_UP(
+                bs->total_sectors * BDRV_SECTOR_SIZE, zoned_ext.zone_size)) {
+                error_setg(errp, "Zoned extension header nr_zones field "
+                                 "is wrong");
+                return -EINVAL;
+            }
+
+#ifdef DEBUG_EXT
+            printf("Qcow2: Got zoned format extension: "
+                   "offset=%" PRIu32 "\n", offset);
+#endif
+            break;
+        }
+
         default:
             /* unknown magic - save it in case we need to rewrite the header */
             /* If you add a new feature, make sure to also update the fast
@@ -1967,6 +2018,14 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
     }
     bs->bl.pwrite_zeroes_alignment = s->subcluster_size;
     bs->bl.pdiscard_alignment = s->cluster_size;
+    bs->bl.zoned = s->zoned_header.zoned;
+    bs->bl.nr_zones = s->zoned_header.nr_zones;
+    bs->bl.max_append_sectors = s->zoned_header.max_append_sectors;
+    bs->bl.max_active_zones = s->zoned_header.max_active_zones;
+    bs->bl.max_open_zones = s->zoned_header.max_open_zones;
+    bs->bl.zone_size = s->zoned_header.zone_size;
+    bs->bl.zone_capacity = s->zoned_header.zone_capacity;
+    bs->bl.write_granularity = BDRV_SECTOR_SIZE;
 }
 
 static int qcow2_reopen_prepare(BDRVReopenState *state,
@@ -3089,6 +3148,30 @@ int qcow2_update_header(BlockDriverState *bs)
         buflen -= ret;
     }
 
+    /* Zoned devices header extension */
+    if (s->zoned_header.zoned == BLK_Z_HM) {
+        Qcow2ZonedHeaderExtension zoned_header = {
+            .zoned              = s->zoned_header.zoned,
+            .zone_size          = cpu_to_be32(s->zoned_header.zone_size),
+            .zone_capacity      = cpu_to_be32(s->zoned_header.zone_capacity),
+            .nr_conv_zones      = cpu_to_be32(s->zoned_header.nr_conv_zones),
+            .nr_zones           = cpu_to_be32(s->zoned_header.nr_zones),
+            .max_open_zones     = cpu_to_be32(s->zoned_header.max_open_zones),
+            .max_active_zones   =
+                cpu_to_be32(s->zoned_header.max_active_zones),
+            .max_append_sectors =
+                cpu_to_be32(s->zoned_header.max_append_sectors)
+        };
+        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_ZONED_FORMAT,
+                             &zoned_header, sizeof(zoned_header),
+                             buflen);
+        if (ret < 0) {
+            goto fail;
+        }
+        buf += ret;
+        buflen -= ret;
+    }
+
     /* Keep unknown header extensions */
     QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
         ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
@@ -3768,11 +3851,70 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
     }
 
     /* Set the external data file if necessary */
+    BDRVQcow2State *s = blk_bs(blk)->opaque;
     if (data_bs) {
-        BDRVQcow2State *s = blk_bs(blk)->opaque;
         s->image_data_file = g_strdup(data_bs->filename);
     }
 
+    if (qcow2_opts->has_zone_model && qcow2_opts->zone_model == BLK_Z_HM) {
+        if (!qcow2_opts->has_zone_size) {
+            error_setg(errp, "Missing zone_size parameter");
+            ret = -EINVAL;
+            goto out;
+        }
+
+        if (qcow2_opts->zone_size == 0) {
+            s->zoned_header.zoned = BLK_Z_NONE;
+            error_setg(errp, "Zoned devices can not allow a larger-than-zero "
+                             "zone_size");
+            ret = -EINVAL;
+            goto out;
+        }
+
+        s->zoned_header.zoned = qcow2_opts->zone_model;
+        s->zoned_header.zone_size = qcow2_opts->zone_size;
+        s->zoned_header.nr_zones = DIV_ROUND_UP(qcow2_opts->size,
+                                                qcow2_opts->zone_size);
+
+        if (qcow2_opts->has_zone_capacity) {
+            if (qcow2_opts->zone_capacity > qcow2_opts->zone_size) {
+                s->zoned_header.zoned = BLK_Z_NONE;
+                error_setg(errp, "zone capacity %" PRIu64 "B exceeds zone size "
+                           "%" PRIu64"B", qcow2_opts->zone_capacity,
+                           qcow2_opts->zone_size);
+                ret = -EINVAL;
+                goto out;
+            }
+            s->zoned_header.zone_capacity = qcow2_opts->zone_capacity;
+        } else {
+            s->zoned_header.zone_capacity = qcow2_opts->zone_size;
+        }
+
+        if (qcow2_opts->has_nr_conv_zones) {
+            s->zoned_header.nr_conv_zones = qcow2_opts->nr_conv_zones;
+        }
+
+        if (qcow2_opts->has_max_active_zones) {
+            if (qcow2_opts->max_open_zones > qcow2_opts->max_active_zones) {
+                s->zoned_header.zoned = BLK_Z_NONE;
+                error_setg(errp, "max_open_zones %" PRIu32 " exceeds "
+                           "max_active_zones %" PRIu32"",
+                           qcow2_opts->max_open_zones,
+                           qcow2_opts->max_active_zones);
+                ret = -EINVAL;
+                goto out;
+            }
+            if (qcow2_opts->has_max_open_zones) {
+                s->zoned_header.max_open_zones = qcow2_opts->max_active_zones;
+            } else {
+                s->zoned_header.max_open_zones = qcow2_opts->max_active_zones;
+            }
+        }
+        s->zoned_header.max_append_sectors = qcow2_opts->max_append_sectors;
+    } else {
+        s->zoned_header.zoned = BLK_Z_NONE;
+    }
+
     /* Create a full header (including things like feature table) */
     ret = qcow2_update_header(blk_bs(blk));
     bdrv_graph_co_rdunlock();
@@ -3903,6 +4045,13 @@ qcow2_co_create_opts(BlockDriver *drv, const char *filename, QemuOpts *opts,
         { BLOCK_OPT_COMPAT_LEVEL,       "version" },
         { BLOCK_OPT_DATA_FILE_RAW,      "data-file-raw" },
         { BLOCK_OPT_COMPRESSION_TYPE,   "compression-type" },
+        { BLOCK_OPT_Z_MODEL,            "zone-model"},
+        { BLOCK_OPT_Z_NR_COV,           "nr-conv-zones"},
+        { BLOCK_OPT_Z_MOZ,              "max-open-zones"},
+        { BLOCK_OPT_Z_MAZ,              "max-active-zones"},
+        { BLOCK_OPT_Z_MAS,              "max-append-sectors"},
+        { BLOCK_OPT_Z_SIZE,             "zone-size"},
+        { BLOCK_OPT_Z_CAP,              "zone-capacity"},
         { NULL, NULL },
     };
 
@@ -6067,6 +6216,41 @@ static QemuOptsList qcow2_create_opts = {
             .help = "Compression method used for image cluster "        \
                     "compression",                                      \
             .def_value_str = "zlib"                                     \
+        },                                                              \
+        {                                                               \
+            .name = BLOCK_OPT_Z_MODEL,                                  \
+            .type = QEMU_OPT_NUMBER,                                    \
+            .help = "zone model",                                      \
+        },                                                              \
+        {                                                               \
+            .name = BLOCK_OPT_Z_SIZE,                                   \
+            .type = QEMU_OPT_SIZE,                                      \
+            .help = "zone size",                                        \
+        },                                                              \
+        {                                                               \
+            .name = BLOCK_OPT_Z_CAP,                                    \
+            .type = QEMU_OPT_SIZE,                                      \
+            .help = "zone capacity",                                    \
+        },                                                              \
+        {                                                               \
+            .name = BLOCK_OPT_Z_NR_COV,                                 \
+            .type = QEMU_OPT_NUMBER,                                    \
+            .help = "numbers of conventional zones",                    \
+        },                                                              \
+        {                                                               \
+            .name = BLOCK_OPT_Z_MAS,                                    \
+            .type = QEMU_OPT_NUMBER,                                    \
+            .help = "max append sectors",                               \
+        },                                                              \
+        {                                                               \
+            .name = BLOCK_OPT_Z_MAZ,                                    \
+            .type = QEMU_OPT_NUMBER,                                    \
+            .help = "max active zones",                                 \
+        },                                                              \
+        {                                                               \
+            .name = BLOCK_OPT_Z_MOZ,                                    \
+            .type = QEMU_OPT_NUMBER,                                    \
+            .help = "max open zones",                                   \
         },
         QCOW_COMMON_OPTIONS,
         { /* end of list */ }
diff --git a/block/qcow2.h b/block/qcow2.h
index f789ce3ae0..1929788494 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -236,6 +236,25 @@ typedef struct Qcow2CryptoHeaderExtension {
     uint64_t length;
 } QEMU_PACKED Qcow2CryptoHeaderExtension;
 
+typedef struct Qcow2ZonedHeaderExtension {
+    /* Zoned device attributes */
+    uint8_t zoned;
+    uint8_t reserved[3];
+    uint32_t zone_size;
+    uint32_t zone_capacity;
+    uint32_t nr_conv_zones;
+    uint32_t nr_zones;
+    uint32_t max_active_zones;
+    uint32_t max_open_zones;
+    uint32_t max_append_sectors;
+} QEMU_PACKED Qcow2ZonedHeaderExtension;
+
+typedef struct Qcow2Wp {
+    uint64_t wp;
+    QLIST_ENTRY(Qcow2Wp) exp_open_zone_entry;
+    QLIST_ENTRY(Qcow2Wp) imp_open_zone_entry;
+} Qcow2Wp;
+
 typedef struct Qcow2UnknownHeaderExtension {
     uint32_t magic;
     uint32_t len;
@@ -422,6 +441,15 @@ typedef struct BDRVQcow2State {
      * is to convert the image with the desired compression type set.
      */
     Qcow2CompressionType compression_type;
+
+    /* States of zoned device */
+    Qcow2ZonedHeaderExtension zoned_header;
+    QLIST_HEAD(, Qcow2Wp) exp_open_zones;
+    QLIST_HEAD(, Qcow2Wp) imp_open_zones;
+    Qcow2Wp *wp;
+    uint32_t nr_zones_exp_open;
+    uint32_t nr_zones_imp_open;
+    uint32_t nr_zones_closed;
 } BDRVQcow2State;
 
 typedef struct Qcow2COWRegion {
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index 2c4618375a..80314614aa 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -331,6 +331,42 @@ The fields of the bitmaps extension are:
                    Offset into the image file at which the bitmap directory
                    starts. Must be aligned to a cluster boundary.
 
+== Zoned extension ==
+
+The zoned extension is an optional header extension. It contains fields for
+emulating the zoned stroage model (https://zonedstorage.io/).
+
+The fields of the zoned extension are:
+    Byte        0:  zoned
+                    Zoned model, 1 for host-managed and 0 for non-zoned devices.
+
+            1 - 3:  Reserved, must be zero.
+
+            4 - 7:  zone_size
+                    Total number of logical blocks within the zones in bytes.
+
+           8 - 11:  zone_capacity
+                    The number of writable logical blocks within the zones in
+                    bytes. A zone capacity is always smaller or equal to the
+                    zone size.
+
+          12 - 15:  nr_conv_zones
+                    The number of conventional zones.
+
+          16 - 19:  nr_zones
+                    The number of zones.
+
+          20 - 23:  max_active_zones
+                    The limit of the zones that have the implicit open,
+                    explicit open or closed state.
+
+          24 - 27:  max_open_zones
+                    The maximal allowed open zones.
+
+          28 - 35:  max_append_sectors
+                    The maximal number of 512-byte sectors of a zone
+                    append request that can be issued to the device.
+
 == Full disk encryption header pointer ==
 
 The full disk encryption header must be present if, and only if, the
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 85be256c09..d169d15dd6 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -57,6 +57,13 @@
 #define BLOCK_OPT_DATA_FILE_RAW     "data_file_raw"
 #define BLOCK_OPT_COMPRESSION_TYPE  "compression_type"
 #define BLOCK_OPT_EXTL2             "extended_l2"
+#define BLOCK_OPT_Z_MODEL           "zone_model"
+#define BLOCK_OPT_Z_SIZE            "zone_size"
+#define BLOCK_OPT_Z_CAP             "zone_capacity"
+#define BLOCK_OPT_Z_NR_COV          "nr_conv_zones"
+#define BLOCK_OPT_Z_MAS             "max_append_sectors"
+#define BLOCK_OPT_Z_MAZ             "max_active_zones"
+#define BLOCK_OPT_Z_MOZ             "max_open_zones"
 
 #define BLOCK_PROBE_BUF_SIZE        512
 
@@ -878,6 +885,12 @@ typedef struct BlockLimits {
     /* zone size expressed in bytes */
     uint32_t zone_size;
 
+    /*
+     * the number of usable logical blocks within the zone, expressed
+     * in bytes. A zone capacity is smaller or equal to the zone size.
+     */
+    uint32_t zone_capacity;
+
     /* total number of zones */
     uint32_t nr_zones;
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2b1d493d6e..2aad82c399 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -5021,6 +5021,27 @@
 # @compression-type: The image cluster compression method
 #     (default: zlib, since 5.1)
 #
+# @zone-model: Zoned device model, 1 for host-managed and 0 for
+#     non-zoned devices (default: 0, since 8.2)
+#
+# @zone-size: Total number of logical blocks within zones in bytes
+#     (since 8.2)
+#
+# @zone-capacity: The number of usable logical blocks within zones
+#     in bytes.  A zone capacity is always smaller or equal to the
+#     zone size. (since 8.2)
+#
+# @nr-conv-zones: The number of conventional zones of the zoned device
+#     (since 8.2)
+#
+# @max-open-zones: The maximal number of open zones (since 8.2)
+#
+# @max-active-zones: The limit of the zones that have the implicit
+#     open, explicit open or closed state (since 8.2)
+#
+# @max-append-sectors: The maximal number of 512-byte sectors of a zone
+#     append request that can be issued to the device. (since 8.2)
+#
 # Since: 2.12
 ##
 { 'struct': 'BlockdevCreateOptionsQcow2',
@@ -5037,7 +5058,14 @@
             '*preallocation':   'PreallocMode',
             '*lazy-refcounts':  'bool',
             '*refcount-bits':   'int',
-            '*compression-type':'Qcow2CompressionType' } }
+            '*compression-type':'Qcow2CompressionType',
+            '*zone-model':         'uint8',
+            '*zone-size':          'size',
+            '*zone-capacity':      'size',
+            '*nr-conv-zones':      'uint32',
+            '*max-open-zones':     'uint32',
+            '*max-active-zones':   'uint32',
+            '*max-append-sectors': 'uint32' } }
 
 ##
 # @BlockdevCreateOptionsQed:
-- 
2.40.1
Re: [PATCH v4 2/4] qcow2: add configurations for zoned format extension
Posted by Eric Blake 11 months, 2 weeks ago
On Mon, Sep 18, 2023 at 05:53:11PM +0800, Sam Li wrote:
> To configure the zoned format feature on the qcow2 driver, it
> requires settings as: the device size, zone model, zone size,
> zone capacity, number of conventional zones, limits on zone
> resources (max append sectors, max open zones, and max_active_zones).
> 
> To create a qcow2 file with zoned format, use command like this:
> $ qemu-img create -f qcow2 test.qcow2 -o size=768M -o
> zone_size=64M -o zone_capacity=64M -o nr_conv_zones=0 -o
> max_append_sectors=512 -o max_open_zones=0 -o max_active_zones=0
> -o zone_model=1
> 
> Signed-off-by: Sam Li <faithilikerun@gmail.com>
> ---
>  block/qcow2.c                    | 186 ++++++++++++++++++++++++++++++-
>  block/qcow2.h                    |  28 +++++
>  docs/interop/qcow2.txt           |  36 ++++++
>  include/block/block_int-common.h |  13 +++
>  qapi/block-core.json             |  30 ++++-
>  5 files changed, 291 insertions(+), 2 deletions(-)

Below, I'll focus only on the spec change, not the implementation:

> 
> diff --git a/block/qcow2.c b/block/qcow2.c
> index b48cd9ce63..521276fc51 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -73,6 +73,7 @@ typedef struct {
>  #define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
>  #define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
>  #define  QCOW2_EXT_MAGIC_DATA_FILE 0x44415441
> +#define  QCOW2_EXT_MAGIC_ZONED_FORMAT 0x7a6264

Why not spell it 0x007a6264 with 8 hex digits, like the others?  (I
get why you choose that constant, though - ascii 'zbd')

> +++ b/docs/interop/qcow2.txt
> @@ -331,6 +331,42 @@ The fields of the bitmaps extension are:
>                     Offset into the image file at which the bitmap directory
>                     starts. Must be aligned to a cluster boundary.
>  
> +== Zoned extension ==

Where is the magic number for this extension called out?  That's
missing, and MUST be part of the spec.

Back-compatibility constraints: you should consider what happens in
both of the following cases:

a program that intends to do read-only access to the qcow2 file but
which does not understand this extension header (for example, an older
version of 'qemu-img convert' being used to extract data from a newer
.qcow2 file with this header present - but also the new 'nbdkit
qcow2dec' decoder plugin just released in nbdkit 1.36).  Is it safe to
read the data as-is, by basically ignoring zone informations?  Or will
that ever produce wrong data (for example, if operations on a
particular zone imply that the guest should read all zeroes after the
current zone offset within that zone, regardless of whether non-zero
content was previously stored at those offsets - then not honoring the
existence of the extension header would require you to add and
document an incompatible feature bit so that reader apps fail to open
the file rather than reading wrong data).

a program that intends to edit the qcow2 file but which does not
understand this extension header (again, consider access by an older
version of qemu).  Is it safe to just write data anywhere in the disk,
but where failure to update the zone metadata means that all
subsequent use of the file MUST behave as if it is now a non-zeoned
device?  If so, then it is sufficient to document an autoclear feature
bit: any time a newer qcow2 writer creates a file with a zoned
extension, it also sets the autoclear feature bit; any time an older
qcow2 writer edits a file with the autoclear bit, it clears the bit
(because it has no idea if its edits invalidated the unknown
extension).  Then when the new qcow2 program again accesses the file,
it knows that the zone information is no longer reliable, and can fall
back to forcing the image to behave as flat.

> +
> +The zoned extension is an optional header extension. It contains fields for
> +emulating the zoned stroage model (https://zonedstorage.io/).

Assuming that you'll need to add one or two feature bits (most likely
an autoclear bit, to prevent editing the file without keeping the zone
information up-to-data, and possibly also an incompatible feature bit,
if interpreting the file even without editing it is impossible without
understanding zones), you'll want to mention this header's relation to
those feature bit(s).

> +
> +The fields of the zoned extension are:
> +    Byte        0:  zoned
> +                    Zoned model, 1 for host-managed and 0 for non-zoned devices.

This tells me nothing about what those two models mean.  You'll need
to describe them better for an independent implementation of a qcow2
reader (such as 'nbdkit qcow2dec') to be able to properly read such a
file with either value, even if it doesn't plan on editing it.

If we do add feature bits, what happens when reading a file when the
feature bits are set but this extension header is missing?

> +
> +            1 - 3:  Reserved, must be zero.
> +
> +            4 - 7:  zone_size
> +                    Total number of logical blocks within the zones in bytes.

This is confusing.  It is a number of blocks, or a number of bytes?  I
would prefer bytes (will we ever have to worry about a device that can
have zones larger than 4G - then make this 8 bytes instead of 4, and
see comments below about alignment), but then word it as:

Total size of each zone, in bytes.

Also, should you require that the zone_size be a multiple of the
cluster size and/or a power of 2?  (That is, if I have a qcow2 file
with 2M clusters, does it make sense to permit a zone size of only 1M,
or should zone size always be at least as large as a cluster?)

> +
> +           8 - 11:  zone_capacity
> +                    The number of writable logical blocks within the zones in
> +                    bytes. A zone capacity is always smaller or equal to the
> +                    zone size.

Again, mixing the term blocks and bytes in the same sentence is confusing.

> +
> +          12 - 15:  nr_conv_zones
> +                    The number of conventional zones.
> +
> +          16 - 19:  nr_zones
> +                    The number of zones.

What's the difference between these two numbers?

> +
> +          20 - 23:  max_active_zones
> +                    The limit of the zones that have the implicit open,
> +                    explicit open or closed state.
> +
> +          24 - 27:  max_open_zones
> +                    The maximal allowed open zones.
> +
> +          28 - 35:  max_append_sectors
> +                    The maximal number of 512-byte sectors of a zone
> +                    append request that can be issued to the device.

Is this value in sectors instead of bytes?  I'd prefer bytes, but then
document that the value must be a multiple of 512.  Also, having a
64-bit number not be 64-bit aligned within the extension header is
unwise.  Please rearrange fields so that all 64-bit fields are 8-byte
aligned.

This structure as written has implicit tail padding (all extension
headers must be 8-byte multiples in the end; but the spec already
documents how that is handled).  Is it worth explicitly calling out
tail padding up to an 8-byte boundary?

> +
>  == Full disk encryption header pointer ==
>  
>  The full disk encryption header must be present if, and only if, the
> diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
> index 85be256c09..d169d15dd6 100644
> --- a/include/block/block_int-common.h

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org
Re: [PATCH v4 2/4] qcow2: add configurations for zoned format extension
Posted by Sam Li 11 months ago
Hello Eric,

Eric Blake <eblake@redhat.com> 于2023年9月28日周四 23:15写道:
>
> On Mon, Sep 18, 2023 at 05:53:11PM +0800, Sam Li wrote:
> > To configure the zoned format feature on the qcow2 driver, it
> > requires settings as: the device size, zone model, zone size,
> > zone capacity, number of conventional zones, limits on zone
> > resources (max append sectors, max open zones, and max_active_zones).
> >
> > To create a qcow2 file with zoned format, use command like this:
> > $ qemu-img create -f qcow2 test.qcow2 -o size=768M -o
> > zone_size=64M -o zone_capacity=64M -o nr_conv_zones=0 -o
> > max_append_sectors=512 -o max_open_zones=0 -o max_active_zones=0
> > -o zone_model=1
> >
> > Signed-off-by: Sam Li <faithilikerun@gmail.com>
> > ---
> >  block/qcow2.c                    | 186 ++++++++++++++++++++++++++++++-
> >  block/qcow2.h                    |  28 +++++
> >  docs/interop/qcow2.txt           |  36 ++++++
> >  include/block/block_int-common.h |  13 +++
> >  qapi/block-core.json             |  30 ++++-
> >  5 files changed, 291 insertions(+), 2 deletions(-)
>
> Below, I'll focus only on the spec change, not the implementation:
>
> >
> > diff --git a/block/qcow2.c b/block/qcow2.c
> > index b48cd9ce63..521276fc51 100644
> > --- a/block/qcow2.c
> > +++ b/block/qcow2.c
> > @@ -73,6 +73,7 @@ typedef struct {
> >  #define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
> >  #define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
> >  #define  QCOW2_EXT_MAGIC_DATA_FILE 0x44415441
> > +#define  QCOW2_EXT_MAGIC_ZONED_FORMAT 0x7a6264
>
> Why not spell it 0x007a6264 with 8 hex digits, like the others?  (I
> get why you choose that constant, though - ascii 'zbd')
>
> > +++ b/docs/interop/qcow2.txt
> > @@ -331,6 +331,42 @@ The fields of the bitmaps extension are:
> >                     Offset into the image file at which the bitmap directory
> >                     starts. Must be aligned to a cluster boundary.
> >
> > +== Zoned extension ==
>
> Where is the magic number for this extension called out?  That's
> missing, and MUST be part of the spec.

It's a part of the header extension type in the spec. I will add it.

>
> Back-compatibility constraints: you should consider what happens in
> both of the following cases:
>
> a program that intends to do read-only access to the qcow2 file but
> which does not understand this extension header (for example, an older
> version of 'qemu-img convert' being used to extract data from a newer
> .qcow2 file with this header present - but also the new 'nbdkit
> qcow2dec' decoder plugin just released in nbdkit 1.36).  Is it safe to
> read the data as-is, by basically ignoring zone informations?  Or will
> that ever produce wrong data (for example, if operations on a
> particular zone imply that the guest should read all zeroes after the
> current zone offset within that zone, regardless of whether non-zero
> content was previously stored at those offsets - then not honoring the
> existence of the extension header would require you to add and
> document an incompatible feature bit so that reader apps fail to open
> the file rather than reading wrong data).
>
> a program that intends to edit the qcow2 file but which does not
> understand this extension header (again, consider access by an older
> version of qemu).  Is it safe to just write data anywhere in the disk,
> but where failure to update the zone metadata means that all
> subsequent use of the file MUST behave as if it is now a non-zeoned
> device?  If so, then it is sufficient to document an autoclear feature
> bit: any time a newer qcow2 writer creates a file with a zoned
> extension, it also sets the autoclear feature bit; any time an older
> qcow2 writer edits a file with the autoclear bit, it clears the bit
> (because it has no idea if its edits invalidated the unknown
> extension).  Then when the new qcow2 program again accesses the file,
> it knows that the zone information is no longer reliable, and can fall
> back to forcing the image to behave as flat.

Considering access by an older version of qemu ('old qemu' for abbr.)
with a qcow2 file created with zoned extension ('new file' for abbr.),
reads from a new file on old qemu which does not understand zoned
information are safe. The zoned extension represents necessary zone
states for all zones, which puts constraints to operations on the
zones. For example, writes to offsets that are over the capacity of
that zone are not allowed, where it will be read as zeroes. The old
qemu ignores that and reads the new file as a regular one anyway.

However, what is unsafe is when an old qemu program gets involved in
editing a new file. The new qemu will not see the write pointer
changes of the new file that was done sometime by old qemu programs.
Then the zone information is no longer reliable as you illustrated.

Therefore I will add an autoclear bit for the latter case. It clears
the zoned extension when it is set by old qemu programs.

>
> > +
> > +The zoned extension is an optional header extension. It contains fields for
> > +emulating the zoned stroage model (https://zonedstorage.io/).
>
> Assuming that you'll need to add one or two feature bits (most likely
> an autoclear bit, to prevent editing the file without keeping the zone
> information up-to-data, and possibly also an incompatible feature bit,
> if interpreting the file even without editing it is impossible without
> understanding zones), you'll want to mention this header's relation to
> those feature bit(s).
>
> > +
> > +The fields of the zoned extension are:
> > +    Byte        0:  zoned
> > +                    Zoned model, 1 for host-managed and 0 for non-zoned devices.
>
> This tells me nothing about what those two models mean.  You'll need
> to describe them better for an independent implementation of a qcow2
> reader (such as 'nbdkit qcow2dec') to be able to properly read such a
> file with either value, even if it doesn't plan on editing it.
>
> If we do add feature bits, what happens when reading a file when the
> feature bits are set but this extension header is missing?

It's not allowed to set the zone model to host-managed type without
other extension header fields like zone size.

>
> > +
> > +            1 - 3:  Reserved, must be zero.
> > +
> > +            4 - 7:  zone_size
> > +                    Total number of logical blocks within the zones in bytes.
>
> This is confusing.  It is a number of blocks, or a number of bytes?  I
> would prefer bytes (will we ever have to worry about a device that can
> have zones larger than 4G - then make this 8 bytes instead of 4, and
> see comments below about alignment), but then word it as:
>
> Total size of each zone, in bytes.
>
> Also, should you require that the zone_size be a multiple of the
> cluster size and/or a power of 2?  (That is, if I have a qcow2 file
> with 2M clusters, does it make sense to permit a zone size of only 1M,
> or should zone size always be at least as large as a cluster?)

In general the zone size must be a power of two. It is not related to
the cluster size.

>
> > +
> > +           8 - 11:  zone_capacity
> > +                    The number of writable logical blocks within the zones in
> > +                    bytes. A zone capacity is always smaller or equal to the
> > +                    zone size.
>
> Again, mixing the term blocks and bytes in the same sentence is confusing.
>
> > +
> > +          12 - 15:  nr_conv_zones
> > +                    The number of conventional zones.
> > +
> > +          16 - 19:  nr_zones
> > +                    The number of zones.
>
> What's the difference between these two numbers?

nr_zone = conventional_zones + sequential_zones.

>
> > +
> > +          20 - 23:  max_active_zones
> > +                    The limit of the zones that have the implicit open,
> > +                    explicit open or closed state.
> > +
> > +          24 - 27:  max_open_zones
> > +                    The maximal allowed open zones.
> > +
> > +          28 - 35:  max_append_sectors
> > +                    The maximal number of 512-byte sectors of a zone
> > +                    append request that can be issued to the device.
>
> Is this value in sectors instead of bytes?  I'd prefer bytes, but then
> document that the value must be a multiple of 512.  Also, having a
> 64-bit number not be 64-bit aligned within the extension header is
> unwise.  Please rearrange fields so that all 64-bit fields are 8-byte
> aligned.
>
> This structure as written has implicit tail padding (all extension
> headers must be 8-byte multiples in the end; but the spec already
> documents how that is handled).  Is it worth explicitly calling out
> tail padding up to an 8-byte boundary?

I see. I will rearrange it to 8-byte alignment.

Thanks for your comments!

>
> > +
> >  == Full disk encryption header pointer ==
> >
> >  The full disk encryption header must be present if, and only if, the
> > diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
> > index 85be256c09..d169d15dd6 100644
> > --- a/include/block/block_int-common.h
>
> --
> Eric Blake, Principal Software Engineer
> Red Hat, Inc.
> Virtualization:  qemu.org | libguestfs.org
>
Re: [PATCH v4 2/4] qcow2: add configurations for zoned format extension
Posted by Markus Armbruster 11 months, 2 weeks ago
Sam Li <faithilikerun@gmail.com> writes:

> To configure the zoned format feature on the qcow2 driver, it
> requires settings as: the device size, zone model, zone size,
> zone capacity, number of conventional zones, limits on zone
> resources (max append sectors, max open zones, and max_active_zones).
>
> To create a qcow2 file with zoned format, use command like this:
> $ qemu-img create -f qcow2 test.qcow2 -o size=768M -o
> zone_size=64M -o zone_capacity=64M -o nr_conv_zones=0 -o
> max_append_sectors=512 -o max_open_zones=0 -o max_active_zones=0
> -o zone_model=1
>
> Signed-off-by: Sam Li <faithilikerun@gmail.com>

[...]

> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 2b1d493d6e..2aad82c399 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -5021,6 +5021,27 @@
>  # @compression-type: The image cluster compression method
>  #     (default: zlib, since 5.1)
>  #
> +# @zone-model: Zoned device model, 1 for host-managed and 0 for
> +#     non-zoned devices (default: 0, since 8.2)

Shouldn't this be a QAPI enum rather than a number?

> +#
> +# @zone-size: Total number of logical blocks within zones in bytes
> +#     (since 8.2)
> +#
> +# @zone-capacity: The number of usable logical blocks within zones
> +#     in bytes.  A zone capacity is always smaller or equal to the
> +#     zone size. (since 8.2)
> +#
> +# @nr-conv-zones: The number of conventional zones of the zoned device
> +#     (since 8.2)

I still think @conventional-zones would be more obvious.

> +#
> +# @max-open-zones: The maximal number of open zones (since 8.2)
> +#
> +# @max-active-zones: The limit of the zones that have the implicit
> +#     open, explicit open or closed state (since 8.2)

Maybe "The maximum number of zones in the implicit open, explicit open
or closed state".

(I'll repeat suggestions until you reject them, just to make sure they
get ignored by accident)

> +#
> +# @max-append-sectors: The maximal number of 512-byte sectors of a zone
> +#     append request that can be issued to the device. (since 8.2)
> +#
>  # Since: 2.12
>  ##
>  { 'struct': 'BlockdevCreateOptionsQcow2',
> @@ -5037,7 +5058,14 @@
>              '*preallocation':   'PreallocMode',
>              '*lazy-refcounts':  'bool',
>              '*refcount-bits':   'int',
> -            '*compression-type':'Qcow2CompressionType' } }
> +            '*compression-type':'Qcow2CompressionType',
> +            '*zone-model':         'uint8',
> +            '*zone-size':          'size',
> +            '*zone-capacity':      'size',
> +            '*nr-conv-zones':      'uint32',
> +            '*max-open-zones':     'uint32',
> +            '*max-active-zones':   'uint32',
> +            '*max-append-sectors': 'uint32' } }
>  
>  ##
>  # @BlockdevCreateOptionsQed:
Re: [PATCH v4 2/4] qcow2: add configurations for zoned format extension
Posted by Sam Li 11 months, 2 weeks ago
Markus Armbruster <armbru@redhat.com> 于2023年9月25日周一 21:05写道:
>
> Sam Li <faithilikerun@gmail.com> writes:
>
> > To configure the zoned format feature on the qcow2 driver, it
> > requires settings as: the device size, zone model, zone size,
> > zone capacity, number of conventional zones, limits on zone
> > resources (max append sectors, max open zones, and max_active_zones).
> >
> > To create a qcow2 file with zoned format, use command like this:
> > $ qemu-img create -f qcow2 test.qcow2 -o size=768M -o
> > zone_size=64M -o zone_capacity=64M -o nr_conv_zones=0 -o
> > max_append_sectors=512 -o max_open_zones=0 -o max_active_zones=0
> > -o zone_model=1
> >
> > Signed-off-by: Sam Li <faithilikerun@gmail.com>
>
> [...]
>
> > diff --git a/qapi/block-core.json b/qapi/block-core.json
> > index 2b1d493d6e..2aad82c399 100644
> > --- a/qapi/block-core.json
> > +++ b/qapi/block-core.json
> > @@ -5021,6 +5021,27 @@
> >  # @compression-type: The image cluster compression method
> >  #     (default: zlib, since 5.1)
> >  #
> > +# @zone-model: Zoned device model, 1 for host-managed and 0 for
> > +#     non-zoned devices (default: 0, since 8.2)
>
> Shouldn't this be a QAPI enum rather than a number?
>
> > +#
> > +# @zone-size: Total number of logical blocks within zones in bytes
> > +#     (since 8.2)
> > +#
> > +# @zone-capacity: The number of usable logical blocks within zones
> > +#     in bytes.  A zone capacity is always smaller or equal to the
> > +#     zone size. (since 8.2)
> > +#
> > +# @nr-conv-zones: The number of conventional zones of the zoned device
> > +#     (since 8.2)
>
> I still think @conventional-zones would be more obvious.
>
> > +#
> > +# @max-open-zones: The maximal number of open zones (since 8.2)
> > +#
> > +# @max-active-zones: The limit of the zones that have the implicit
> > +#     open, explicit open or closed state (since 8.2)
>
> Maybe "The maximum number of zones in the implicit open, explicit open
> or closed state".
>
> (I'll repeat suggestions until you reject them, just to make sure they
> get ignored by accident)

Thanks for noticing. I will change them (enum, conv, maz) in v5.


>
> > +#
> > +# @max-append-sectors: The maximal number of 512-byte sectors of a zone
> > +#     append request that can be issued to the device. (since 8.2)
> > +#
> >  # Since: 2.12
> >  ##
> >  { 'struct': 'BlockdevCreateOptionsQcow2',
> > @@ -5037,7 +5058,14 @@
> >              '*preallocation':   'PreallocMode',
> >              '*lazy-refcounts':  'bool',
> >              '*refcount-bits':   'int',
> > -            '*compression-type':'Qcow2CompressionType' } }
> > +            '*compression-type':'Qcow2CompressionType',
> > +            '*zone-model':         'uint8',
> > +            '*zone-size':          'size',
> > +            '*zone-capacity':      'size',
> > +            '*nr-conv-zones':      'uint32',
> > +            '*max-open-zones':     'uint32',
> > +            '*max-active-zones':   'uint32',
> > +            '*max-append-sectors': 'uint32' } }
> >
> >  ##
> >  # @BlockdevCreateOptionsQed:
>