[RFC v2 7/7] hw/nvme: make ZDED persistent

Sam Li posted 7 patches 1 year ago
There is a newer version of this series
[RFC v2 7/7] hw/nvme: make ZDED persistent
Posted by Sam Li 1 year ago
Zone descriptor extension data (ZDED) is not persistent across QEMU
restarts. The zone descriptor extension valid bit (ZDEV) is part of
zone attributes, which sets to one when the ZDED is associated with
the zone.

With the qcow2 img as the backing file, the NVMe ZNS device stores
the zone attributes at the following eight bit of zone type bit of write
pointers for each zone. The ZDED is stored as part of zoned metadata as
write pointers.

Signed-off-by: Sam Li <faithilikerun@gmail.com>
---
 block/qcow2.c                | 45 ++++++++++++++++++++++++++++++++++++
 hw/nvme/ctrl.c               |  1 +
 include/block/block-common.h |  1 +
 3 files changed, 47 insertions(+)

diff --git a/block/qcow2.c b/block/qcow2.c
index 74d2e2bf39..861a8f9f06 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 
 #include "block/qdict.h"
+#include "block/nvme.h"
 #include "sysemu/block-backend.h"
 #include "qemu/main-loop.h"
 #include "qemu/module.h"
@@ -235,6 +236,17 @@ static inline BlockZoneState qcow2_get_zone_state(BlockDriverState *bs,
     return BLK_ZS_NOT_WP;
 }
 
+static inline void qcow2_set_za(uint64_t *wp, uint8_t za)
+{
+    /*
+     * The zone attribute takes up one byte. Store it after the zoned
+     * bit.
+     */
+    uint64_t addr = *wp;
+    addr |= ((uint64_t)za << 51);
+    *wp = addr;
+}
+
 /*
  * Write the new wp value to the dedicated location of the image file.
  */
@@ -4990,6 +5002,36 @@ unlock:
     return ret;
 }
 
+static int qcow2_zns_set_zded(BlockDriverState *bs, uint32_t index)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int ret;
+
+    qemu_co_mutex_lock(&bs->wps->colock);
+    uint64_t *wp = &bs->wps->wp[index];
+    BlockZoneState zs = qcow2_get_zone_state(bs, index);
+    if (zs == BLK_ZS_EMPTY) {
+        ret = qcow2_check_zone_resources(bs, zs);
+        if (ret < 0) {
+            goto unlock;
+        }
+
+        qcow2_set_za(wp, NVME_ZA_ZD_EXT_VALID);
+        ret = qcow2_write_wp_at(bs, wp, index);
+        if (ret < 0) {
+            error_report("Failed to set zone extension at 0x%" PRIx64 "", *wp);
+            goto unlock;
+        }
+        s->nr_zones_closed++;
+        qemu_co_mutex_unlock(&bs->wps->colock);
+        return ret;
+    }
+
+unlock:
+    qemu_co_mutex_unlock(&bs->wps->colock);
+    return NVME_ZONE_INVAL_TRANSITION;
+}
+
 static int coroutine_fn qcow2_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
                                            int64_t offset, int64_t len)
 {
@@ -5046,6 +5088,9 @@ static int coroutine_fn qcow2_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
     case BLK_ZO_OFFLINE:
         /* There are no transitions from the offline state to any other state */
         break;
+    case BLK_ZO_SET_ZDED:
+        ret = qcow2_zns_set_zded(bs, index);
+        break;
     default:
         error_report("Unsupported zone op: 0x%x", op);
         ret = -ENOTSUP;
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index f65a87646e..c33e24e303 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -3474,6 +3474,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
         break;
 
     case NVME_ZONE_ACTION_SET_ZD_EXT:
+        op = BLK_ZO_SET_ZDED;
         int zd_ext_size = blk_get_zd_ext_size(blk);
         trace_pci_nvme_set_descriptor_extension(slba, zone_idx);
         if (all || !zd_ext_size) {
diff --git a/include/block/block-common.h b/include/block/block-common.h
index ea213c3887..b61541599f 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -91,6 +91,7 @@ typedef enum BlockZoneOp {
     BLK_ZO_FINISH,
     BLK_ZO_RESET,
     BLK_ZO_OFFLINE,
+    BLK_ZO_SET_ZDED,
 } BlockZoneOp;
 
 typedef enum BlockZoneModel {
-- 
2.40.1