From nobody Sat Feb 7 03:06:09 2026 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org ARC-Seal: i=1; a=rsa-sha256; t=1593511553; cv=none; d=zohomail.com; s=zohoarc; b=PInIDOV+dfatnGRle5PInTsO7e2Uw2yZqHcaUmD6Gz10g4dz+esH+VAGDNO1tAXrlsjQL4tuY1+QvewYeAbPwCVzTMpuuzIEizC//KGEEf8196Bo3ivTXYlJSS1DH0qA8nJjSQ4YiQscYOGKXvsGP+6ayNdQI6HQBE2pNpoAp1Q= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1593511553; h=Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=aYNtQAH2aCfc6z5HrPQEj4wsXriOgG6Vw510vOPv8dM=; b=IbGS0waxF6HnA/LI49SbaGusvv1QKcEwfmSxR9JwTbeHwqrIHWoLss5k3SppLf+knB7SRmXvCv3/I0xNpybo4n4ZA51x9sPe2nSJUlXjMToH1yXbQ3cjKVFGP6gjTxUcR+SzO78PWf0khgPcHekXYwIjjFTQEI73IqSKLDSeCRg= ARC-Authentication-Results: i=1; mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1593511553224332.97868313208835; Tue, 30 Jun 2020 03:05:53 -0700 (PDT) Received: from localhost ([::1]:37696 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1jqD9D-000522-Oz for importer@patchew.org; Tue, 30 Jun 2020 06:05:51 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58004) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1jqD5f-0007oR-KA; Tue, 30 Jun 2020 06:02:13 -0400 Received: from charlie.dont.surf ([128.199.63.193]:47646) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1jqD5U-0004LX-SH; Tue, 
30 Jun 2020 06:02:11 -0400 Received: from apples.local (80-167-98-190-cable.dk.customer.tdc.net [80.167.98.190]) by charlie.dont.surf (Postfix) with ESMTPSA id 66748BF7F3; Tue, 30 Jun 2020 10:01:56 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH 07/10] hw/block/nvme: track and enforce zone resources Date: Tue, 30 Jun 2020 12:01:36 +0200 Message-Id: <20200630100139.1483002-8-its@irrelevant.dk> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20200630100139.1483002-1-its@irrelevant.dk> References: <20200630100139.1483002-1-its@irrelevant.dk> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=128.199.63.193; envelope-from=its@irrelevant.dk; helo=charlie.dont.surf X-detected-operating-system: by eggs.gnu.org: First seen = 2020/06/30 04:46:49 X-ACL-Warn: Detected OS = Linux 3.11 and newer [fuzzy] X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_NONE=0.001, SPF_PASS=-0.001, URIBL_BLOCKED=0.001 autolearn=_AUTOLEARN X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Niklas Cassel , Damien Le Moal , Dmitry Fomichev , Klaus Jensen , qemu-devel@nongnu.org, Max Reitz , Klaus Jensen , Keith Busch , Javier Gonzalez , Maxim Levitsky , =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= , Matias Bjorling Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" Content-Type: text/plain; charset="utf-8" Move all zone transition rules to a single state machine that also manages zone resources. 
Signed-off-by: Klaus Jensen --- hw/block/nvme-ns.c | 17 ++- hw/block/nvme-ns.h | 7 ++ hw/block/nvme.c | 304 ++++++++++++++++++++++++++++++++------------- 3 files changed, 242 insertions(+), 86 deletions(-) diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c index 68996c2f0e72..5a55a0191f55 100644 --- a/hw/block/nvme-ns.c +++ b/hw/block/nvme-ns.c @@ -262,8 +262,13 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns) =20 id_ns->ncap =3D ns->zns.info.num_zones * ns->params.zns.zcap; =20 - id_ns_zns->mar =3D 0xffffffff; - id_ns_zns->mor =3D 0xffffffff; + id_ns_zns->mar =3D cpu_to_le32(ns->params.zns.mar); + id_ns_zns->mor =3D cpu_to_le32(ns->params.zns.mor); + + ns->zns.resources.active =3D ns->params.zns.mar !=3D 0xffffffff ? + ns->params.zns.mar + 1 : ns->zns.info.num_zones; + ns->zns.resources.open =3D ns->params.zns.mor !=3D 0xffffffff ? + ns->params.zns.mor + 1 : ns->zns.info.num_zones; } =20 static void nvme_ns_init(NvmeNamespace *ns) @@ -426,6 +431,12 @@ static int nvme_ns_check_constraints(NvmeCtrl *n, Nvme= Namespace *ns, Error return -1; } =20 + if (ns->params.zns.mor > ns->params.zns.mar) { + error_setg(errp, "maximum open resources (MOR) must be less " + "than or equal to maximum active resources (MAR)"); + return -1; + } + break; =20 default: @@ -499,6 +510,8 @@ static Property nvme_ns_props[] =3D { DEFINE_PROP_UINT8("zns.zdes", NvmeNamespace, params.zns.zdes, 0), DEFINE_PROP_UINT16("zns.zoc", NvmeNamespace, params.zns.zoc, 0), DEFINE_PROP_UINT16("zns.ozcs", NvmeNamespace, params.zns.ozcs, 0), + DEFINE_PROP_UINT32("zns.mar", NvmeNamespace, params.zns.mar, 0xfffffff= f), + DEFINE_PROP_UINT32("zns.mor", NvmeNamespace, params.zns.mor, 0xfffffff= f), DEFINE_PROP_END_OF_LIST(), }; =20 diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h index 5940fb73e72b..5660934d6199 100644 --- a/hw/block/nvme-ns.h +++ b/hw/block/nvme-ns.h @@ -29,6 +29,8 @@ typedef struct NvmeNamespaceParams { uint8_t zdes; uint16_t zoc; uint16_t ozcs; + uint32_t mar; + uint32_t mor; } zns; 
} NvmeNamespaceParams; =20 @@ -63,6 +65,11 @@ typedef struct NvmeNamespace { uint64_t num_zones; NvmeZone *zones; } info; + + struct { + uint32_t open; + uint32_t active; + } resources; } zns; } NvmeNamespace; =20 diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 6b394d374c8e..d5d521954cfc 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1187,6 +1187,155 @@ static void nvme_update_zone_descr(NvmeNamespace *n= s, NvmeRequest *req, nvme_req_add_aio(req, aio); } =20 +/* + * nvme_zrm_transition validates zone state transitions under the constrai= nt of + * the Number of Active and Open Resources (NAR and NOR) limits as reporte= d by + * the Identify Namespace Data Structure. + * + * The function does NOT change the Zone Attribute field; this must be don= e by + * the caller. + */ +static uint16_t nvme_zrm_transition(NvmeNamespace *ns, NvmeZone *zone, + NvmeZoneState to) +{ + NvmeZoneState from =3D nvme_zs(zone); + + /* fast path */ + if (from =3D=3D to) { + return NVME_SUCCESS; + } + + switch (from) { + case NVME_ZS_ZSE: + switch (to) { + case NVME_ZS_ZSRO: + case NVME_ZS_ZSO: + case NVME_ZS_ZSF: + nvme_zs_set(zone, to); + return NVME_SUCCESS; + + case NVME_ZS_ZSC: + if (!ns->zns.resources.active) { + return NVME_TOO_MANY_ACTIVE_ZONES; + } + + ns->zns.resources.active--; + + nvme_zs_set(zone, to); + + return NVME_SUCCESS; + + case NVME_ZS_ZSIO: + case NVME_ZS_ZSEO: + if (!ns->zns.resources.active) { + return NVME_TOO_MANY_ACTIVE_ZONES; + } + + if (!ns->zns.resources.open) { + return NVME_TOO_MANY_OPEN_ZONES; + } + + ns->zns.resources.active--; + ns->zns.resources.open--; + + nvme_zs_set(zone, to); + + return NVME_SUCCESS; + + default: + return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; + } + + case NVME_ZS_ZSEO: + switch (to) { + case NVME_ZS_ZSIO: + return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; + default: + break; + } + + /* fallthrough */ + + case NVME_ZS_ZSIO: + switch (to) { + case NVME_ZS_ZSEO: + nvme_zs_set(zone, to); + return NVME_SUCCESS; + 
+ + case NVME_ZS_ZSE: + case NVME_ZS_ZSF: + case NVME_ZS_ZSRO: + case NVME_ZS_ZSO: + ns->zns.resources.active++; + + /* fallthrough */ + + case NVME_ZS_ZSC: + ns->zns.resources.open++; + + nvme_zs_set(zone, to); + + return NVME_SUCCESS; + + default: + return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; + } + + case NVME_ZS_ZSC: + switch (to) { + case NVME_ZS_ZSE: + case NVME_ZS_ZSF: + case NVME_ZS_ZSRO: + case NVME_ZS_ZSO: + ns->zns.resources.active++; + nvme_zs_set(zone, to); + + return NVME_SUCCESS; + + case NVME_ZS_ZSIO: + case NVME_ZS_ZSEO: + if (!ns->zns.resources.open) { + return NVME_TOO_MANY_OPEN_ZONES; + } + + ns->zns.resources.open--; + + nvme_zs_set(zone, to); + + return NVME_SUCCESS; + + default: + return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; + } + + case NVME_ZS_ZSRO: + switch (to) { + case NVME_ZS_ZSO: + nvme_zs_set(zone, to); + return NVME_SUCCESS; + default: + return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; + } + + case NVME_ZS_ZSF: + switch (to) { + case NVME_ZS_ZSE: + case NVME_ZS_ZSRO: + case NVME_ZS_ZSO: + nvme_zs_set(zone, to); + return NVME_SUCCESS; + default: + return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; + } + + case NVME_ZS_ZSO: + return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; + + default: + return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; + } +} + static void nvme_aio_write_cb(NvmeAIO *aio, void *opaque, int ret) { NvmeRequest *req =3D aio->req; @@ -1212,7 +1361,8 @@ static void nvme_zone_advance_wp(NvmeZone *zone, uint= 32_t nlb, =20 wp +=3D nlb; if (wp =3D=3D zslba + nvme_zcap(zone)) { - nvme_zs_set(zone, NVME_ZS_ZSF); + /* if we cannot transition to ZSF something is horribly wrong */ + assert(nvme_zrm_transition(req->ns, zone, NVME_ZS_ZSF) =3D=3D NVME= _SUCCESS); } =20 zd->wp =3D cpu_to_le64(wp); @@ -1280,7 +1430,8 @@ static void nvme_aio_zone_reset_cb(NvmeAIO *aio, void= *opaque, int ret) =20 trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), ns->params.nsid, zslba= ); =20 - nvme_zs_set(zone, 
NVME_ZS_ZSE); + /* if we cannot transition to ZSE something is horribly wrong */ + assert(nvme_zrm_transition(ns, zone, NVME_ZS_ZSE) =3D=3D NVME_SUCCESS); NVME_ZA_CLEAR(zone->zd.za); =20 zone->zd.wp =3D zone->zd.zslba; @@ -1360,7 +1511,7 @@ static void nvme_aio_cb(void *opaque, int ret) if (nvme_ns_zoned(ns)) { NvmeZone *zone =3D nvme_ns_get_zone(ns, req->slba); =20 - nvme_zs_set(zone, NVME_ZS_ZSO); + assert(!nvme_zrm_transition(ns, zone, NVME_ZS_ZSO)); NVME_ZA_CLEAR(zone->zd.za); =20 nvme_update_zone_info(ns, req, zone); @@ -1431,10 +1582,11 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest= *req) } =20 static uint16_t nvme_do_zone_append(NvmeCtrl *n, NvmeRequest *req, - NvmeZone *zone) + NvmeZone *zone) { NvmeAIO *aio; NvmeNamespace *ns =3D req->ns; + NvmeZoneState zs_orig =3D nvme_zs(zone); =20 uint64_t zslba =3D nvme_zslba(zone); uint64_t wp =3D zone->wp_staging; @@ -1459,17 +1611,20 @@ static uint16_t nvme_do_zone_append(NvmeCtrl *n, Nv= meRequest *req, goto invalid; } =20 - switch (nvme_zs(zone)) { - case NVME_ZS_ZSE: - case NVME_ZS_ZSC: - nvme_zs_set(zone, NVME_ZS_ZSIO); - default: + switch (zs_orig) { + case NVME_ZS_ZSIO: + case NVME_ZS_ZSEO: break; + default: + status =3D nvme_zrm_transition(ns, zone, NVME_ZS_ZSIO); + if (status) { + goto invalid; + } } =20 status =3D nvme_map(n, len, req); if (status) { - goto invalid; + goto zrm_revert; } =20 aio =3D g_new0(NvmeAIO, 1); @@ -1496,6 +1651,10 @@ static uint16_t nvme_do_zone_append(NvmeCtrl *n, Nvm= eRequest *req, =20 return NVME_NO_COMPLETE; =20 +zrm_revert: + /* if we cannot revert the transition something is horribly wrong */ + assert(nvme_zrm_transition(ns, zone, zs_orig) =3D=3D NVME_SUCCESS); + invalid: block_acct_invalid(blk_get_stats(ns->blk), BLOCK_ACCT_WRITE); return status; @@ -1532,91 +1691,66 @@ static uint16_t nvme_zone_mgmt_send_close(NvmeCtrl = *n, NvmeRequest *req, NvmeZone *zone) { NvmeNamespace *ns =3D req->ns; + NvmeZoneState zs =3D nvme_zs(zone); + uint16_t status; =20 
trace_pci_nvme_zone_mgmt_send_close(nvme_cid(req), nvme_nsid(ns), nvme_zslba(zone), nvme_zs_str(zone= )); =20 - - switch (nvme_zs(zone)) { - case NVME_ZS_ZSIO: - case NVME_ZS_ZSEO: - nvme_zs_set(zone, NVME_ZS_ZSC); - - nvme_update_zone_info(ns, req, zone); - - return NVME_NO_COMPLETE; - - case NVME_ZS_ZSC: - return NVME_SUCCESS; - - default: - break; + /* + * The state machine in nvme_zrm_transition allows zones to transition= from + * ZSE to ZSC. That transition is only valid if done as part of Set Zone + * Descriptor, so do an early check here. + */ + if (zs =3D=3D NVME_ZS_ZSE) { + return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; } =20 - trace_pci_nvme_err_invalid_zone_condition(nvme_cid(req), nvme_zslba(zo= ne), - nvme_zs(zone)); - return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; + status =3D nvme_zrm_transition(ns, zone, NVME_ZS_ZSC); + if (status) { + return status; + } + + nvme_update_zone_info(ns, req, zone); + + return NVME_NO_COMPLETE; } =20 static uint16_t nvme_zone_mgmt_send_finish(NvmeCtrl *n, NvmeRequest *req, NvmeZone *zone) { NvmeNamespace *ns =3D req->ns; + uint16_t status; =20 trace_pci_nvme_zone_mgmt_send_finish(nvme_cid(req), nvme_nsid(ns), nvme_zslba(zone), nvme_zs_str(zon= e)); =20 - - switch (nvme_zs(zone)) { - case NVME_ZS_ZSIO: - case NVME_ZS_ZSEO: - case NVME_ZS_ZSC: - case NVME_ZS_ZSE: - nvme_zs_set(zone, NVME_ZS_ZSF); - - nvme_update_zone_info(ns, req, zone); - - return NVME_NO_COMPLETE; - - case NVME_ZS_ZSF: - return NVME_SUCCESS; - - default: - break; + status =3D nvme_zrm_transition(ns, zone, NVME_ZS_ZSF); + if (status) { + return status; } =20 - trace_pci_nvme_err_invalid_zone_condition(nvme_cid(req), nvme_zslba(zo= ne), - nvme_zs(zone)); - return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; + nvme_update_zone_info(ns, req, zone); + return NVME_NO_COMPLETE; } =20 static uint16_t nvme_zone_mgmt_send_open(NvmeCtrl *n, NvmeRequest *req, NvmeZone *zone) { NvmeNamespace *ns =3D req->ns; + uint16_t status; =20 
trace_pci_nvme_zone_mgmt_send_open(nvme_cid(req), nvme_nsid(ns), nvme_zslba(zone), nvme_zs_str(zone)= ); =20 - switch (nvme_zs(zone)) { - case NVME_ZS_ZSE: - case NVME_ZS_ZSC: - case NVME_ZS_ZSIO: - nvme_zs_set(zone, NVME_ZS_ZSEO); - - nvme_update_zone_info(ns, req, zone); - return NVME_NO_COMPLETE; - - case NVME_ZS_ZSEO: - return NVME_SUCCESS; - - default: - break; + status =3D nvme_zrm_transition(ns, zone, NVME_ZS_ZSEO); + if (status) { + return status; } =20 - trace_pci_nvme_err_invalid_zone_condition(nvme_cid(req), nvme_zslba(zo= ne), - nvme_zs(zone)); - return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; + nvme_update_zone_info(ns, req, zone); + + return NVME_NO_COMPLETE; } =20 static uint16_t nvme_zone_mgmt_send_reset(NvmeCtrl *n, NvmeRequest *req, @@ -1624,6 +1758,7 @@ static uint16_t nvme_zone_mgmt_send_reset(NvmeCtrl *n= , NvmeRequest *req, { NvmeAIO *aio; NvmeNamespace *ns =3D req->ns; + NvmeZoneState zs =3D nvme_zs(zone); uint64_t zslba =3D nvme_zslba(zone); uint64_t zcap =3D nvme_zcap(zone); uint8_t lbads =3D nvme_ns_lbads(ns); @@ -1631,7 +1766,10 @@ static uint16_t nvme_zone_mgmt_send_reset(NvmeCtrl *= n, NvmeRequest *req, trace_pci_nvme_zone_mgmt_send_reset(nvme_cid(req), nvme_nsid(ns), nvme_zslba(zone), nvme_zs_str(zone= )); =20 - switch (nvme_zs(zone)) { + switch (zs) { + case NVME_ZS_ZSE: + return NVME_SUCCESS; + case NVME_ZS_ZSIO: case NVME_ZS_ZSEO: case NVME_ZS_ZSC: @@ -1653,18 +1791,13 @@ static uint16_t nvme_zone_mgmt_send_reset(NvmeCtrl = *n, NvmeRequest *req, =20 return NVME_NO_COMPLETE; =20 - case NVME_ZS_ZSE: - return NVME_SUCCESS; - case NVME_ZS_ZSRO: - nvme_zs_set(zone, NVME_ZS_ZSO); - + assert(nvme_zrm_transition(ns, zone, NVME_ZS_ZSO) =3D=3D NVME_SUCC= ESS); nvme_update_zone_info(ns, req, zone); - return NVME_NO_COMPLETE; =20 - default: - break; + case NVME_ZS_ZSO: + return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; } =20 trace_pci_nvme_err_invalid_zone_condition(nvme_cid(req), nvme_zslba(zo= ne), @@ -1682,14 +1815,10 @@ static 
uint16_t nvme_zone_mgmt_send_offline(NvmeCtr= l *n, NvmeRequest *req, =20 switch (nvme_zs(zone)) { case NVME_ZS_ZSRO: - nvme_zs_set(zone, NVME_ZS_ZSO); - + assert(!nvme_zrm_transition(ns, zone, NVME_ZS_ZSO)); nvme_update_zone_info(ns, req, zone); return NVME_NO_COMPLETE; =20 - case NVME_ZS_ZSO: - return NVME_SUCCESS; - default: break; } @@ -1715,11 +1844,15 @@ static uint16_t nvme_zone_mgmt_send_set_zde(NvmeCtr= l *n, NvmeRequest *req, return NVME_INVALID_ZONE_STATE_TRANSITION | NVME_DNR; } =20 - nvme_zs_set(zone, NVME_ZS_ZSEO); + status =3D nvme_zrm_transition(ns, zone, NVME_ZS_ZSC); + if (status) { + return status; + } =20 status =3D nvme_dma(n, zone->zde, nvme_ns_zdes_bytes(ns), DMA_DIRECTION_TO_DEVICE, req); if (status) { + assert(!nvme_zrm_transition(ns, zone, NVME_ZS_ZSE)); return status; } =20 @@ -2024,11 +2157,14 @@ static uint16_t nvme_do_rw(NvmeCtrl *n, NvmeRequest= *req) =20 if (nvme_req_is_write(req)) { switch (nvme_zs(zone)) { - case NVME_ZS_ZSE: - case NVME_ZS_ZSC: - nvme_zs_set(zone, NVME_ZS_ZSIO); - default: + case NVME_ZS_ZSIO: + case NVME_ZS_ZSEO: break; + default: + status =3D nvme_zrm_transition(ns, zone, NVME_ZS_ZSIO); + if (status) { + return status; + } } =20 cb =3D nvme_aio_zone_write_cb; --=20 2.27.0