From nobody Fri Dec 19 17:35:44 2025 Delivered-To: importer@patchew.org Authentication-Results: mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org ARC-Seal: i=1; a=rsa-sha256; t=1593511745; cv=none; d=zohomail.com; s=zohoarc; b=GIsuMrjjnpALP64uUOnybikqNEV4fF6rGhmVqmNcyrULGJXBKvVNqHSCzaTGUFw+E+DmXT7KykeTleAkArYGGw5uOI7J0QSnGIByiUJMjvFmJLqN88Zy4JqDiRTdZhe3WOyQAJ2IiOb1MQNKE7YuIvk2Kw1gsPId19GDYtAr53M= ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=zohomail.com; s=zohoarc; t=1593511745; h=Content-Transfer-Encoding:Cc:Date:From:In-Reply-To:List-Subscribe:List-Post:List-Id:List-Archive:List-Help:List-Unsubscribe:MIME-Version:Message-ID:References:Sender:Subject:To; bh=C7WRAdFgdnYn0yFQ6JWiYqaNv/IgpfQ7eCsaGK98JnA=; b=CFA0P852p+tKXGFqJ1BT4nVj0FoexvNpMnroiFpBlYhlSsI3vM2amCPftwSSPCdI2PHJ0Ug3vYNcwq2S4E2FokwzinUzHLcENt/bOXi1rkgNt997oDE/STXAklKfIfltqveASZyZbkT7EFd9Q+s9PWx7oCqrXex47ZUHZeJo6tU= ARC-Authentication-Results: i=1; mx.zohomail.com; spf=pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) by mx.zohomail.com with SMTPS id 1593511745438855.1188919561301; Tue, 30 Jun 2020 03:09:05 -0700 (PDT) Received: from localhost ([::1]:53546 helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1jqDCJ-0003Hg-Kr for importer@patchew.org; Tue, 30 Jun 2020 06:09:03 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]:58028) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1jqD5i-0007r1-7R; Tue, 30 Jun 2020 06:02:14 -0400 Received: from charlie.dont.surf ([128.199.63.193]:47648) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1jqD5a-0004MA-5f; Tue, 
30 Jun 2020 06:02:13 -0400 Received: from apples.local (80-167-98-190-cable.dk.customer.tdc.net [80.167.98.190]) by charlie.dont.surf (Postfix) with ESMTPSA id 934B7BF7FB; Tue, 30 Jun 2020 10:01:57 +0000 (UTC) From: Klaus Jensen To: qemu-block@nongnu.org Subject: [PATCH 09/10] hw/block/nvme: allow zone excursions Date: Tue, 30 Jun 2020 12:01:38 +0200 Message-Id: <20200630100139.1483002-10-its@irrelevant.dk> X-Mailer: git-send-email 2.27.0 In-Reply-To: <20200630100139.1483002-1-its@irrelevant.dk> References: <20200630100139.1483002-1-its@irrelevant.dk> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Received-SPF: pass (zohomail.com: domain of gnu.org designates 209.51.188.17 as permitted sender) client-ip=209.51.188.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Received-SPF: pass client-ip=128.199.63.193; envelope-from=its@irrelevant.dk; helo=charlie.dont.surf X-detected-operating-system: by eggs.gnu.org: First seen = 2020/06/30 04:46:49 X-ACL-Warn: Detected OS = Linux 3.11 and newer [fuzzy] X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_NONE=0.001, SPF_PASS=-0.001 autolearn=_AUTOLEARN X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kevin Wolf , Niklas Cassel , Damien Le Moal , Dmitry Fomichev , Klaus Jensen , qemu-devel@nongnu.org, Max Reitz , Klaus Jensen , Keith Busch , Javier Gonzalez , Maxim Levitsky , =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= , Matias Bjorling Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" Content-Type: text/plain; charset="utf-8" Allow the controller to release active resources by transitioning zones to the full state (a zone excursion). This is only done when the namespace permits Zone Active Excursions (the ZAE bit in ZOC); otherwise the command still fails with Too Many Active Zones. A zone that is finished this way gets the ZFC zone attribute set, is recorded in the new Changed Zone List log page, and a Zone Descriptor Changed notice is queued as an asynchronous event for the host. 
Signed-off-by: Klaus Jensen --- hw/block/nvme-ns.h | 2 + hw/block/nvme.c | 171 ++++++++++++++++++++++++++++++++++++++---- hw/block/trace-events | 4 + include/block/nvme.h | 10 +++ 4 files changed, 174 insertions(+), 13 deletions(-) diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h index 6d3a6dc07cd8..6acda5c2cf3f 100644 --- a/hw/block/nvme-ns.h +++ b/hw/block/nvme-ns.h @@ -75,6 +75,8 @@ typedef struct NvmeNamespace { QTAILQ_HEAD(, NvmeZone) lru_open; QTAILQ_HEAD(, NvmeZone) lru_active; } resources; + + NvmeChangedZoneList changed_list; } zns; } NvmeNamespace; =20 diff --git a/hw/block/nvme.c b/hw/block/nvme.c index f7b4618bc805..6db6daa62bc5 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -859,10 +859,11 @@ static void nvme_process_aers(void *opaque) =20 req =3D n->aer_reqs[n->outstanding_aers]; =20 - result =3D (NvmeAerResult *) &req->cqe.dw0; + result =3D (NvmeAerResult *) &req->cqe.qw0; result->event_type =3D event->result.event_type; result->event_info =3D event->result.event_info; result->log_page =3D event->result.log_page; + result->nsid =3D event->result.nsid; g_free(event); =20 req->status =3D NVME_SUCCESS; @@ -874,8 +875,9 @@ static void nvme_process_aers(void *opaque) } } =20 -static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, - uint8_t event_info, uint8_t log_page) +static void nvme_enqueue_event(NvmeCtrl *n, NvmeNamespace *ns, + uint8_t event_type, uint8_t event_info, + uint8_t log_page) { NvmeAsyncEvent *event; =20 @@ -893,6 +895,11 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t ev= ent_type, .log_page =3D log_page, }; =20 + if (event_info =3D=3D NVME_AER_INFO_NOTICE_ZONE_DESCR_CHANGED) { + assert(ns); + event->result.nsid =3D ns->params.nsid; + } + QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry); n->aer_queued++; =20 @@ -1187,15 +1194,50 @@ static void nvme_update_zone_descr(NvmeNamespace *n= s, NvmeRequest *req, nvme_req_add_aio(req, aio); } =20 +static void nvme_zone_changed(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone 
*zo= ne) +{ + uint16_t num_ids =3D le16_to_cpu(ns->zns.changed_list.num_ids); + + trace_pci_nvme_zone_changed(ns->params.nsid, nvme_zslba(zone)); + + if (num_ids < NVME_CHANGED_ZONE_LIST_MAX_IDS) { + ns->zns.changed_list.ids[num_ids] =3D zone->zd.zslba; + ns->zns.changed_list.num_ids =3D cpu_to_le16(num_ids + 1); + } else { + memset(&ns->zns.changed_list, 0x0, sizeof(NvmeChangedZoneList)); + ns->zns.changed_list.num_ids =3D cpu_to_le16(0xffff); + } + + nvme_enqueue_event(n, ns, NVME_AER_TYPE_NOTICE, + NVME_AER_INFO_NOTICE_ZONE_DESCR_CHANGED, + NVME_LOG_CHANGED_ZONE_LIST); +} + static uint16_t nvme_zrm_transition(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *zone, NvmeZoneState to, NvmeRequest *req); =20 +static void nvme_zone_excursion(NvmeCtrl *n, NvmeNamespace *ns, NvmeZone *= zone, + NvmeRequest *req) +{ + trace_pci_nvme_zone_excursion(ns->params.nsid, nvme_zslba(zone), + nvme_zs_str(zone)); + + assert(nvme_zrm_transition(n, ns, zone, NVME_ZS_ZSF, req) =3D=3D NVME_= SUCCESS); + + NVME_ZA_SET_ZFC(zone->zd.za, 0x1); + + nvme_zone_changed(n, ns, zone); + + nvme_update_zone_info(ns, req, zone); +} + static uint16_t nvme_zrm_release_open(NvmeCtrl *n, NvmeNamespace *ns, NvmeRequest *req) { NvmeZone *candidate; NvmeZoneState zs; + uint16_t status; =20 trace_pci_nvme_zone_zrm_release_open(nvme_cid(req), ns->params.nsid); =20 @@ -1216,12 +1258,73 @@ static uint16_t nvme_zrm_release_open(NvmeCtrl *n, = NvmeNamespace *ns, continue; } =20 - return nvme_zrm_transition(n, ns, candidate, NVME_ZS_ZSC, req); + status =3D nvme_zrm_transition(n, ns, candidate, NVME_ZS_ZSC, req); + if (status) { + return status; + } + + nvme_update_zone_info(ns, req, candidate); + return NVME_SUCCESS; } =20 return NVME_TOO_MANY_OPEN_ZONES; } =20 +static uint16_t nvme_zrm_release_active(NvmeCtrl *n, NvmeNamespace *ns, + NvmeRequest *req) +{ + NvmeIdNsZns *id_ns_zns =3D nvme_ns_id_zoned(ns); + NvmeZone *candidate =3D NULL; + NvmeZoneDescriptor *zd; + NvmeZoneState zs; + + 
trace_pci_nvme_zone_zrm_release_active(nvme_cid(req), ns->params.nsid); + + /* bail out if Zone Active Excursions are not permitted */ + if (!(le16_to_cpu(id_ns_zns->zoc) & NVME_ID_NS_ZNS_ZOC_ZAE)) { + trace_pci_nvme_zone_zrm_excursion_not_allowed(nvme_cid(req), + ns->params.nsid); + return NVME_TOO_MANY_ACTIVE_ZONES; + } + + QTAILQ_FOREACH(candidate, &ns->zns.resources.lru_active, lru_entry) { + zd =3D &candidate->zd; + zs =3D nvme_zs(candidate); + + trace_pci_nvme_zone_zrm_candidate(nvme_cid(req), ns->params.nsid, + nvme_zslba(candidate), + nvme_wp(candidate), zs); + + goto out; + } + + /* + * If all zone resources are tied up on open zones we have to transiti= on + * one of those to full. + */ + QTAILQ_FOREACH(candidate, &ns->zns.resources.lru_open, lru_entry) { + zd =3D &candidate->zd; + zs =3D nvme_zs(candidate); + + trace_pci_nvme_zone_zrm_candidate(nvme_cid(req), ns->params.nsid, + nvme_zslba(candidate), + nvme_wp(candidate), zs); + + /* the zone cannot be finished if it is currently writing */ + if (candidate->wp_staging !=3D le64_to_cpu(zd->wp)) { + continue; + } + + break; + } + + assert(candidate); + +out: + nvme_zone_excursion(n, ns, candidate, req); + return NVME_SUCCESS; +} + /* * nvme_zrm_transition validates zone state transitions under the constrai= nt of * the Number of Active and Open Resources (NAR and NOR) limits as reporte= d by @@ -1253,8 +1356,10 @@ static uint16_t nvme_zrm_transition(NvmeCtrl *n, Nvm= eNamespace *ns, =20 case NVME_ZS_ZSC: if (!ns->zns.resources.active) { - trace_pci_nvme_err_too_many_active_zones(nvme_cid(req)); - return NVME_TOO_MANY_ACTIVE_ZONES; + status =3D nvme_zrm_release_active(n, ns, req); + if (status) { + return status; + } } =20 ns->zns.resources.active--; @@ -1266,8 +1371,10 @@ static uint16_t nvme_zrm_transition(NvmeCtrl *n, Nvm= eNamespace *ns, case NVME_ZS_ZSIO: case NVME_ZS_ZSEO: if (!ns->zns.resources.active) { - trace_pci_nvme_err_too_many_active_zones(nvme_cid(req)); - return NVME_TOO_MANY_ACTIVE_ZONES; + 
status =3D nvme_zrm_release_active(n, ns, req); + if (status) { + return status; + } } =20 if (!ns->zns.resources.open) { @@ -2716,6 +2823,41 @@ static uint16_t nvme_effects_log(NvmeCtrl *n, uint32= _t buf_len, uint64_t off, DMA_DIRECTION_FROM_DEVICE, req); } =20 +static uint16_t nvme_changed_zone_info(NvmeCtrl *n, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t nsid =3D le32_to_cpu(req->cmd.nsid); + NvmeNamespace *ns =3D nvme_ns(n, nsid); + uint32_t trans_len; + uint16_t status; + + if (unlikely(!ns)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + if (!nvme_ns_zoned(ns)) { + return NVME_INVALID_LOG_ID | NVME_DNR; + } + + if (off > 4096) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trans_len =3D MIN(4096 - off, buf_len); + + status =3D nvme_dma(n, (uint8_t *) &ns->zns.changed_list + off, trans_= len, + DMA_DIRECTION_FROM_DEVICE, req); + if (status) { + return status; + } + + memset(&ns->zns.changed_list, 0x0, sizeof(NvmeChangedZoneList)); + + nvme_clear_events(n, NVME_AER_TYPE_NOTICE); + + return NVME_SUCCESS; +} + static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) { NvmeCmd *cmd =3D &req->cmd; @@ -2761,6 +2903,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest= *req) return nvme_fw_log_info(n, len, off, req); case NVME_LOG_EFFECTS: return nvme_effects_log(n, len, off, req); + case NVME_LOG_CHANGED_ZONE_LIST: + return nvme_changed_zone_info(n, len, off, req); default: trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); return NVME_INVALID_FIELD | NVME_DNR; @@ -3359,7 +3503,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeReq= uest *req) if (((n->temperature >=3D n->features.temp_thresh_hi) || (n->temperature <=3D n->features.temp_thresh_low)) && NVME_AEC_SMART(n->features.async_config) & NVME_SMART_TEMPERAT= URE) { - nvme_enqueue_event(n, NVME_AER_TYPE_SMART, + nvme_enqueue_event(n, NULL, NVME_AER_TYPE_SMART, NVME_AER_INFO_SMART_TEMP_THRESH, NVME_LOG_SMART_INFO); } @@ -3924,7 +4068,7 @@ static void 
nvme_process_db(NvmeCtrl *n, hwaddr addr,= int val) " sqid=3D%"PRIu32", ignoring", qid); =20 if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + nvme_enqueue_event(n, NULL, NVME_AER_TYPE_ERROR, NVME_AER_INFO_ERR_INVALID_DB_REGISTER, NVME_LOG_ERROR_INFO); } @@ -3941,7 +4085,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr,= int val) qid, new_head); =20 if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + nvme_enqueue_event(n, NULL, NVME_AER_TYPE_ERROR, NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO); } @@ -3978,7 +4122,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr,= int val) " sqid=3D%"PRIu32", ignoring", qid); =20 if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + nvme_enqueue_event(n, NULL, NVME_AER_TYPE_ERROR, NVME_AER_INFO_ERR_INVALID_DB_REGISTER, NVME_LOG_ERROR_INFO); } @@ -3995,7 +4139,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr,= int val) qid, new_tail); =20 if (n->outstanding_aers) { - nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + nvme_enqueue_event(n, NULL, NVME_AER_TYPE_ERROR, NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO); } @@ -4286,6 +4430,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pc= i_dev) id->mdts =3D n->params.mdts; id->ver =3D cpu_to_le32(NVME_SPEC_VER); id->cntrltype =3D 0x1; + id->oaes =3D cpu_to_le32(NVME_OAES_ZDCN); id->oacs =3D cpu_to_le16(0); =20 /* diff --git a/hw/block/trace-events b/hw/block/trace-events index 4b4f2ed7605f..c4c80644f782 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -101,6 +101,10 @@ pci_nvme_update_zone_descr(uint16_t cid, uint32_t nsid= , uint64_t zslba) "cid %"P pci_nvme_zone_zrm_transition(uint16_t cid, uint32_t nsid, uint64_t zslba, = uint8_t from, uint8_t to) "cid %"PRIu16" nsid %"PRIu32" zslba 0x%"PRIx64" f= rom 0x%"PRIx8" to 0x%"PRIx8"" pci_nvme_zone_zrm_candidate(uint16_t cid, uint32_t nsid, uint64_t zslba, u= int64_t wp, uint8_t zc) "cid %"PRIu16" nsid %"PRIu32" 
zslba 0x%"PRIx64" wp = 0x%"PRIx64" zc 0x%"PRIx8"" pci_nvme_zone_zrm_release_open(uint16_t cid, uint32_t nsid) "cid %"PRIu16"= nsid %"PRIu32"" +pci_nvme_zone_zrm_release_active(uint16_t cid, uint32_t nsid) "cid %"PRIu1= 6" nsid %"PRIu32"" +pci_nvme_zone_zrm_excursion_not_allowed(uint16_t cid, uint32_t nsid) "cid = %"PRIu16" nsid %"PRIu32"" +pci_nvme_zone_changed(uint32_t nsid, uint64_t zslba) "nsid %"PRIu32" zslba= 0x%"PRIx64"" +pci_nvme_zone_excursion(uint32_t nsid, uint64_t zslba, const char *zc) "ns= id %"PRIu32" zslba 0x%"PRIx64" zc \"%s\"" pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, inte= rrupt mask set, data=3D0x%"PRIx64", new_mask=3D0x%"PRIx64"" pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, inte= rrupt mask clr, data=3D0x%"PRIx64", new_mask=3D0x%"PRIx64"" pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=3D0= x%"PRIx64"" diff --git a/include/block/nvme.h b/include/block/nvme.h index 68dac2582b06..688ee5496168 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -778,6 +778,7 @@ typedef struct NvmeDsmRange { enum NvmeAsyncEventRequest { NVME_AER_TYPE_ERROR =3D 0, NVME_AER_TYPE_SMART =3D 1, + NVME_AER_TYPE_NOTICE =3D 2, NVME_AER_TYPE_IO_SPECIFIC =3D 6, NVME_AER_TYPE_VENDOR_SPECIFIC =3D 7, NVME_AER_INFO_ERR_INVALID_DB_REGISTER =3D 0, @@ -993,6 +994,14 @@ typedef struct NvmeZoneDescriptor { #define NVME_ZS(zs) (((zs) >> 4) & 0xf) #define NVME_ZS_SET(zs, state) ((zs) =3D ((state) << 4)) =20 +#define NVME_CHANGED_ZONE_LIST_MAX_IDS 511 + +typedef struct NvmeChangedZoneList { + uint16_t num_ids; + uint8_t rsvd2[6]; + uint64_t ids[NVME_CHANGED_ZONE_LIST_MAX_IDS]; +} NvmeChangedZoneList; + #define NVME_ZA_ZFC(za) ((za) & (1 << 0)) #define NVME_ZA_FZR(za) ((za) & (1 << 1)) #define NVME_ZA_RZR(za) ((za) & (1 << 2)) @@ -1428,5 +1437,6 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeEffectsLog) !=3D 4096); QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescriptor) !=3D 64); 
QEMU_BUILD_BUG_ON(sizeof(NvmeLBAFE) !=3D 16); + QEMU_BUILD_BUG_ON(sizeof(NvmeChangedZoneList) !=3D 4096); } #endif --=20 2.27.0