From: Svetly Todorov <svetly.todorov@memverge.com>
Introduce an API for validating DC adds, removes, and responses
against a multi-headed device.
mhd_reserve_extents() is called during a DC add request. This allows
a multi-headed device to check whether the requested extents belong
to another host. If not, then this function can claim those extents
in the MHD state and allow the cxl_type3 code to follow suit in the
host-local blk_bitmap.
mhd_reclaim_extents() is called during the DC add response. It allows
the MHD to reclaim extents that were preallocated to a host during the
request but rejected in the response.
mhd_release_extent() is called during the DC release response. It can
be invoked after a host frees an extent in its local bitmap, allowing
the MHD handler to release that same extent in the multi-host state.
Signed-off-by: Gregory Price <gourry@gourry.net>
Signed-off-by: Svetly Todorov <svetly.todorov@memverge.com>
---
hw/cxl/cxl-mailbox-utils.c | 28 +++++++++++++++++++++++++++-
hw/mem/cxl_type3.c | 17 +++++++++++++++++
include/hw/cxl/cxl_device.h | 8 ++++++++
3 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 10de26605c..112272e9ac 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -2545,6 +2545,7 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
{
CXLUpdateDCExtentListInPl *in = (void *)payload_in;
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
CXLDCExtentList *extent_list = &ct3d->dc.extents;
uint32_t i;
uint64_t dpa, len;
@@ -2579,6 +2580,11 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
ct3d->dc.total_extent_count += 1;
ct3_set_region_block_backed(ct3d, dpa, len);
}
+
+ if (cvc->mhd_reclaim_extents)
+ cvc->mhd_reclaim_extents(&ct3d->parent_obj, &ct3d->dc.extents_pending,
+ in);
+
/* Remove the first extent group in the pending list */
cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending);
@@ -2612,6 +2618,7 @@ static CXLRetCode cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
uint32_t *updated_list_size)
{
CXLDCExtent *ent, *ent_next;
+ CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
uint64_t dpa, len;
uint32_t i;
int cnt_delta = 0;
@@ -2632,6 +2639,13 @@ static CXLRetCode cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
goto free_and_exit;
}
+ /* In an MHD, check that this DPA range belongs to this host */
+ if (cvc->mhd_access_valid &&
+ !cvc->mhd_access_valid(&ct3d->parent_obj, dpa, len)) {
+ ret = CXL_MBOX_INVALID_PA;
+ goto free_and_exit;
+ }
+
/* After this point, extent overflow is the only error can happen */
while (len > 0) {
QTAILQ_FOREACH(ent, updated_list, node) {
@@ -2704,9 +2718,11 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
{
CXLUpdateDCExtentListInPl *in = (void *)payload_in;
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
CXLDCExtentList updated_list;
CXLDCExtent *ent, *ent_next;
- uint32_t updated_list_size;
+ uint32_t updated_list_size, i;
+ uint64_t dpa, len;
CXLRetCode ret;
if (in->num_entries_updated == 0) {
@@ -2724,6 +2740,16 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
return ret;
}
+ /* Updated_entries contains the released extents. Free those in the MHD */
+ for (i = 0; cvc->mhd_release_extent && i < in->num_entries_updated; ++i) {
+ dpa = in->updated_entries[i].start_dpa;
+ len = in->updated_entries[i].len;
+
+ if (cvc->mhd_release_extent) {
+ cvc->mhd_release_extent(&ct3d->parent_obj, dpa, len);
+ }
+ }
+
/*
* If the dry run release passes, the returned updated_list will
* be the updated extent list and we just need to clear the extents
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index b7b24b6a32..a94b9931d2 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -799,6 +799,7 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
{
CXLDCExtent *ent, *ent_next;
CXLDCExtentGroup *group, *group_next;
+ CXLType3Class *cvc = CXL_TYPE3_CLASS(ct3d);
int i;
CXLDCRegion *region;
@@ -817,6 +818,10 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
for (i = 0; i < ct3d->dc.num_regions; i++) {
region = &ct3d->dc.regions[i];
g_free(region->blk_bitmap);
+ if (cvc->mhd_release_extent) {
+ cvc->mhd_release_extent(&ct3d->parent_obj, region->base,
+ region->len);
+ }
}
}
@@ -2077,6 +2082,7 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
CXLEventDynamicCapacity dCap = {};
CXLEventRecordHdr *hdr = &dCap.hdr;
CXLType3Dev *dcd;
+ CXLType3Class *cvc;
uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
uint32_t num_extents = 0;
CxlDynamicCapacityExtentList *list;
@@ -2094,6 +2100,7 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
}
dcd = CXL_TYPE3(obj);
+ cvc = CXL_TYPE3_GET_CLASS(dcd);
if (!dcd->dc.num_regions) {
error_setg(errp, "No dynamic capacity support from the device");
return;
@@ -2166,6 +2173,13 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
num_extents++;
}
+ /* If this is an MHD, attempt to reserve the extents */
+ if (type == DC_EVENT_ADD_CAPACITY && cvc->mhd_reserve_extents &&
+ !cvc->mhd_reserve_extents(&dcd->parent_obj, records, rid)) {
+ error_setg(errp, "mhsld is enabled and extent reservation failed");
+ return;
+ }
+
/* Create extent list for event being passed to host */
i = 0;
list = records;
@@ -2304,6 +2318,9 @@ static void ct3_class_init(ObjectClass *oc, void *data)
cvc->set_cacheline = set_cacheline;
cvc->mhd_get_info = NULL;
cvc->mhd_access_valid = NULL;
+ cvc->mhd_reserve_extents = NULL;
+ cvc->mhd_reclaim_extents = NULL;
+ cvc->mhd_release_extent = NULL;
}
static const TypeInfo ct3d_info = {
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index b2dc7fb769..13c97b576f 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -14,6 +14,7 @@
#include "hw/pci/pci_device.h"
#include "hw/register.h"
#include "hw/cxl/cxl_events.h"
+#include "qapi/qapi-commands-cxl.h"
#include "hw/cxl/cxl_cpmu.h"
/*
@@ -682,6 +683,13 @@ struct CXLType3Class {
size_t *len_out,
CXLCCI *cci);
bool (*mhd_access_valid)(PCIDevice *d, uint64_t addr, unsigned int size);
+ bool (*mhd_reserve_extents)(PCIDevice *d,
+ CxlDynamicCapacityExtentList *records,
+ uint8_t rid);
+ bool (*mhd_reclaim_extents)(PCIDevice *d,
+ CXLDCExtentGroupList *groups,
+ CXLUpdateDCExtentListInPl *in);
+ bool (*mhd_release_extent)(PCIDevice *d, uint64_t dpa, uint64_t len);
};
struct CSWMBCCIDev {
--
2.43.0
On Fri, 18 Oct 2024 12:12:51 -0400
Gregory Price <gourry@gourry.net> wrote:
> From: Svetly Todorov <svetly.todorov@memverge.com>
>
> Introduce an API for validating DC adds, removes, and responses
> against a multi-headed device.
>
> mhd_reserve_extents() is called during a DC add request. This allows
> a multi-headed device to check whether the requested extents belong
> to another host. If not, then this function can claim those extents
> in the MHD state and allow the cxl_type3 code to follow suit in the
> host-local blk_bitmap.
>
> mhd_reclaim_extents() is called during the DC add response. It allows
> the MHD to reclaim extents that were preallocated to a host during the
> request but rejected in the response.
>
> mhd_release_extent() is called during the DC release response. It can
> be invoked after a host frees an extent in its local bitmap, allowing
> the MHD handler to release that same extent in the multi-host state.
>
> Signed-off-by: Gregory Price <gourry@gourry.net>
> Signed-off-by: Svetly Todorov <svetly.todorov@memverge.com>
Hi Gregory, Svetly,
A few minor comments inline. I want to think a bit more on the general
approach before providing a broader reply.
If I apply this on my cxl staging tree an can easily tidy the stuff up I mention
whilst doing so.
Thanks,
Jonathan
> ---
> hw/cxl/cxl-mailbox-utils.c | 28 +++++++++++++++++++++++++++-
> hw/mem/cxl_type3.c | 17 +++++++++++++++++
> include/hw/cxl/cxl_device.h | 8 ++++++++
> 3 files changed, 52 insertions(+), 1 deletion(-)
>
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 10de26605c..112272e9ac 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -2545,6 +2545,7 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
> {
> CXLUpdateDCExtentListInPl *in = (void *)payload_in;
> CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> + CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
> CXLDCExtentList *extent_list = &ct3d->dc.extents;
> uint32_t i;
> uint64_t dpa, len;
> @@ -2579,6 +2580,11 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
> ct3d->dc.total_extent_count += 1;
> ct3_set_region_block_backed(ct3d, dpa, len);
> }
> +
> + if (cvc->mhd_reclaim_extents)
> + cvc->mhd_reclaim_extents(&ct3d->parent_obj, &ct3d->dc.extents_pending,
> + in);
> +
Trivial but qemu style requires {} always.
I'd also indent that in to be aligned under the &
> /* Remove the first extent group in the pending list */
> cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending);
>
> @@ -2612,6 +2618,7 @@ static CXLRetCode cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
> uint32_t *updated_list_size)
> {
> CXLDCExtent *ent, *ent_next;
> + CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
> uint64_t dpa, len;
> uint32_t i;
> int cnt_delta = 0;
> @@ -2632,6 +2639,13 @@ static CXLRetCode cxl_dc_extent_release_dry_run(CXLType3Dev *ct3d,
> goto free_and_exit;
> }
>
> + /* In an MHD, check that this DPA range belongs to this host */
> + if (cvc->mhd_access_valid &&
> + !cvc->mhd_access_valid(&ct3d->parent_obj, dpa, len)) {
> + ret = CXL_MBOX_INVALID_PA;
> + goto free_and_exit;
> + }
> +
> /* After this point, extent overflow is the only error can happen */
> while (len > 0) {
> QTAILQ_FOREACH(ent, updated_list, node) {
> @@ -2704,9 +2718,11 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
> {
> CXLUpdateDCExtentListInPl *in = (void *)payload_in;
> CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> + CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d);
> CXLDCExtentList updated_list;
> CXLDCExtent *ent, *ent_next;
> - uint32_t updated_list_size;
> + uint32_t updated_list_size, i;
> + uint64_t dpa, len;
> CXLRetCode ret;
>
> if (in->num_entries_updated == 0) {
> @@ -2724,6 +2740,16 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
> return ret;
> }
>
> + /* Updated_entries contains the released extents. Free those in the MHD */
> + for (i = 0; cvc->mhd_release_extent && i < in->num_entries_updated; ++i) {
local style is post increment.
Also, indent isn't too bad if you pull the mhd_release_extent out of the loop
condition as an if statement. It think that ends up more reaable.
> + dpa = in->updated_entries[i].start_dpa;
> + len = in->updated_entries[i].len;
> +
> + if (cvc->mhd_release_extent) {
Checked in loop condition as well so this isn't needed.
> + cvc->mhd_release_extent(&ct3d->parent_obj, dpa, len);
> + }
> + }
> +
> /*
> * If the dry run release passes, the returned updated_list will
> * be the updated extent list and we just need to clear the extents
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index b7b24b6a32..a94b9931d2 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -799,6 +799,7 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> {
> CXLDCExtent *ent, *ent_next;
> CXLDCExtentGroup *group, *group_next;
> + CXLType3Class *cvc = CXL_TYPE3_CLASS(ct3d);
> int i;
> CXLDCRegion *region;
>
> @@ -817,6 +818,10 @@ static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> for (i = 0; i < ct3d->dc.num_regions; i++) {
> region = &ct3d->dc.regions[i];
> g_free(region->blk_bitmap);
> + if (cvc->mhd_release_extent) {
> + cvc->mhd_release_extent(&ct3d->parent_obj, region->base,
> + region->len);
Indent to align after (
> + }
> }
> }
>
> @@ -2077,6 +2082,7 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
> CXLEventDynamicCapacity dCap = {};
> CXLEventRecordHdr *hdr = &dCap.hdr;
> CXLType3Dev *dcd;
> + CXLType3Class *cvc;
> uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
> uint32_t num_extents = 0;
> CxlDynamicCapacityExtentList *list;
> @@ -2094,6 +2100,7 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
> }
>
> dcd = CXL_TYPE3(obj);
> + cvc = CXL_TYPE3_GET_CLASS(dcd);
> if (!dcd->dc.num_regions) {
> error_setg(errp, "No dynamic capacity support from the device");
> return;
> @@ -2166,6 +2173,13 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
> num_extents++;
> }
>
> + /* If this is an MHD, attempt to reserve the extents */
> + if (type == DC_EVENT_ADD_CAPACITY && cvc->mhd_reserve_extents &&
> + !cvc->mhd_reserve_extents(&dcd->parent_obj, records, rid)) {
> + error_setg(errp, "mhsld is enabled and extent reservation failed");
I'd reorganize this so you can get the return value. It might be useful in the long run.
> + return;
> + }
> +
> /* Create extent list for event being passed to host */
> i = 0;
> list = records;
> @@ -2304,6 +2318,9 @@ static void ct3_class_init(ObjectClass *oc, void *data)
> cvc->set_cacheline = set_cacheline;
> cvc->mhd_get_info = NULL;
> cvc->mhd_access_valid = NULL;
> + cvc->mhd_reserve_extents = NULL;
> + cvc->mhd_reclaim_extents = NULL;
> + cvc->mhd_release_extent = NULL;
> }
>
> static const TypeInfo ct3d_info = {
> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index b2dc7fb769..13c97b576f 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -14,6 +14,7 @@
> #include "hw/pci/pci_device.h"
> #include "hw/register.h"
> #include "hw/cxl/cxl_events.h"
> +#include "qapi/qapi-commands-cxl.h"
>
> #include "hw/cxl/cxl_cpmu.h"
> /*
> @@ -682,6 +683,13 @@ struct CXLType3Class {
> size_t *len_out,
> CXLCCI *cci);
> bool (*mhd_access_valid)(PCIDevice *d, uint64_t addr, unsigned int size);
> + bool (*mhd_reserve_extents)(PCIDevice *d,
> + CxlDynamicCapacityExtentList *records,
> + uint8_t rid);
> + bool (*mhd_reclaim_extents)(PCIDevice *d,
> + CXLDCExtentGroupList *groups,
> + CXLUpdateDCExtentListInPl *in);
> + bool (*mhd_release_extent)(PCIDevice *d, uint64_t dpa, uint64_t len);
> };
>
> struct CSWMBCCIDev {
© 2016 - 2026 Red Hat, Inc.