The CXL Fixed Memory Window Structure (CFMWS) describes zero or more
Host Physical Address (HPA) windows that are associated with each CXL
Host Bridge. Each window represents a contiguous HPA that may be
interleaved with one or more targets (CXL v3.2 - 9.18.1.3).
The Low Memory Hole (LMH) of x86 is a range of addresses of physical low
memory to which systems cannot send transactions. In some case the size
of that hole is not compatible with the constraint that the CFMWS size
shall be multiple of Interleave Ways * 256 MB. (CXL v3.2 - Table 9-22).
On those systems, the BIOS publishes CFMWS which communicate the active
System Physical Address (SPA) ranges that map to a subset of the Host
Physical Address (HPA) ranges. The SPA range trims out the hole, and the
capacity in the endpoint is lost with no SPA to map to CXL HPA in that
hole.
In the early stages of CXL regions construction and attach on platforms
that have Low Memory Holes, cxl_add_to_region() fails and returns an
error for it can't find any CFMWS range that matches a given endpoint
decoder.
Detect an LMH by comparing root decoder and endpoint decoder range.
Match root decoders HPA range and constructed region with the
corresponding endpoint decoders. Construct CXL region with the end of
its HPA ranges end adjusted to the matching SPA and adjust the DPA
resource end of the hardware decoders to fit the region. Allow the
attach target process to complete by allowing regions and decoders to
bypass the constraints that don't hold when an LMH is present.[1]
[1] commit 7a81173f3 ("cxl: Documentation/driver-api/cxl: Describe the x86 Low Memory Hole solution")
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
---
drivers/cxl/core/region.c | 47 ++++++++++++++++++++++++++++++++-------
tools/testing/cxl/Kbuild | 1 +
2 files changed, 40 insertions(+), 8 deletions(-)
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 43a854036202..9a499bfca23d 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -14,6 +14,7 @@
#include <linux/string_choices.h>
#include <cxlmem.h>
#include <cxl.h>
+#include "platform_quirks.h"
#include "core.h"
/**
@@ -841,6 +842,8 @@ static int match_free_decoder(struct device *dev, const void *data)
static bool region_res_match_cxl_range(const struct cxl_region_params *p,
struct range *range)
{
+ struct cxl_decoder *cxld;
+
if (!p->res)
return false;
@@ -849,8 +852,13 @@ static bool region_res_match_cxl_range(const struct cxl_region_params *p,
* to be fronted by the DRAM range in current known implementation.
* This assumption will be made until a variant implementation exists.
*/
- return p->res->start + p->cache_size == range->start &&
- p->res->end == range->end;
+ if (p->res->start + p->cache_size == range->start &&
+ p->res->end == range->end)
+ return true;
+
+ cxld = container_of(range, struct cxl_decoder, hpa_range);
+
+ return platform_region_matches_cxld(p, cxld);
}
static int match_auto_decoder(struct device *dev, const void *data)
@@ -1770,6 +1778,7 @@ static int match_cxlsd_to_cxled_by_range(struct device *dev, const void *data)
{
const struct cxl_endpoint_decoder *cxled = data;
struct cxl_switch_decoder *cxlsd;
+ struct cxl_root_decoder *cxlrd;
const struct range *r1, *r2;
if (!is_switch_decoder(dev))
@@ -1779,8 +1788,13 @@ static int match_cxlsd_to_cxled_by_range(struct device *dev, const void *data)
r1 = &cxlsd->cxld.hpa_range;
r2 = &cxled->cxld.hpa_range;
- if (is_root_decoder(dev))
- return range_contains(r1, r2);
+ if (is_root_decoder(dev)) {
+ if (range_contains(r1, r2))
+ return 1;
+ cxlrd = to_cxl_root_decoder(dev);
+ if (platform_cxlrd_matches_cxled(cxlrd, cxled))
+ return 1;
+ }
return (r1->start == r2->start && r1->end == r2->end);
}
@@ -1997,7 +2011,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
}
if (resource_size(cxled->dpa_res) * p->interleave_ways + p->cache_size !=
- resource_size(p->res)) {
+ resource_size(p->res) && !platform_cxlrd_matches_cxled(cxlrd, cxled)) {
dev_dbg(&cxlr->dev,
"%s:%s-size-%#llx * ways-%d + cache-%#llx != region-size-%#llx\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
@@ -3357,7 +3371,8 @@ static int match_cxlrd_to_cxled_by_range(struct device *dev, const void *data)
r1 = &cxlrd->cxlsd.cxld.hpa_range;
r2 = &cxled->cxld.hpa_range;
- return range_contains(r1, r2);
+ return (range_contains(r1, r2)) ||
+ (platform_cxlrd_matches_cxled(cxlrd, cxled));
}
static struct cxl_decoder *
@@ -3406,8 +3421,12 @@ static int match_region_to_cxled_by_range(struct device *dev, const void *data)
p = &cxlr->params;
guard(rwsem_read)(&cxl_rwsem.region);
- if (p->res && p->res->start == r->start && p->res->end == r->end)
- return 1;
+ if (p->res) {
+ if (p->res->start == r->start && p->res->end == r->end)
+ return 1;
+ if (platform_region_matches_cxld(p, &cxled->cxld))
+ return 1;
+ }
return 0;
}
@@ -3479,6 +3498,12 @@ static int __construct_region(struct cxl_region *cxlr,
*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
dev_name(&cxlr->dev));
+ /*
+ * Trim the HPA retrieved from hardware to fit the SPA mapped by the
+ * platform
+ */
+ platform_res_adjust(res, cxled, cxlrd);
+
rc = cxl_extended_linear_cache_resize(cxlr, res);
if (rc && rc != -EOPNOTSUPP) {
/*
@@ -3588,6 +3613,12 @@ int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
cxl_find_region_by_range(cxlrd, cxled);
if (!cxlr)
cxlr = construct_region(cxlrd, cxled);
+ else
+ /*
+ * Adjust the Endpoint Decoder's dpa_res to fit the Region which
+ * it has to be attached to
+ */
+ platform_res_adjust(NULL, cxled, cxlrd);
mutex_unlock(&cxlrd->range_lock);
rc = PTR_ERR_OR_ZERO(cxlr);
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 0d5ce4b74b9f..205f4c813468 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -61,6 +61,7 @@ cxl_core-y += $(CXL_CORE_SRC)/cdat.o
cxl_core-y += $(CXL_CORE_SRC)/ras.o
cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
+cxl_core-$(CONFIG_CXL_PLATFORM_QUIRKS) += $(CXL_CORE_SRC)/platform_quirks.o
cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
--
2.50.1
On Mon, Oct 06, 2025 at 05:58:06PM +0200, Fabio M. De Francesco wrote:
> The CXL Fixed Memory Window Structure (CFMWS) describes zero or more
> Host Physical Address (HPA) windows that are associated with each CXL
> Host Bridge. Each window represents a contiguous HPA that may be
> interleaved with one or more targets (CXL v3.2 - 9.18.1.3).
>
> The Low Memory Hole (LMH) of x86 is a range of addresses of physical low
> memory to which systems cannot send transactions. In some case the size
> of that hole is not compatible with the constraint that the CFMWS size
> shall be multiple of Interleave Ways * 256 MB. (CXL v3.2 - Table 9-22).
>
> On those systems, the BIOS publishes CFMWS which communicate the active
> System Physical Address (SPA) ranges that map to a subset of the Host
> Physical Address (HPA) ranges. The SPA range trims out the hole, and the
> capacity in the endpoint is lost with no SPA to map to CXL HPA in that
> hole.
>
> In the early stages of CXL regions construction and attach on platforms
> that have Low Memory Holes, cxl_add_to_region() fails and returns an
> error for it can't find any CFMWS range that matches a given endpoint
> decoder.
>
> Detect an LMH by comparing root decoder and endpoint decoder range.
> Match root decoders HPA range and constructed region with the
> corresponding endpoint decoders. Construct CXL region with the end of
> its HPA ranges end adjusted to the matching SPA and adjust the DPA
> resource end of the hardware decoders to fit the region. Allow the
> attach target process to complete by allowing regions and decoders to
> bypass the constraints that don't hold when an LMH is present.[1]
>
> [1] commit 7a81173f3 ("cxl: Documentation/driver-api/cxl: Describe the x86 Low Memory Hole solution")
>
> Cc: Alison Schofield <alison.schofield@intel.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
> ---
> drivers/cxl/core/region.c | 47 ++++++++++++++++++++++++++++++++-------
> tools/testing/cxl/Kbuild | 1 +
> 2 files changed, 40 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 43a854036202..9a499bfca23d 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -14,6 +14,7 @@
> #include <linux/string_choices.h>
> #include <cxlmem.h>
> #include <cxl.h>
> +#include "platform_quirks.h"
> #include "core.h"
>
snip
> @@ -3479,6 +3498,12 @@ static int __construct_region(struct cxl_region *cxlr,
> *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
> dev_name(&cxlr->dev));
>
> + /*
> + * Trim the HPA retrieved from hardware to fit the SPA mapped by the
> + * platform
> + */
> + platform_res_adjust(res, cxled, cxlrd);
> +
Noted this a bit in other patch, not so sure about that comment.
But anyway, do we really want to say what it is doing or let it be
a mystery of the quirks. I'm really not clear on where we are going
with these quirks and the naming of the helper functions.
If you split into 2 helpers, you can try something like:
*res = platform_adjust_region_resource(...);
And then later, do the endpoint adjust. See below:
> rc = cxl_extended_linear_cache_resize(cxlr, res);
> if (rc && rc != -EOPNOTSUPP) {
> /*
> @@ -3588,6 +3613,12 @@ int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
> cxl_find_region_by_range(cxlrd, cxled);
> if (!cxlr)
> cxlr = construct_region(cxlrd, cxled);
> + else
> + /*
> + * Adjust the Endpoint Decoder's dpa_res to fit the Region which
> + * it has to be attached to
> + */
> + platform_res_adjust(NULL, cxled, cxlrd);
Following from above, would it work to skip the else, and knowing
that the region resource was adjusted in construct_region(), only
do this here for every cxled that attaches.
cxled = platform_adjust_endpoint_resource(...)
snip to end.
On Mon, Oct 06, 2025 at 05:58:06PM +0200, Fabio M. De Francesco wrote:
> The CXL Fixed Memory Window Structure (CFMWS) describes zero or more
> Host Physical Address (HPA) windows that are associated with each CXL
> Host Bridge. Each window represents a contiguous HPA that may be
> interleaved with one or more targets (CXL v3.2 - 9.18.1.3).
>
...
>
> Cc: Alison Schofield <alison.schofield@intel.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
Couple inlines but just nits
Reviewed-by: Gregory Price <gourry@gourry.net>
> @@ -1770,6 +1778,7 @@ static int match_cxlsd_to_cxled_by_range(struct device *dev, const void *data)
> {
> const struct cxl_endpoint_decoder *cxled = data;
> struct cxl_switch_decoder *cxlsd;
> + struct cxl_root_decoder *cxlrd;
> const struct range *r1, *r2;
>
> if (!is_switch_decoder(dev))
> @@ -1779,8 +1788,13 @@ static int match_cxlsd_to_cxled_by_range(struct device *dev, const void *data)
> r1 = &cxlsd->cxld.hpa_range;
> r2 = &cxled->cxld.hpa_range;
>
> - if (is_root_decoder(dev))
> - return range_contains(r1, r2);
> + if (is_root_decoder(dev)) {
> + if (range_contains(r1, r2))
> + return 1;
> + cxlrd = to_cxl_root_decoder(dev);
> + if (platform_cxlrd_matches_cxled(cxlrd, cxled))
> + return 1;
> + }
Is there any concern for longer term maintainability if addition
match_*() functions are added? Or is this upkeep just the unfortunate
maintenance cost of supportering the quirk?
>
> static struct cxl_decoder *
> @@ -3406,8 +3421,12 @@ static int match_region_to_cxled_by_range(struct device *dev, const void *data)
> p = &cxlr->params;
>
> guard(rwsem_read)(&cxl_rwsem.region);
> - if (p->res && p->res->start == r->start && p->res->end == r->end)
> - return 1;
> + if (p->res) {
> + if (p->res->start == r->start && p->res->end == r->end)
> + return 1;
> + if (platform_region_matches_cxld(p, &cxled->cxld))
> + return 1;
> + }
if (!p->res)
return 0;
if (p->res->start == r->start && p->res->end == r->end)
return 1;
if (platform_region_matches_cxld(p, &cxled->cxld))
return 1;
return 0;
?
I like flat, but I also dislike not-logic. Style choice here, unless
others have a strong feeling this is fine.
~Gregory
On 10/6/25 10:46 AM, Gregory Price wrote:
> On Mon, Oct 06, 2025 at 05:58:06PM +0200, Fabio M. De Francesco wrote:
>> The CXL Fixed Memory Window Structure (CFMWS) describes zero or more
>> Host Physical Address (HPA) windows that are associated with each CXL
>> Host Bridge. Each window represents a contiguous HPA that may be
>> interleaved with one or more targets (CXL v3.2 - 9.18.1.3).
>>
> ...
>>
>> Cc: Alison Schofield <alison.schofield@intel.com>
>> Cc: Dan Williams <dan.j.williams@intel.com>
>> Cc: Dave Jiang <dave.jiang@intel.com>
>> Cc: Ira Weiny <ira.weiny@intel.com>
>> Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
>
> Couple inlines but just nits
>
> Reviewed-by: Gregory Price <gourry@gourry.net>
>
>> @@ -1770,6 +1778,7 @@ static int match_cxlsd_to_cxled_by_range(struct device *dev, const void *data)
>> {
>> const struct cxl_endpoint_decoder *cxled = data;
>> struct cxl_switch_decoder *cxlsd;
>> + struct cxl_root_decoder *cxlrd;
>> const struct range *r1, *r2;
>>
>> if (!is_switch_decoder(dev))
>> @@ -1779,8 +1788,13 @@ static int match_cxlsd_to_cxled_by_range(struct device *dev, const void *data)
>> r1 = &cxlsd->cxld.hpa_range;
>> r2 = &cxled->cxld.hpa_range;
>>
>> - if (is_root_decoder(dev))
>> - return range_contains(r1, r2);
>> + if (is_root_decoder(dev)) {
>> + if (range_contains(r1, r2))
>> + return 1;
>> + cxlrd = to_cxl_root_decoder(dev);
>> + if (platform_cxlrd_matches_cxled(cxlrd, cxled))
>> + return 1;
>> + }
>
> Is there any concern for longer term maintainability if addition
> match_*() functions are added? Or is this upkeep just the unfortunate
> maintenance cost of supportering the quirk?
Suggestions welcome. Would be nice if we have cleaner ways of dealing with this.
>
>>
>> static struct cxl_decoder *
>> @@ -3406,8 +3421,12 @@ static int match_region_to_cxled_by_range(struct device *dev, const void *data)
>> p = &cxlr->params;
>>
>> guard(rwsem_read)(&cxl_rwsem.region);
>> - if (p->res && p->res->start == r->start && p->res->end == r->end)
>> - return 1;
>> + if (p->res) {
>> + if (p->res->start == r->start && p->res->end == r->end)
>> + return 1;
>> + if (platform_region_matches_cxld(p, &cxled->cxld))
>> + return 1;
>> + }
>
>
> if (!p->res)
> return 0;
> if (p->res->start == r->start && p->res->end == r->end)
> return 1;
> if (platform_region_matches_cxld(p, &cxled->cxld))
> return 1;
> return 0;
>
> ?
>
> I like flat, but I also dislike not-logic. Style choice here, unless
> others have a strong feeling this is fine.
More flat is definitely the preferred way. With the changes, the last one we can actually do:
return platform_region_matches_cxld(p, &cxled->cxld));
>
> ~Gregory
On Tue, Oct 07, 2025 at 01:25:11PM -0700, Dave Jiang wrote:
> On 10/6/25 10:46 AM, Gregory Price wrote:
> >> @@ -1779,8 +1788,13 @@ static int match_cxlsd_to_cxled_by_range(struct device *dev, const void *data)
> >> r1 = &cxlsd->cxld.hpa_range;
> >> r2 = &cxled->cxld.hpa_range;
> >>
> >> - if (is_root_decoder(dev))
> >> - return range_contains(r1, r2);
> >> + if (is_root_decoder(dev)) {
> >> + if (range_contains(r1, r2))
> >> + return 1;
> >> + cxlrd = to_cxl_root_decoder(dev);
> >> + if (platform_cxlrd_matches_cxled(cxlrd, cxled))
> >> + return 1;
> >> + }
> >
> > Is there any concern for longer term maintainability if addition
> > match_*() functions are added? Or is this upkeep just the unfortunate
> > maintenance cost of supportering the quirk?
>
> Suggestions welcome. Would be nice if we have cleaner ways of dealing with this.
>
Had a bit of a think about it, but nothing immediately pops out
that doesn't just end with more obfuscation. It is what it is.
~Gregory
© 2016 - 2025 Red Hat, Inc.