[PATCH 2/4 v4] cxl/core: Add helpers to detect Low Memory Holes on x86

Fabio M. De Francesco posted 4 patches 2 months, 1 week ago
[PATCH 2/4 v4] cxl/core: Add helpers to detect Low Memory Holes on x86
Posted by Fabio M. De Francesco 2 months, 1 week ago
In x86 with Low memory Hole, the BIOS may publishes CFMWS that describe
SPA ranges which are subsets of the corresponding CXL Endpoint Decoders
HPA's because the CFMWS never intersects LMH's while EP Decoders HPA's
ranges are always guaranteed to align to the NIW * 256M rule.

In order to construct Regions and attach Decoders, the driver needs to
match Root Decoders and Regions with Endpoint Decoders, but it fails and
the entire process returns errors because it doesn't expect to deal with
SPA range lengths smaller than corresponding HPA's.

Introduce functions that indirectly detect x86 LMH's by comparing SPA's
with corresponding HPA's. They will be used in the process of Regions
creation and Endpoint attachments to prevent driver failures in a few
steps of the above-mentioned process.

The helpers return true when HPA/SPA misalignments are detected under
specific conditions: both the SPA and HPA ranges must start at
LMH_CFMWS_RANGE_START (that in x86 with LMH's is 0x0), SPA range sizes
be less than HPA's, SPA's range's size be less than 4G, HPA's size be
aligned to the NIW * 256M rule.

Also introduce a function to adjust the range end of the Regions to be
created on x86 with LMH's.

Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
---
 drivers/cxl/Kconfig         |  5 +++
 drivers/cxl/core/Makefile   |  1 +
 drivers/cxl/core/platform.c | 85 +++++++++++++++++++++++++++++++++++++
 drivers/cxl/core/platform.h | 32 ++++++++++++++
 4 files changed, 123 insertions(+)
 create mode 100644 drivers/cxl/core/platform.c
 create mode 100644 drivers/cxl/core/platform.h

diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 48b7314afdb8..eca90baeac10 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -211,6 +211,11 @@ config CXL_REGION
 
 	  If unsure say 'y'
 
+config CXL_PLATFORM_QUIRKS
+	def_bool y
+	depends on CXL_REGION
+	depends on X86
+
 config CXL_REGION_INVALIDATION_TEST
 	bool "CXL: Region Cache Management Bypass (TEST)"
 	depends on CXL_REGION
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 79e2ef81fde8..4be729fb7d64 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -18,6 +18,7 @@ cxl_core-y += ras.o
 cxl_core-y += acpi.o
 cxl_core-$(CONFIG_TRACING) += trace.o
 cxl_core-$(CONFIG_CXL_REGION) += region.o
+cxl_core-$(CONFIG_CXL_PLATFORM_QUIRKS) += platform.o
 cxl_core-$(CONFIG_CXL_MCE) += mce.o
 cxl_core-$(CONFIG_CXL_FEATURES) += features.o
 cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
diff --git a/drivers/cxl/core/platform.c b/drivers/cxl/core/platform.c
new file mode 100644
index 000000000000..8202750742d0
--- /dev/null
+++ b/drivers/cxl/core/platform.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/range.h>
+#include "platform.h"
+#include "cxlmem.h"
+#include "core.h"
+
+/* Start of CFMWS range that end before x86 Low Memory Holes */
+#define LMH_CFMWS_RANGE_START 0x0ULL
+
+/*
+ * Match CXL Root and Endpoint Decoders by comparing SPA and HPA ranges.
+ *
+ * On x86, CFMWS ranges never intersect memory holes while endpoint decoders
+ * HPA range sizes are always guaranteed aligned to NIW * 256MB; therefore,
+ * the given endpoint decoder HPA range size is always expected aligned and
+ * also larger than that of the matching root decoder. If there are LMH's,
+ * the root decoder range end is always less than SZ_4G.
+ */
+bool platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
+				    const struct cxl_endpoint_decoder *cxled)
+{
+	const struct range *r1, *r2;
+	int niw;
+
+	r1 = &cxlrd->cxlsd.cxld.hpa_range;
+	r2 = &cxled->cxld.hpa_range;
+	niw = cxled->cxld.interleave_ways;
+
+	if (r1->start == LMH_CFMWS_RANGE_START && r1->start == r2->start &&
+	    r1->end < (LMH_CFMWS_RANGE_START + SZ_4G) && r1->end < r2->end &&
+	    IS_ALIGNED(range_len(r2), niw * SZ_256M))
+		return true;
+
+	return false;
+}
+
+/*
+ * Similar to platform_root_decoder_contains(), it matches regions and
+ * decoders
+ */
+bool platform_region_contains(const struct cxl_region_params *p,
+			      const struct cxl_decoder *cxld)
+{
+	const struct range *r = &cxld->hpa_range;
+	const struct resource *res = p->res;
+	int niw = cxld->interleave_ways;
+
+	if (res->start == LMH_CFMWS_RANGE_START && res->start == r->start &&
+	    res->end < (LMH_CFMWS_RANGE_START + SZ_4G) && res->end < r->end &&
+	    IS_ALIGNED(range_len(r), niw * SZ_256M))
+		return true;
+
+	return false;
+}
+
+void platform_res_adjust(struct resource *res,
+			 struct cxl_endpoint_decoder *cxled,
+			 const struct cxl_root_decoder *cxlrd)
+{
+	if (!platform_root_decoder_contains(cxlrd, cxled))
+		return;
+
+	guard(rwsem_write)(&cxl_dpa_rwsem);
+	dev_info(cxled_to_memdev(cxled)->dev.parent,
+		 "(LMH) Resources were (%s: %pr, %pr)\n",
+		 dev_name(&cxled->cxld.dev), res, cxled->dpa_res);
+	if (res) {
+		/*
+		 * A region must be constructed with Endpoint Decoder's
+		 * HPA range end adjusted to Root Decoder's resource end
+		 */
+		res->end = cxlrd->res->end;
+	}
+	/*
+	 * The Endpoint Decoder's dpa_res->end must be adjusted with Root
+	 * Decoder's resource end
+	 */
+	cxled->dpa_res->end =
+		cxled->dpa_res->start +
+		resource_size(cxlrd->res) / cxled->cxld.interleave_ways - 1;
+	dev_info(cxled_to_memdev(cxled)->dev.parent,
+		 "(LMH) Resources have been adjusted (%s: %pr, %pr)\n",
+		 dev_name(&cxled->cxld.dev), res, cxled->dpa_res);
+}
diff --git a/drivers/cxl/core/platform.h b/drivers/cxl/core/platform.h
new file mode 100644
index 000000000000..0baa39938729
--- /dev/null
+++ b/drivers/cxl/core/platform.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include "cxl.h"
+
+#ifdef CONFIG_CXL_PLATFORM_QUIRKS
+bool platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
+				    const struct cxl_endpoint_decoder *cxled);
+bool platform_region_contains(const struct cxl_region_params *p,
+			      const struct cxl_decoder *cxld);
+void platform_res_adjust(struct resource *res,
+			 struct cxl_endpoint_decoder *cxled,
+			 const struct cxl_root_decoder *cxlrd);
+#else
+static bool
+platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
+			       const struct cxl_endpoint_decoder *cxled)
+{
+	return false;
+}
+
+static bool platform_region_contains(const struct cxl_region_params *p,
+				     const struct cxl_decoder *cxld)
+{
+	return false;
+}
+
+void platform_res_adjust(struct resource *res,
+			 struct cxl_endpoint_decoder *cxled,
+			 const struct cxl_root_decoder *cxlrd)
+{
+}
+#endif /* CONFIG_CXL_PLATFORM_QUIRKS */
-- 
2.50.1
Re: [PATCH 2/4 v4] cxl/core: Add helpers to detect Low Memory Holes on x86
Posted by Cheatham, Benjamin 2 months ago
On 7/24/2025 9:20 AM, Fabio M. De Francesco wrote:
> In x86 with Low memory Hole, the BIOS may publishes CFMWS that describe

s/publishes/publish

> SPA ranges which are subsets of the corresponding CXL Endpoint Decoders
> HPA's because the CFMWS never intersects LMH's while EP Decoders HPA's
> ranges are always guaranteed to align to the NIW * 256M rule.

s/to align to/to align due to/

Also a spec reference for the rule would be helpful (same with next patch).

> 
> In order to construct Regions and attach Decoders, the driver needs to
> match Root Decoders and Regions with Endpoint Decoders, but it fails and
> the entire process returns errors because it doesn't expect to deal with
> SPA range lengths smaller than corresponding HPA's.
> 
> Introduce functions that indirectly detect x86 LMH's by comparing SPA's
> with corresponding HPA's. They will be used in the process of Regions
> creation and Endpoint attachments to prevent driver failures in a few
> steps of the above-mentioned process.
> 
> The helpers return true when HPA/SPA misalignments are detected under
> specific conditions: both the SPA and HPA ranges must start at
> LMH_CFMWS_RANGE_START (that in x86 with LMH's is 0x0), SPA range sizes

maybe sub "that in x86 with LMH's is 0x0" with "0x0 on x86 with LMH's" also
s/SPA range sizes/SPA range's size/.

> be less than HPA's, SPA's range's size be less than 4G, HPA's size be
> aligned to the NIW * 256M rule.

s/be/is/ in above list.

> 
> Also introduce a function to adjust the range end of the Regions to be
> created on x86 with LMH's.
> 
> Cc: Alison Schofield <alison.schofield@intel.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
> ---
>  drivers/cxl/Kconfig         |  5 +++
>  drivers/cxl/core/Makefile   |  1 +
>  drivers/cxl/core/platform.c | 85 +++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/platform.h | 32 ++++++++++++++
>  4 files changed, 123 insertions(+)
>  create mode 100644 drivers/cxl/core/platform.c
>  create mode 100644 drivers/cxl/core/platform.h
> 
> diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
> index 48b7314afdb8..eca90baeac10 100644
> --- a/drivers/cxl/Kconfig
> +++ b/drivers/cxl/Kconfig
> @@ -211,6 +211,11 @@ config CXL_REGION
>  
>  	  If unsure say 'y'
>  
> +config CXL_PLATFORM_QUIRKS
> +	def_bool y
> +	depends on CXL_REGION
> +	depends on X86
> +

I'm confused on the intention behind this symbol. The naming suggests it's for all platform quirks,
but the code and dependencies make this x86-specific.

I'm going to suggest making this x86-specific for now. I'm not aware of any other platforms with quirks
(someone correct me if I'm wrong), so making this x86-specific is fine for now. I would rename this
symbol to CXL_X86_QUIRKS, leave dependencies as-is, and rename platform.c to something like platform_x86.c.
Then, if someone comes along with other platform quirks they can do their own symbol and file (or come
up with a generic scheme).

>  config CXL_REGION_INVALIDATION_TEST
>  	bool "CXL: Region Cache Management Bypass (TEST)"
>  	depends on CXL_REGION
> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> index 79e2ef81fde8..4be729fb7d64 100644
> --- a/drivers/cxl/core/Makefile
> +++ b/drivers/cxl/core/Makefile
> @@ -18,6 +18,7 @@ cxl_core-y += ras.o
>  cxl_core-y += acpi.o
>  cxl_core-$(CONFIG_TRACING) += trace.o
>  cxl_core-$(CONFIG_CXL_REGION) += region.o
> +cxl_core-$(CONFIG_CXL_PLATFORM_QUIRKS) += platform.o
>  cxl_core-$(CONFIG_CXL_MCE) += mce.o
>  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
> diff --git a/drivers/cxl/core/platform.c b/drivers/cxl/core/platform.c
> new file mode 100644
> index 000000000000..8202750742d0
> --- /dev/null
> +++ b/drivers/cxl/core/platform.c
> @@ -0,0 +1,85 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <linux/range.h>
> +#include "platform.h"
> +#include "cxlmem.h"
> +#include "core.h"
> +
> +/* Start of CFMWS range that end before x86 Low Memory Holes */
> +#define LMH_CFMWS_RANGE_START 0x0ULL
> +
> +/*
> + * Match CXL Root and Endpoint Decoders by comparing SPA and HPA ranges.
> + *
> + * On x86, CFMWS ranges never intersect memory holes while endpoint decoders
> + * HPA range sizes are always guaranteed aligned to NIW * 256MB; therefore,
> + * the given endpoint decoder HPA range size is always expected aligned and
> + * also larger than that of the matching root decoder. If there are LMH's,
> + * the root decoder range end is always less than SZ_4G.
> + */

Where does the SZ_4G restraint come from?

Also, might as well make this a kdoc comment since you've already done the majority of the work.

> +bool platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
> +				    const struct cxl_endpoint_decoder *cxled)
> +{

Assuming you take renaming suggestion above, these functions should probably be start with
"platform_x86_*" instead.

> +	const struct range *r1, *r2;
> +	int niw;
> +
> +	r1 = &cxlrd->cxlsd.cxld.hpa_range;
> +	r2 = &cxled->cxld.hpa_range;
> +	niw = cxled->cxld.interleave_ways;
> +
> +	if (r1->start == LMH_CFMWS_RANGE_START && r1->start == r2->start &&
> +	    r1->end < (LMH_CFMWS_RANGE_START + SZ_4G) && r1->end < r2->end &&
> +	    IS_ALIGNED(range_len(r2), niw * SZ_256M))
> +		return true;
> +
> +	return false;
> +}
> +
> +/*
> + * Similar to platform_root_decoder_contains(), it matches regions and
> + * decoders
> + */
> +bool platform_region_contains(const struct cxl_region_params *p,
> +			      const struct cxl_decoder *cxld)
> +{
> +	const struct range *r = &cxld->hpa_range;
> +	const struct resource *res = p->res;
> +	int niw = cxld->interleave_ways;
> +
> +	if (res->start == LMH_CFMWS_RANGE_START && res->start == r->start &&
> +	    res->end < (LMH_CFMWS_RANGE_START + SZ_4G) && res->end < r->end &&
> +	    IS_ALIGNED(range_len(r), niw * SZ_256M))
> +		return true;
> +

This if condition could probably move into its own helper function that takes the ranges and
interleave ways. My only hang up is that these functions become 2-3 lines each after doing so.

You could also get rid of these two functions and just export the "helper" function instead.
It would probably add some bloat to patch 3/4, so it's your call here.

> +	return false;
> +}
> +
> +void platform_res_adjust(struct resource *res,
> +			 struct cxl_endpoint_decoder *cxled,
> +			 const struct cxl_root_decoder *cxlrd)
> +{
> +	if (!platform_root_decoder_contains(cxlrd, cxled))
> +		return;
> +
> +	guard(rwsem_write)(&cxl_dpa_rwsem);
> +	dev_info(cxled_to_memdev(cxled)->dev.parent,
> +		 "(LMH) Resources were (%s: %pr, %pr)\n",
> +		 dev_name(&cxled->cxld.dev), res, cxled->dpa_res);
> +	if (res) {
> +		/*
> +		 * A region must be constructed with Endpoint Decoder's
> +		 * HPA range end adjusted to Root Decoder's resource end
> +		 */

This comment (and one below) are confusing to me. I'd reword as "Trim region resource
overlap with LMH".

> +		res->end = cxlrd->res->end;
> +	}
> +	/*
> +	 * The Endpoint Decoder's dpa_res->end must be adjusted with Root
> +	 * Decoder's resource end
> +	 */

and reword this one to "Match endpoint decoder's DPA resource to root decoder's". You could
also leave out this comment altogether, the below is self-explanatory IMO.
 
> +	cxled->dpa_res->end =
> +		cxled->dpa_res->start +
> +		resource_size(cxlrd->res) / cxled->cxld.interleave_ways - 1;
> +	dev_info(cxled_to_memdev(cxled)->dev.parent,
> +		 "(LMH) Resources have been adjusted (%s: %pr, %pr)\n",
> +		 dev_name(&cxled->cxld.dev), res, cxled->dpa_res);

I think this function is a bit too noisy. I'd recommend having just this print and
have it say something like "Low memory hole overlap detected, trimmed DPA to %pr".
You could also include the amount trimmed, but that may not be very useful info.

I'd make the first print a dev_dbg() and spell out LMH at the very least. If it's a
dev_info() it should be relatively easy to tell what's going on for a layman.

> +}
> diff --git a/drivers/cxl/core/platform.h b/drivers/cxl/core/platform.h
> new file mode 100644
> index 000000000000..0baa39938729
> --- /dev/null
> +++ b/drivers/cxl/core/platform.h

This is a new file so that these functions can hook into cxl_test, right?
If not, I'd move the below into cxl/core/core.h and remove this file.

> @@ -0,0 +1,32 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include "cxl.h"
> +
> +#ifdef CONFIG_CXL_PLATFORM_QUIRKS
> +bool platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
> +				    const struct cxl_endpoint_decoder *cxled);
> +bool platform_region_contains(const struct cxl_region_params *p,
> +			      const struct cxl_decoder *cxld);
> +void platform_res_adjust(struct resource *res,
> +			 struct cxl_endpoint_decoder *cxled,
> +			 const struct cxl_root_decoder *cxlrd);
> +#else
> +static bool
> +platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
> +			       const struct cxl_endpoint_decoder *cxled)
> +{
> +	return false;
> +}
> +
> +static bool platform_region_contains(const struct cxl_region_params *p,
> +				     const struct cxl_decoder *cxld)
> +{
> +	return false;
> +}
> +
> +void platform_res_adjust(struct resource *res,
> +			 struct cxl_endpoint_decoder *cxled,
> +			 const struct cxl_root_decoder *cxlrd)
> +{
> +}

Don't these need "inline" in the function signatures?

> +#endif /* CONFIG_CXL_PLATFORM_QUIRKS */
Re: [PATCH 2/4 v4] cxl/core: Add helpers to detect Low Memory Holes on x86
Posted by Fabio M. De Francesco 2 weeks, 3 days ago
Hi Benjamin,

I thought again and decided to do a few more changes that you suggested... 

On Friday, August 1, 2025 10:04:16 PM Central European Summer Time Cheatham, Benjamin wrote:
> On 7/24/2025 9:20 AM, Fabio M. De Francesco wrote:
> > In x86 with Low memory Hole, the BIOS may publishes CFMWS that describe
> 
> s/publishes/publish
> 
> > SPA ranges which are subsets of the corresponding CXL Endpoint Decoders
> > HPA's because the CFMWS never intersects LMH's while EP Decoders HPA's
> > ranges are always guaranteed to align to the NIW * 256M rule.
> 
> s/to align to/to align due to/
> 
> Also a spec reference for the rule would be helpful (same with next patch).
> 
> > 
> > In order to construct Regions and attach Decoders, the driver needs to
> > match Root Decoders and Regions with Endpoint Decoders, but it fails and
> > the entire process returns errors because it doesn't expect to deal with
> > SPA range lengths smaller than corresponding HPA's.
> > 
> > Introduce functions that indirectly detect x86 LMH's by comparing SPA's
> > with corresponding HPA's. They will be used in the process of Regions
> > creation and Endpoint attachments to prevent driver failures in a few
> > steps of the above-mentioned process.
> > 
> > The helpers return true when HPA/SPA misalignments are detected under
> > specific conditions: both the SPA and HPA ranges must start at
> > LMH_CFMWS_RANGE_START (that in x86 with LMH's is 0x0), SPA range sizes
> 
> maybe sub "that in x86 with LMH's is 0x0" with "0x0 on x86 with LMH's" also
> s/SPA range sizes/SPA range's size/.
> 
> > be less than HPA's, SPA's range's size be less than 4G, HPA's size be
> > aligned to the NIW * 256M rule.
> 
> s/be/is/ in above list.
> 
> > 
> > Also introduce a function to adjust the range end of the Regions to be
> > created on x86 with LMH's.
> > 
> > Cc: Alison Schofield <alison.schofield@intel.com>
> > Cc: Dan Williams <dan.j.williams@intel.com>
> > Cc: Dave Jiang <dave.jiang@intel.com>
> > Cc: Ira Weiny <ira.weiny@intel.com>
> > Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
> > ---
> >  drivers/cxl/Kconfig         |  5 +++
> >  drivers/cxl/core/Makefile   |  1 +
> >  drivers/cxl/core/platform.c | 85 +++++++++++++++++++++++++++++++++++++
> >  drivers/cxl/core/platform.h | 32 ++++++++++++++
> >  4 files changed, 123 insertions(+)
> >  create mode 100644 drivers/cxl/core/platform.c
> >  create mode 100644 drivers/cxl/core/platform.h
> > 
> > diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
> > index 48b7314afdb8..eca90baeac10 100644
> > --- a/drivers/cxl/Kconfig
> > +++ b/drivers/cxl/Kconfig
> > @@ -211,6 +211,11 @@ config CXL_REGION
> >  
> >  	  If unsure say 'y'
> >  
> > +config CXL_PLATFORM_QUIRKS
> > +	def_bool y
> > +	depends on CXL_REGION
> > +	depends on X86
> > +
> 
> I'm confused on the intention behind this symbol. The naming suggests it's for all platform quirks,
> but the code and dependencies make this x86-specific.
> 
> I'm going to suggest making this x86-specific for now. I'm not aware of any other platforms with quirks
> (someone correct me if I'm wrong), so making this x86-specific is fine for now. I would rename this
> symbol to CXL_X86_QUIRKS, leave dependencies as-is, and rename platform.c to something like platform_x86.c.
> Then, if someone comes along with other platform quirks they can do their own symbol and file (or come
> up with a generic scheme).
> 
I'll make it x86-specific for now. platform.c will be renamed to platform_x86.c
and CXL_PLATFORM_QUIRKS to CXL_X86_QUIRKS.

Thanks,

Fabio
Re: [PATCH 2/4 v4] cxl/core: Add helpers to detect Low Memory Holes on x86
Posted by Fabio M. De Francesco 1 month, 2 weeks ago
On Friday, August 1, 2025 10:04:16 PM Central European Summer Time Cheatham, Benjamin wrote:
> On 7/24/2025 9:20 AM, Fabio M. De Francesco wrote:
> > In x86 with Low memory Hole, the BIOS may publishes CFMWS that describe
> 
> s/publishes/publish
> 
> > SPA ranges which are subsets of the corresponding CXL Endpoint Decoders
> > HPA's because the CFMWS never intersects LMH's while EP Decoders HPA's
> > ranges are always guaranteed to align to the NIW * 256M rule.
> 
> s/to align to/to align due to/
> 
> Also a spec reference for the rule would be helpful (same with next patch).
> 
> > 
> > In order to construct Regions and attach Decoders, the driver needs to
> > match Root Decoders and Regions with Endpoint Decoders, but it fails and
> > the entire process returns errors because it doesn't expect to deal with
> > SPA range lengths smaller than corresponding HPA's.
> > 
> > Introduce functions that indirectly detect x86 LMH's by comparing SPA's
> > with corresponding HPA's. They will be used in the process of Regions
> > creation and Endpoint attachments to prevent driver failures in a few
> > steps of the above-mentioned process.
> > 
> > The helpers return true when HPA/SPA misalignments are detected under
> > specific conditions: both the SPA and HPA ranges must start at
> > LMH_CFMWS_RANGE_START (that in x86 with LMH's is 0x0), SPA range sizes
> 
> maybe sub "that in x86 with LMH's is 0x0" with "0x0 on x86 with LMH's" also
> s/SPA range sizes/SPA range's size/.
> 
> > be less than HPA's, SPA's range's size be less than 4G, HPA's size be
> > aligned to the NIW * 256M rule.
> 
> s/be/is/ in above list.
> 
I'll change the lines you corrected. Thank you.
> > 
> > Also introduce a function to adjust the range end of the Regions to be
> > created on x86 with LMH's.
> > 
> > Cc: Alison Schofield <alison.schofield@intel.com>
> > Cc: Dan Williams <dan.j.williams@intel.com>
> > Cc: Dave Jiang <dave.jiang@intel.com>
> > Cc: Ira Weiny <ira.weiny@intel.com>
> > Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
> > ---
> >  drivers/cxl/Kconfig         |  5 +++
> >  drivers/cxl/core/Makefile   |  1 +
> >  drivers/cxl/core/platform.c | 85 +++++++++++++++++++++++++++++++++++++
> >  drivers/cxl/core/platform.h | 32 ++++++++++++++
> >  4 files changed, 123 insertions(+)
> >  create mode 100644 drivers/cxl/core/platform.c
> >  create mode 100644 drivers/cxl/core/platform.h
> > 
> > diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
> > index 48b7314afdb8..eca90baeac10 100644
> > --- a/drivers/cxl/Kconfig
> > +++ b/drivers/cxl/Kconfig
> > @@ -211,6 +211,11 @@ config CXL_REGION
> >  
> >  	  If unsure say 'y'
> >  
> > +config CXL_PLATFORM_QUIRKS
> > +	def_bool y
> > +	depends on CXL_REGION
> > +	depends on X86
> > +
> 
> I'm confused on the intention behind this symbol. The naming suggests it's for all platform quirks,
> but the code and dependencies make this x86-specific.
>
I'll remove this dependence on X86; it's neither needed nor wanted. 
> 
> I'm going to suggest making this x86-specific for now. I'm not aware of any other platforms with quirks
> (someone correct me if I'm wrong), so making this x86-specific is fine for now. I would rename this
> symbol to CXL_X86_QUIRKS, leave dependencies as-is, and rename platform.c to something like platform_x86.c.
> Then, if someone comes along with other platform quirks they can do their own symbol and file (or come
> up with a generic scheme).
>
Dan suggested this approach and I agree with him.[1]
> 
> >  config CXL_REGION_INVALIDATION_TEST
> >  	bool "CXL: Region Cache Management Bypass (TEST)"
> >  	depends on CXL_REGION
> > diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> > index 79e2ef81fde8..4be729fb7d64 100644
> > --- a/drivers/cxl/core/Makefile
> > +++ b/drivers/cxl/core/Makefile
> > @@ -18,6 +18,7 @@ cxl_core-y += ras.o
> >  cxl_core-y += acpi.o
> >  cxl_core-$(CONFIG_TRACING) += trace.o
> >  cxl_core-$(CONFIG_CXL_REGION) += region.o
> > +cxl_core-$(CONFIG_CXL_PLATFORM_QUIRKS) += platform.o
> >  cxl_core-$(CONFIG_CXL_MCE) += mce.o
> >  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
> >  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
> > diff --git a/drivers/cxl/core/platform.c b/drivers/cxl/core/platform.c
> > new file mode 100644
> > index 000000000000..8202750742d0
> > --- /dev/null
> > +++ b/drivers/cxl/core/platform.c
> > @@ -0,0 +1,85 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +
> > +#include <linux/range.h>
> > +#include "platform.h"
> > +#include "cxlmem.h"
> > +#include "core.h"
> > +
> > +/* Start of CFMWS range that end before x86 Low Memory Holes */
> > +#define LMH_CFMWS_RANGE_START 0x0ULL
> > +
> > +/*
> > + * Match CXL Root and Endpoint Decoders by comparing SPA and HPA ranges.
> > + *
> > + * On x86, CFMWS ranges never intersect memory holes while endpoint decoders
> > + * HPA range sizes are always guaranteed aligned to NIW * 256MB; therefore,
> > + * the given endpoint decoder HPA range size is always expected aligned and
> > + * also larger than that of the matching root decoder. If there are LMH's,
> > + * the root decoder range end is always less than SZ_4G.
> > + */
> 
> Where does the SZ_4G restraint come from?
> 
The hole is in low memory.
>
> Also, might as well make this a kdoc comment since you've already done the majority of the work.
> 
> > +bool platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
> > +				    const struct cxl_endpoint_decoder *cxled)
> > +{
> 
> Assuming you take renaming suggestion above, these functions should probably be start with
> "platform_x86_*" instead.
> 
> > +	const struct range *r1, *r2;
> > +	int niw;
> > +
> > +	r1 = &cxlrd->cxlsd.cxld.hpa_range;
> > +	r2 = &cxled->cxld.hpa_range;
> > +	niw = cxled->cxld.interleave_ways;
> > +
> > +	if (r1->start == LMH_CFMWS_RANGE_START && r1->start == r2->start &&
> > +	    r1->end < (LMH_CFMWS_RANGE_START + SZ_4G) && r1->end < r2->end &&
> > +	    IS_ALIGNED(range_len(r2), niw * SZ_256M))
> > +		return true;
> > +
> > +	return false;
> > +}
> > +
> > +/*
> > + * Similar to platform_root_decoder_contains(), it matches regions and
> > + * decoders
> > + */
> > +bool platform_region_contains(const struct cxl_region_params *p,
> > +			      const struct cxl_decoder *cxld)
> > +{
> > +	const struct range *r = &cxld->hpa_range;
> > +	const struct resource *res = p->res;
> > +	int niw = cxld->interleave_ways;
> > +
> > +	if (res->start == LMH_CFMWS_RANGE_START && res->start == r->start &&
> > +	    res->end < (LMH_CFMWS_RANGE_START + SZ_4G) && res->end < r->end &&
> > +	    IS_ALIGNED(range_len(r), niw * SZ_256M))
> > +		return true;
> > +
> 
> This if condition could probably move into its own helper function that takes the ranges and
> interleave ways. My only hang up is that these functions become 2-3 lines each after doing so.
> 
> You could also get rid of these two functions and just export the "helper" function instead.
> It would probably add some bloat to patch 3/4, so it's your call here.
>
I'd rather leave these functions as they are.
> 
> > +	return false;
> > +}
> > +
> > +void platform_res_adjust(struct resource *res,
> > +			 struct cxl_endpoint_decoder *cxled,
> > +			 const struct cxl_root_decoder *cxlrd)
> > +{
> > +	if (!platform_root_decoder_contains(cxlrd, cxled))
> > +		return;
> > +
> > +	guard(rwsem_write)(&cxl_dpa_rwsem);
> > +	dev_info(cxled_to_memdev(cxled)->dev.parent,
> > +		 "(LMH) Resources were (%s: %pr, %pr)\n",
> > +		 dev_name(&cxled->cxld.dev), res, cxled->dpa_res);
> > +	if (res) {
> > +		/*
> > +		 * A region must be constructed with Endpoint Decoder's
> > +		 * HPA range end adjusted to Root Decoder's resource end
> > +		 */
> 
> This comment (and one below) are confusing to me. I'd reword as "Trim region resource
> overlap with LMH".
>
Okay, thanks.
> 
> > +		res->end = cxlrd->res->end;
> > +	}
> > +	/*
> > +	 * The Endpoint Decoder's dpa_res->end must be adjusted with Root
> > +	 * Decoder's resource end
> > +	 */
> 
> and reword this one to "Match endpoint decoder's DPA resource to root decoder's". You could
> also leave out this comment altogether, the below is self-explanatory IMO.
>
Okay.
>  
> > +	cxled->dpa_res->end =
> > +		cxled->dpa_res->start +
> > +		resource_size(cxlrd->res) / cxled->cxld.interleave_ways - 1;
> > +	dev_info(cxled_to_memdev(cxled)->dev.parent,
> > +		 "(LMH) Resources have been adjusted (%s: %pr, %pr)\n",
> > +		 dev_name(&cxled->cxld.dev), res, cxled->dpa_res);
> 
> I think this function is a bit too noisy. I'd recommend having just this print and
> have it say something like "Low memory hole overlap detected, trimmed DPA to %pr".
> You could also include the amount trimmed, but that may not be very useful info.
> 
> I'd make the first print a dev_dbg() and spell out LMH at the very least. If it's a
> dev_info() it should be relatively easy to tell what's going on for a layman.
>
I'm using dev_info() according to what Alison suggested.[2]
> 
> > +}
> > diff --git a/drivers/cxl/core/platform.h b/drivers/cxl/core/platform.h
> > new file mode 100644
> > index 000000000000..0baa39938729
> > --- /dev/null
> > +++ b/drivers/cxl/core/platform.h
> 
> This is a new file so that these functions can hook into cxl_test, right?
> If not, I'd move the below into cxl/core/core.h and remove this file.
> 
> > @@ -0,0 +1,32 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +
> > +#include "cxl.h"
> > +
> > +#ifdef CONFIG_CXL_PLATFORM_QUIRKS
> > +bool platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
> > +				    const struct cxl_endpoint_decoder *cxled);
> > +bool platform_region_contains(const struct cxl_region_params *p,
> > +			      const struct cxl_decoder *cxld);
> > +void platform_res_adjust(struct resource *res,
> > +			 struct cxl_endpoint_decoder *cxled,
> > +			 const struct cxl_root_decoder *cxlrd);
> > +#else
> > +static bool
> > +platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
> > +			       const struct cxl_endpoint_decoder *cxled)
> > +{
> > +	return false;
> > +}
> > +
> > +static bool platform_region_contains(const struct cxl_region_params *p,
> > +				     const struct cxl_decoder *cxld)
> > +{
> > +	return false;
> > +}
> > +
> > +void platform_res_adjust(struct resource *res,
> > +			 struct cxl_endpoint_decoder *cxled,
> > +			 const struct cxl_root_decoder *cxlrd)
> > +{
> > +}
> 
> Don't these need "inline" in the function signatures?
>
Yes, sure. They'll be inline in the next version.
> 
> > +#endif /* CONFIG_CXL_PLATFORM_QUIRKS */
> 
Thank you,

Fabio

[1] https://lore.kernel.org/linux-cxl/67ee07cd4f8ec_1c2c6294d5@dwillia2-xfh.jf.intel.com.notmuch
[2] https://lore.kernel.org/linux-cxl/Z9xaBaM8Mzc8rQ90@aschofie-mobl2.lan/
Re: [PATCH 2/4 v4] cxl/core: Add helpers to detect Low Memory Holes on x86
Posted by Dave Jiang 1 month ago

On 7/24/25 7:20 AM, Fabio M. De Francesco wrote:

s/Holes/Hole/ for subject. Only 1 right?

> In x86 with Low memory Hole, the BIOS may publishes CFMWS that describe
> SPA ranges which are subsets of the corresponding CXL Endpoint Decoders> HPA's because the CFMWS never intersects LMH's while EP Decoders HPA's
> ranges are always guaranteed to align to the NIW * 256M rule.

On a x86 platform with a low memory hole (LHM), the BIOS may publish CFMWS that describes
SPA ranges. The SPA ranges are subsets of the corresponding CXL endpoint decoder's
HPA ranges because the CFMWS never intersects the LHM while the endpoint decoder's HPA
ranges are always guaranteed to align to the "NIW * 256M" rule.


> 
> In order to construct Regions and attach Decoders, the driver needs to
> match Root Decoders and Regions with Endpoint Decoders, but it fails and> the entire process returns errors because it doesn't expect to deal with
> SPA range lengths smaller than corresponding HPA's.

To construct regions and attach decoders, the driver needs to match root
decoders and regions with endpoint decoders. The process fails and returns
errors because it isn't expected to deal with SPA range smaller than the
corresponding HPA range.

> 
> Introduce functions that indirectly detect x86 LMH's by comparing SPA's

s/LMH's/LMH/
s/SPA/SPA range/

> with corresponding HPA's. They will be used in the process of Regions
s/HPA's/HPA range/
s/Regions/region/

> creation and Endpoint attachments to prevent driver failures in a few

s/Endpoint/endpoints/

> steps of the above-mentioned process.

s/above-mentioned process/the process mentioned above/

> 
> The helpers return true when HPA/SPA misalignments are detected under
> specific conditions: both the SPA and HPA ranges must start at
> LMH_CFMWS_RANGE_START (that in x86 with LMH's is 0x0), SPA range sizes
> be less than HPA's, SPA's range's size be less than 4G, HPA's size be
> aligned to the NIW * 256M rule.

I would make it clearer to read by listing them:
1. ....
2. ....

> 
> Also introduce a function to adjust the range end of the Regions to be

s/Regions/region/ singular right?

> created on x86 with LMH's.

s/LMH's/LMH/

> 
> Cc: Alison Schofield <alison.schofield@intel.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
> ---
>  drivers/cxl/Kconfig         |  5 +++
>  drivers/cxl/core/Makefile   |  1 +
>  drivers/cxl/core/platform.c | 85 +++++++++++++++++++++++++++++++++++++
>  drivers/cxl/core/platform.h | 32 ++++++++++++++
>  4 files changed, 123 insertions(+)
>  create mode 100644 drivers/cxl/core/platform.c
>  create mode 100644 drivers/cxl/core/platform.h
> 
> diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
> index 48b7314afdb8..eca90baeac10 100644
> --- a/drivers/cxl/Kconfig
> +++ b/drivers/cxl/Kconfig
> @@ -211,6 +211,11 @@ config CXL_REGION
>  
>  	  If unsure say 'y'
>  
> +config CXL_PLATFORM_QUIRKS
> +	def_bool y
> +	depends on CXL_REGION
> +	depends on X86
> +
>  config CXL_REGION_INVALIDATION_TEST
>  	bool "CXL: Region Cache Management Bypass (TEST)"
>  	depends on CXL_REGION
> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> index 79e2ef81fde8..4be729fb7d64 100644
> --- a/drivers/cxl/core/Makefile
> +++ b/drivers/cxl/core/Makefile
> @@ -18,6 +18,7 @@ cxl_core-y += ras.o
>  cxl_core-y += acpi.o
>  cxl_core-$(CONFIG_TRACING) += trace.o
>  cxl_core-$(CONFIG_CXL_REGION) += region.o
> +cxl_core-$(CONFIG_CXL_PLATFORM_QUIRKS) += platform.o

Given that you are creating a quirk, make the object obvious it's a quirk.

platform_quirks.o

>  cxl_core-$(CONFIG_CXL_MCE) += mce.o
>  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
> diff --git a/drivers/cxl/core/platform.c b/drivers/cxl/core/platform.c
> new file mode 100644
> index 000000000000..8202750742d0
> --- /dev/null
> +++ b/drivers/cxl/core/platform.c
> @@ -0,0 +1,85 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <linux/range.h>
> +#include "platform.h"
> +#include "cxlmem.h"
> +#include "core.h"
> +
> +/* Start of CFMWS range that end before x86 Low Memory Holes */
> +#define LMH_CFMWS_RANGE_START 0x0ULL
> +
> +/*
> + * Match CXL Root and Endpoint Decoders by comparing SPA and HPA ranges.
> + *
> + * On x86, CFMWS ranges never intersect memory holes while endpoint decoders
> + * HPA range sizes are always guaranteed aligned to NIW * 256MB; therefore,
> + * the given endpoint decoder HPA range size is always expected aligned and
> + * also larger than that of the matching root decoder. If there are LMH's,
> + * the root decoder range end is always less than SZ_4G.
> + */
> +bool platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
> +				    const struct cxl_endpoint_decoder *cxled)
> +{
> +	const struct range *r1, *r2;
> +	int niw;
> +
> +	r1 = &cxlrd->cxlsd.cxld.hpa_range;
> +	r2 = &cxled->cxld.hpa_range;

I would name it rd_r and ed_r so it's easier to remember which is which below when you do compare below.

> +	niw = cxled->cxld.interleave_ways;

Just make this 'align', and you can just do:
	align = cxled->cxld.interleave_ways * SZ_256M;
That way it's cleaner below when you do compare.

> +
> +	if (r1->start == LMH_CFMWS_RANGE_START && r1->start == r2->start &&

May as well make it 'r2->start == LMH_CFMWS_RANGE_START'?

> +	    r1->end < (LMH_CFMWS_RANGE_START + SZ_4G) && r1->end < r2->end &&
> +	    IS_ALIGNED(range_len(r2), niw * SZ_256M))
> +		return true;

Maybe have 1 conditional per line to make it easier to read since this is a big one

> +
> +	return false;
> +}
> +
> +/*
> + * Similar to platform_root_decoder_contains(), it matches regions and
> + * decoders
> + */
> +bool platform_region_contains(const struct cxl_region_params *p,
> +			      const struct cxl_decoder *cxld)
> +{
> +	const struct range *r = &cxld->hpa_range;
> +	const struct resource *res = p->res;
> +	int niw = cxld->interleave_ways;
> +
> +	if (res->start == LMH_CFMWS_RANGE_START && res->start == r->start &&
> +	    res->end < (LMH_CFMWS_RANGE_START + SZ_4G) && res->end < r->end &&
> +	    IS_ALIGNED(range_len(r), niw * SZ_256M))
> +		return true;

similar comments as the previous function

> +
> +	return false;
> +}
> +
> +void platform_res_adjust(struct resource *res,
> +			 struct cxl_endpoint_decoder *cxled,
> +			 const struct cxl_root_decoder *cxlrd)
> +{
> +	if (!platform_root_decoder_contains(cxlrd, cxled))
> +		return;
> +
> +	guard(rwsem_write)(&cxl_dpa_rwsem);
> +	dev_info(cxled_to_memdev(cxled)->dev.parent,
> +		 "(LMH) Resources were (%s: %pr, %pr)\n",
> +		 dev_name(&cxled->cxld.dev), res, cxled->dpa_res);
> +	if (res) {

Do you ever expect a scenario where the 'res' passed in is NULL?

> +		/*
> +		 * A region must be constructed with Endpoint Decoder's
> +		 * HPA range end adjusted to Root Decoder's resource end
> +		 */
> +		res->end = cxlrd->res->end;
> +	}
> +	/*
> +	 * The Endpoint Decoder's dpa_res->end must be adjusted with Root
> +	 * Decoder's resource end
> +	 */
> +	cxled->dpa_res->end =
> +		cxled->dpa_res->start +
> +		resource_size(cxlrd->res) / cxled->cxld.interleave_ways - 1;
> +	dev_info(cxled_to_memdev(cxled)->dev.parent,
> +		 "(LMH) Resources have been adjusted (%s: %pr, %pr)\n",
> +		 dev_name(&cxled->cxld.dev), res, cxled->dpa_res);

If the res passed in is ever NULL, this line would crash when accessing res here.

> +}
> diff --git a/drivers/cxl/core/platform.h b/drivers/cxl/core/platform.h
> new file mode 100644
> index 000000000000..0baa39938729
> --- /dev/null
> +++ b/drivers/cxl/core/platform.h
> @@ -0,0 +1,32 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include "cxl.h"
> +
> +#ifdef CONFIG_CXL_PLATFORM_QUIRKS
> +bool platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
> +				    const struct cxl_endpoint_decoder *cxled);
> +bool platform_region_contains(const struct cxl_region_params *p,
> +			      const struct cxl_decoder *cxld);
> +void platform_res_adjust(struct resource *res,
> +			 struct cxl_endpoint_decoder *cxled,
> +			 const struct cxl_root_decoder *cxlrd);
> +#else
> +static bool
> +platform_root_decoder_contains(const struct cxl_root_decoder *cxlrd,
> +			       const struct cxl_endpoint_decoder *cxled)
> +{
> +	return false;
> +}
> +
> +static bool platform_region_contains(const struct cxl_region_params *p,
> +				     const struct cxl_decoder *cxld)
> +{
> +	return false;
> +}
> +
> +void platform_res_adjust(struct resource *res,
> +			 struct cxl_endpoint_decoder *cxled,
> +			 const struct cxl_root_decoder *cxlrd)
> +{
> +}

these need inline

> +#endif /* CONFIG_CXL_PLATFORM_QUIRKS */