From: Srirangan Madhavan <smadhavan@nvidia.com>
Add infrastructure for quiescing the CXL data path before reset:
- Memory offlining: check if CXL-backed memory is online and offline
it via offline_and_remove_memory() before reset, per CXL
spec requirement to quiesce all CXL.mem transactions before issuing
CXL Reset.
- CPU cache flush: invalidate cache lines before reset
as a safety measure after memory offline.
Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
---
drivers/cxl/core/pci.c | 110 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 110 insertions(+)
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index f96ce884a213..9e6f0c4b3cb6 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -4,6 +4,8 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/delay.h>
+#include <linux/memory_hotplug.h>
+#include <linux/memregion.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/aer.h>
@@ -869,3 +871,111 @@ int cxl_port_get_possible_dports(struct cxl_port *port)
return ctx.count;
}
+
+/*
+ * CXL Reset support - core-provided reset logic for CXL devices.
+ *
+ * These helpers quiesce the CXL.mem data path (memory offline and CPU
+ * cache invalidation) in preparation for a CXL Reset.
+ */
+
+/*
+ * device_for_each_child() callback: offline CXL memory behind one decoder.
+ *
+ * @dev:  child device of the endpoint port; anything that is not an
+ *        endpoint decoder is skipped.
+ * @data: unused.
+ *
+ * If the region this decoder is attached to overlaps busy System RAM,
+ * offline and remove the region's whole resource range so that CXL.mem
+ * transactions are quiesced before CXL Reset is issued.
+ *
+ * Return: 0 if there was nothing to offline or the offline succeeded,
+ * -EOPNOTSUPP if CONFIG_MEMORY_HOTREMOVE is disabled, otherwise the error
+ * returned by offline_and_remove_memory(). A non-zero return stops the
+ * device_for_each_child() walk.
+ *
+ * NOTE(review): cxld.region and params.res are read here without taking
+ * any region lock - confirm the caller serializes against region teardown.
+ */
+static int __maybe_unused cxl_offline_memory(struct device *dev, void *data)
+{
+	struct cxl_endpoint_decoder *cxled;
+	struct cxl_region *cxlr;
+	struct resource *res;
+	int rc;
+
+	if (!is_endpoint_decoder(dev))
+		return 0;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	cxlr = cxled->cxld.region;
+	if (!cxlr || !cxlr->params.res)
+		return 0;
+
+	res = cxlr->params.res;
+
+	/*
+	 * Pure presence check. walk_iomem_res_desc() calls its callback for
+	 * every matching resource, so it must not be handed a NULL callback;
+	 * region_intersects() answers "is any of this range System RAM?"
+	 * without needing one.
+	 */
+	if (region_intersects(res->start, resource_size(res),
+			      IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+			      IORES_DESC_NONE) == REGION_DISJOINT)
+		return 0;
+
+	dev_info(dev, "Offlining CXL memory [%pr] for reset\n", res);
+
+	/*
+	 * NOTE(review): offline_and_remove_memory() appears to have no stub
+	 * for !CONFIG_MEMORY_HOTREMOVE, hence the #ifdef; a header stub
+	 * returning -EOPNOTSUPP would let this collapse to a plain call.
+	 */
+#ifdef CONFIG_MEMORY_HOTREMOVE
+	rc = offline_and_remove_memory(res->start, resource_size(res));
+	if (rc)
+		dev_err(dev, "Failed to offline CXL memory [%pr]: %d\n",
+			res, rc);
+#else
+	dev_err(dev, "Memory hotremove not supported, cannot offline CXL memory\n");
+	rc = -EOPNOTSUPP;
+#endif
+
+	return rc;
+}
+
+/*
+ * Offline any online System RAM backed by the endpoint decoders of @cxlmd.
+ *
+ * Walks the children of the memdev's endpoint port and runs
+ * cxl_offline_memory() on each one.
+ *
+ * Return: -ENODEV if @cxlmd or its device state is missing, 0 if there is
+ * no usable endpoint port or every child returned 0, otherwise the first
+ * non-zero value returned by cxl_offline_memory().
+ */
+static int __maybe_unused cxl_reset_prepare_memdev(struct cxl_memdev *cxlmd)
+{
+	struct cxl_port *endpoint;
+
+	if (!cxlmd || !cxlmd->cxlds)
+		return -ENODEV;
+
+	/*
+	 * cxl_reset_flush_cpu_caches() treats cxlmd->endpoint as possibly
+	 * holding an ERR_PTR; apply the same check here so an error pointer
+	 * is never dereferenced.
+	 */
+	endpoint = cxlmd->endpoint;
+	if (IS_ERR_OR_NULL(endpoint))
+		return 0;
+
+	return device_for_each_child(&endpoint->dev, NULL,
+				     cxl_offline_memory);
+}
+
+/*
+ * device_for_each_child() callback: invalidate CPU caches for the region
+ * attached to one endpoint decoder.
+ *
+ * @dev:  child device of the endpoint port; anything that is not an
+ *        endpoint decoder, or has no region resource, is skipped.
+ * @data: unused.
+ *
+ * Return: always 0, so one failed invalidation does not stop the walk
+ * over the remaining decoders; failures are logged instead.
+ */
+static int __maybe_unused cxl_decoder_flush_cache(struct device *dev, void *data)
+{
+	struct cxl_endpoint_decoder *cxled;
+	struct cxl_region *cxlr;
+	struct resource *res;
+	int rc;
+
+	if (!is_endpoint_decoder(dev))
+		return 0;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	cxlr = cxled->cxld.region;
+	if (!cxlr || !cxlr->params.res)
+		return 0;
+
+	res = cxlr->params.res;
+	rc = cpu_cache_invalidate_memregion(res->start, resource_size(res));
+	if (rc)
+		dev_warn(dev, "Failed to invalidate CPU caches for [%pr]: %d\n",
+			 res, rc);
+
+	return 0;
+}
+
+/*
+ * Invalidate CPU caches for every region reachable through the endpoint
+ * decoders of @cxlmd.
+ *
+ * Does nothing when @cxlmd is NULL, when its endpoint port is NULL or an
+ * error pointer, or when cpu_cache_has_invalidate_memregion() reports no
+ * support.
+ *
+ * Return: always 0; the result of the child walk is not propagated.
+ */
+static int __maybe_unused cxl_reset_flush_cpu_caches(struct cxl_memdev *cxlmd)
+{
+	struct cxl_port *port = cxlmd ? cxlmd->endpoint : NULL;
+
+	if (IS_ERR_OR_NULL(port) || !cpu_cache_has_invalidate_memregion())
+		return 0;
+
+	device_for_each_child(&port->dev, NULL, cxl_decoder_flush_cache);
+	return 0;
+}
--
2.43.0
On 3/6/26 2:23 AM, smadhavan@nvidia.com wrote:
> From: Srirangan Madhavan <smadhavan@nvidia.com>
>
> Add infrastructure for quiescing the CXL data path before reset:
>
> - Memory offlining: check if CXL-backed memory is online and offline
> it via offline_and_remove_memory() before reset, per CXL
> spec requirement to quiesce all CXL.mem transactions before issuing
> CXL Reset.
> - CPU cache flush: invalidate cache lines before reset
> as a safety measure after memory offline.
>
> Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
> ---
> drivers/cxl/core/pci.c | 110 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 110 insertions(+)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index f96ce884a213..9e6f0c4b3cb6 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -4,6 +4,8 @@
> #include <linux/io-64-nonatomic-lo-hi.h>
> #include <linux/device.h>
> #include <linux/delay.h>
> +#include <linux/memory_hotplug.h>
> +#include <linux/memregion.h>
> #include <linux/pci.h>
> #include <linux/pci-doe.h>
> #include <linux/aer.h>
> @@ -869,3 +871,111 @@ int cxl_port_get_possible_dports(struct cxl_port *port)
>
> return ctx.count;
> }
> +
> +/*
> + * CXL Reset support - core-provided reset logic for CXL devices.
> + *
> + * These functions implement the CXL reset sequence.
> + */
> +
> +/*
> + * If CXL memory backed by this decoder is online as System RAM, offline
> + * and remove it per CXL spec requirements before issuing CXL Reset.
> + * Returns 0 if memory was not online or was successfully offlined.
> + */
> +static int __maybe_unused cxl_offline_memory(struct device *dev, void *data)
> +{
> + struct cxl_endpoint_decoder *cxled;
> + struct cxl_region *cxlr;
> + struct cxl_region_params *p;
> + int rc;
> +
> + if (!is_endpoint_decoder(dev))
> + return 0;
> +
> + cxled = to_cxl_endpoint_decoder(dev);
> + cxlr = cxled->cxld.region;
> + if (!cxlr)
> + return 0;
> +
> + p = &cxlr->params;
> + if (!p->res)
> + return 0;
> +
> + if (walk_iomem_res_desc(IORES_DESC_NONE,
> + IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
> + p->res->start, p->res->end, NULL, NULL) <= 0)
This function is performed per endpoint. So if a region is backed by multiple endpoints, wouldn't this memory offline operation be performed over the same region on every related endpoint instead of just once? Maybe a temp xarray during the reset process that keeps track of the regions that are being hit with reset?
> + return 0;
> +
> + dev_info(dev, "Offlining CXL memory [%pr] for reset\n", p->res);
> +
> +#ifdef CONFIG_MEMORY_HOTREMOVE
> + rc = offline_and_remove_memory(p->res->start, resource_size(p->res));
> + if (rc) {
> + dev_err(dev,
> + "Failed to offline CXL memory [%pr]: %d\n",
> + p->res, rc);
> + return rc;
> + }
> +#else
> + dev_err(dev, "Memory hotremove not supported, cannot offline CXL memory\n");
> + rc = -EOPNOTSUPP;
> + return rc;
> +#endif
Same comment as Alex: #ifdefs in C files are not preferred. Maybe a helper function can be used and stubbed out when !CONFIG_MEMORY_HOTREMOVE.
> +
> + return 0;
> +}
> +
> +static int __maybe_unused cxl_reset_prepare_memdev(struct cxl_memdev *cxlmd)
> +{
> + struct cxl_port *endpoint;
> + struct device *dev;
> +
> + if (!cxlmd || !cxlmd->cxlds)
> + return -ENODEV;
> +
> + dev = cxlmd->cxlds->dev;
> + endpoint = cxlmd->endpoint;
> + if (!endpoint)
> + return 0;
> +
> + return device_for_each_child(&endpoint->dev, NULL,
> + cxl_offline_memory);
> +}
> +
> +static int __maybe_unused cxl_decoder_flush_cache(struct device *dev, void *data)
> +{
> + struct cxl_endpoint_decoder *cxled;
> + struct cxl_region *cxlr;
> + struct resource *res;
> +
> + if (!is_endpoint_decoder(dev))
> + return 0;
> +
> + cxled = to_cxl_endpoint_decoder(dev);
> + cxlr = cxled->cxld.region;
> + if (!cxlr || !cxlr->params.res)
> + return 0;
> +
> + res = cxlr->params.res;
> + cpu_cache_invalidate_memregion(res->start, resource_size(res));
Same comment as for the memory offline path: the cache is invalidated for the whole region once per decoder, which is probably not something you want to do.
DJ
> + return 0;
> +}
> +
> +static int __maybe_unused cxl_reset_flush_cpu_caches(struct cxl_memdev *cxlmd)
> +{
> + struct cxl_port *endpoint;
> +
> + if (!cxlmd)
> + return 0;
> +
> + endpoint = cxlmd->endpoint;
> + if (!endpoint || IS_ERR(endpoint))
> + return 0;
> +
> + if (!cpu_cache_has_invalidate_memregion())
> + return 0;
> +
> + device_for_each_child(&endpoint->dev, NULL, cxl_decoder_flush_cache);
> + return 0;
> +}
> --
> 2.43.0
>
On Fri, 6 Mar 2026 09:23:18 +0000
<smadhavan@nvidia.com> wrote:
> From: Srirangan Madhavan <smadhavan@nvidia.com>
>
> Add infrastructure for quiescing the CXL data path before reset:
>
> - Memory offlining: check if CXL-backed memory is online and offline
> it via offline_and_remove_memory() before reset, per CXL
> spec requirement to quiesce all CXL.mem transactions before issuing
> CXL Reset.
> - CPU cache flush: invalidate cache lines before reset
> as a safety measure after memory offline.
>
> Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
> ---
> drivers/cxl/core/pci.c | 110 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 110 insertions(+)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index f96ce884a213..9e6f0c4b3cb6 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -4,6 +4,8 @@
> #include <linux/io-64-nonatomic-lo-hi.h>
> #include <linux/device.h>
> #include <linux/delay.h>
> +#include <linux/memory_hotplug.h>
> +#include <linux/memregion.h>
> #include <linux/pci.h>
> #include <linux/pci-doe.h>
> #include <linux/aer.h>
> @@ -869,3 +871,111 @@ int cxl_port_get_possible_dports(struct cxl_port *port)
>
> return ctx.count;
> }
> +
> +/*
> + * CXL Reset support - core-provided reset logic for CXL devices.
> + *
> + * These functions implement the CXL reset sequence.
> + */
> +
> +/*
> + * If CXL memory backed by this decoder is online as System RAM, offline
> + * and remove it per CXL spec requirements before issuing CXL Reset.
> + * Returns 0 if memory was not online or was successfully offlined.
> + */
> +static int __maybe_unused cxl_offline_memory(struct device *dev, void *data)
> +{
> + struct cxl_endpoint_decoder *cxled;
> + struct cxl_region *cxlr;
> + struct cxl_region_params *p;
> + int rc;
> +
> + if (!is_endpoint_decoder(dev))
> + return 0;
> +
> + cxled = to_cxl_endpoint_decoder(dev);
> + cxlr = cxled->cxld.region;
> + if (!cxlr)
> + return 0;
> +
> + p = &cxlr->params;
> + if (!p->res)
> + return 0;
> +
> + if (walk_iomem_res_desc(IORES_DESC_NONE,
> + IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
> + p->res->start, p->res->end, NULL, NULL) <= 0)
> + return 0;
> +
> + dev_info(dev, "Offlining CXL memory [%pr] for reset\n", p->res);
> +
> +#ifdef CONFIG_MEMORY_HOTREMOVE
> + rc = offline_and_remove_memory(p->res->start, resource_size(p->res));
> + if (rc) {
> + dev_err(dev,
> + "Failed to offline CXL memory [%pr]: %d\n",
> + p->res, rc);
> + return rc;
> + }
> +#else
> + dev_err(dev, "Memory hotremove not supported, cannot offline CXL memory\n");
> + rc = -EOPNOTSUPP;
> + return rc;
> +#endif
This would be cleaner if we stubbed offline_and_remove_memory() with
-EOPNOTSUPP. Thanks,
Alex
> +
> + return 0;
> +}
> +
> +static int __maybe_unused cxl_reset_prepare_memdev(struct cxl_memdev *cxlmd)
> +{
> + struct cxl_port *endpoint;
> + struct device *dev;
> +
> + if (!cxlmd || !cxlmd->cxlds)
> + return -ENODEV;
> +
> + dev = cxlmd->cxlds->dev;
> + endpoint = cxlmd->endpoint;
> + if (!endpoint)
> + return 0;
> +
> + return device_for_each_child(&endpoint->dev, NULL,
> + cxl_offline_memory);
> +}
> +
> +static int __maybe_unused cxl_decoder_flush_cache(struct device *dev, void *data)
> +{
> + struct cxl_endpoint_decoder *cxled;
> + struct cxl_region *cxlr;
> + struct resource *res;
> +
> + if (!is_endpoint_decoder(dev))
> + return 0;
> +
> + cxled = to_cxl_endpoint_decoder(dev);
> + cxlr = cxled->cxld.region;
> + if (!cxlr || !cxlr->params.res)
> + return 0;
> +
> + res = cxlr->params.res;
> + cpu_cache_invalidate_memregion(res->start, resource_size(res));
> + return 0;
> +}
> +
> +static int __maybe_unused cxl_reset_flush_cpu_caches(struct cxl_memdev *cxlmd)
> +{
> + struct cxl_port *endpoint;
> +
> + if (!cxlmd)
> + return 0;
> +
> + endpoint = cxlmd->endpoint;
> + if (!endpoint || IS_ERR(endpoint))
> + return 0;
> +
> + if (!cpu_cache_has_invalidate_memregion())
> + return 0;
> +
> + device_for_each_child(&endpoint->dev, NULL, cxl_decoder_flush_cache);
> + return 0;
> +}
> --
> 2.43.0
>
>
© 2016 - 2026 Red Hat, Inc.