From: Dan Williams <dan.j.williams@intel.com>

Previously, dax_hmem deferred to the CXL subsystem only when it detected
an immediate resource intersection with a CXL window. This left a gap: if
cxl_acpi or cxl_pci had not yet probed, or region assembly had not yet
started, no intersection was visible and hmem could prematurely claim
ranges that CXL was about to consume.

Fix this by introducing a dax_cxl_mode state machine and a deferred work
mechanism.

The deferred work item delays consideration of Soft Reserved overlaps
until the CXL subsystem has had a chance to complete its discovery and
region assembly. This avoids premature iomem claims, eliminates races
with asynchronous cxl_pci probing, and provides a clean handoff of Soft
Reserved ranges between hmem and CXL resource management.
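
In outline: the initial walk of Soft Reserved resources runs with
dax_cxl_mode == DAX_CXL_MODE_DEFER, so hmem_register_device() leaves any
CXL-intersecting range alone and schedules the deferred work. That work
waits for cxl_acpi and cxl_pci to get a chance to probe
(wait_for_device_probe()), moves the state machine to DAX_CXL_MODE_DROP,
and re-walks the resources so that ranges now claimed by CXL are dropped
rather than registered. DAX_CXL_MODE_REGISTER covers the case where hmem
should register a CXL-intersecting range itself; nothing selects it in
this patch.
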
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
drivers/dax/hmem/hmem.c | 72 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index c2c110b194e5..0498cb234c06 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -58,9 +58,45 @@ static void release_hmem(void *pdev)
platform_device_unregister(pdev);
}
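+/*
+ * dax_cxl_mode gates how hmem treats a Soft Reserved range that
+ * intersects a CXL window: defer it (and kick the deferred work),
+ * register it anyway, or drop it in favor of the CXL subsystem.
+ */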
+static enum dax_cxl_mode {
+ DAX_CXL_MODE_DEFER,
+ DAX_CXL_MODE_REGISTER,
+ DAX_CXL_MODE_DROP,
+} dax_cxl_mode;
+
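+/*
+ * Deferred re-walk callback: by the time this runs the mode is
+ * DAX_CXL_MODE_DROP, so a range that still intersects a CXL window is
+ * left to CXL; "dropping" just means not registering it here.
+ */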
+static int handle_deferred_cxl(struct device *host, int target_nid,
+ const struct resource *res)
+{
+ if (region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
+ IORES_DESC_CXL) != REGION_DISJOINT) {
+ if (dax_cxl_mode == DAX_CXL_MODE_DROP)
+ dev_dbg(host, "dropping CXL range: %pr\n", res);
+ }
+ return 0;
+}
+
+struct dax_defer_work {
+ struct platform_device *pdev;
+ struct work_struct work;
+};
+
+static void process_defer_work(struct work_struct *_work)
+{
+ struct dax_defer_work *work = container_of(_work, typeof(*work), work);
+ struct platform_device *pdev = work->pdev;
+
+ /* relies on cxl_acpi and cxl_pci having had a chance to load */
+ wait_for_device_probe();
+
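+ /* from here on, CXL-intersecting ranges are dropped, not deferred */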
+ dax_cxl_mode = DAX_CXL_MODE_DROP;
+
+ walk_hmem_resources(&pdev->dev, handle_deferred_cxl);
+}
+
static int hmem_register_device(struct device *host, int target_nid,
const struct resource *res)
{
+ struct dax_defer_work *work = dev_get_drvdata(host);
struct platform_device *pdev;
struct memregion_info info;
long id;
@@ -69,8 +105,18 @@ static int hmem_register_device(struct device *host, int target_nid,
if (IS_ENABLED(CONFIG_DEV_DAX_CXL) &&
region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
IORES_DESC_CXL) != REGION_DISJOINT) {
- dev_dbg(host, "deferring range to CXL: %pr\n", res);
- return 0;
+ switch (dax_cxl_mode) {
+ case DAX_CXL_MODE_DEFER:
+ dev_dbg(host, "deferring range to CXL: %pr\n", res);
+ schedule_work(&work->work);
+ return 0;
+ case DAX_CXL_MODE_REGISTER:
+ dev_dbg(host, "registering CXL range: %pr\n", res);
+ break;
+ case DAX_CXL_MODE_DROP:
+ dev_dbg(host, "dropping CXL range: %pr\n", res);
+ return 0;
+ }
}
rc = region_intersects_soft_reserve(res->start, resource_size(res),
@@ -125,8 +171,30 @@ static int hmem_register_device(struct device *host, int target_nid,
return rc;
}
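+/* devm action: ensure the deferred work is idle before it is freed */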
+static void kill_defer_work(void *_work)
+{
+ struct dax_defer_work *work = container_of(_work, typeof(*work), work);
+
+ cancel_work_sync(&work->work);
+ kfree(work);
+}
+
static int dax_hmem_platform_probe(struct platform_device *pdev)
{
+ struct dax_defer_work *work = kzalloc(sizeof(*work), GFP_KERNEL);
+ int rc;
+
+ if (!work)
+ return -ENOMEM;
+
+ work->pdev = pdev;
+ INIT_WORK(&work->work, process_defer_work);
+
+ rc = devm_add_action_or_reset(&pdev->dev, kill_defer_work, &work->work);
+ if (rc)
+ return rc;
+
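+ /* stash the work where hmem_register_device() can find it via drvdata */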
+ platform_set_drvdata(pdev, work);
return walk_hmem_resources(&pdev->dev, hmem_register_device);
}
--
2.17.1