drivers/cxl/core/region.c | 112 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 24 deletions(-)
cxl_dpa_to_region() has expectations that cxlmd->endpoint remains valid
for the duration of the call. When userspace performs poison injection
or clearing on a region via debugfs, holding cxl_rwsem.region and
cxl_rwsem.dpa alone is insufficient, these locks do not prevent the
retrieved CXL memdev from being destroyed, nor do they protect against
concurrent driver detachment. Therefore, hold CXL memdev lock in the
debugfs callbacks to ensure the cxlmd->dev.driver remains stable for the
entire execution of the callback functions.
To keep lock sequence(cxlmd.dev -> cxl_rwsem.region -> cxl_rwsem.dpa)
for avoiding deadlock. the interfaces have to find out the correct CXL
memdev at first, holding lock in the sequence then checking if the DPA
data has been changed before holding locks.
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Li Ming <ming.li@zohomail.com>
---
drivers/cxl/core/region.c | 112 ++++++++++++++++++++++++++++++++++++----------
1 file changed, 88 insertions(+), 24 deletions(-)
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index f24b7e754727..1a509acc52a3 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -4101,12 +4101,70 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset)
return 0;
}
+static int __cxl_region_poison_lookup(struct cxl_region *cxlr, u64 offset,
+ struct dpa_result *res)
+{
+ int rc;
+
+ *res = (struct dpa_result){ .dpa = ULLONG_MAX, .cxlmd = NULL };
+
+ if (validate_region_offset(cxlr, offset))
+ return -EINVAL;
+
+ offset -= cxlr->params.cache_size;
+ rc = region_offset_to_dpa_result(cxlr, offset, res);
+ if (rc || !res->cxlmd || res->dpa == ULLONG_MAX) {
+ dev_dbg(&cxlr->dev,
+ "Failed to resolve DPA for region offset %#llx rc %d\n",
+ offset, rc);
+
+ return rc ? rc : -EINVAL;
+ }
+
+ return 0;
+}
+
+static int cxl_region_poison_lookup(struct cxl_region *cxlr, u64 offset,
+ struct dpa_result *res)
+{
+ int rc;
+
+ ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
+ return rc;
+
+ ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+ if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
+ return rc;
+
+ rc = __cxl_region_poison_lookup(cxlr, offset, res);
+ if (rc)
+ return rc;
+
+ /*
+ * Hold the device reference in case
+ * the device is destroyed after that.
+ */
+ get_device(&res->cxlmd->dev);
+ return 0;
+}
+
static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
{
- struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
struct cxl_region *cxlr = data;
+ struct dpa_result res1, res2;
int rc;
+ /* To retrieve the correct memdev */
+ rc = cxl_region_poison_lookup(cxlr, offset, &res1);
+ if (rc)
+ return rc;
+
+ struct device *dev __free(put_device) = &res1.cxlmd->dev;
+ ACQUIRE(device_intr, devlock)(dev);
+ if ((rc = ACQUIRE_ERR(device_intr, &devlock)))
+ return rc;
+
ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
return rc;
@@ -4115,20 +4173,18 @@ static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
return rc;
- if (validate_region_offset(cxlr, offset))
- return -EINVAL;
-
- offset -= cxlr->params.cache_size;
- rc = region_offset_to_dpa_result(cxlr, offset, &result);
- if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
+ /*
+ * Retrieve memdev and DPA data again in case that the data
+ * has been changed before holding locks.
+ */
+ rc = __cxl_region_poison_lookup(cxlr, offset, &res2);
+ if (rc || res2.cxlmd != res1.cxlmd || res2.dpa != res1.dpa) {
dev_dbg(&cxlr->dev,
- "Failed to resolve DPA for region offset %#llx rc %d\n",
- offset, rc);
-
- return rc ? rc : -EINVAL;
+ "Error injection raced region reconfiguration: %d", rc);
+ return -ENXIO;
}
- return cxl_inject_poison_locked(result.cxlmd, result.dpa);
+ return cxl_inject_poison_locked(res2.cxlmd, res2.dpa);
}
DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL,
@@ -4136,10 +4192,20 @@ DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL,
static int cxl_region_debugfs_poison_clear(void *data, u64 offset)
{
- struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
struct cxl_region *cxlr = data;
+ struct dpa_result res1, res2;
int rc;
+ /* To retrieve the correct memdev */
+ rc = cxl_region_poison_lookup(cxlr, offset, &res1);
+ if (rc)
+ return rc;
+
+ struct device *dev __free(put_device) = &res1.cxlmd->dev;
+ ACQUIRE(device_intr, devlock)(dev);
+ if ((rc = ACQUIRE_ERR(device_intr, &devlock)))
+ return rc;
+
ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
return rc;
@@ -4148,20 +4214,18 @@ static int cxl_region_debugfs_poison_clear(void *data, u64 offset)
if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
return rc;
- if (validate_region_offset(cxlr, offset))
- return -EINVAL;
-
- offset -= cxlr->params.cache_size;
- rc = region_offset_to_dpa_result(cxlr, offset, &result);
- if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
+ /*
+ * Retrieve memdev and DPA data again in case that the data
+ * has been changed before holding locks.
+ */
+ rc = __cxl_region_poison_lookup(cxlr, offset, &res2);
+ if (rc || res2.cxlmd != res1.cxlmd || res2.dpa != res1.dpa) {
dev_dbg(&cxlr->dev,
- "Failed to resolve DPA for region offset %#llx rc %d\n",
- offset, rc);
-
- return rc ? rc : -EINVAL;
+ "Error clearing raced region reconfiguration: %d", rc);
+ return -ENXIO;
}
- return cxl_clear_poison_locked(result.cxlmd, result.dpa);
+ return cxl_clear_poison_locked(res2.cxlmd, res2.dpa);
}
DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
---
base-commit: d5f9bfc37906bbb737790af11f1537593f8778a5
change-id: 20260319-hold_memdev_lock_for_region_poison_inject-clear-4b8020d84662
Best regards,
--
Li Ming <ming.li@zohomail.com>
Li Ming wrote:
> cxl_dpa_to_region() has expectations that cxlmd->endpoint remains valid
> for the duration of the call. When userspace performs poison injection
> or clearing on a region via debugfs, holding cxl_rwsem.region and
> cxl_rwsem.dpa alone is insufficient, these locks do not prevent the
> retrieved CXL memdev from being destroyed, nor do they protect against
> concurrent driver detachment. Therefore, hold CXL memdev lock in the
> debugfs callbacks to ensure the cxlmd->dev.driver remains stable for the
> entire execution of the callback functions.
I took another look at this and there may be a simpler way to ensure
this safety.
In the case where we already have a region you can be assured that
the device is not going to detach from the region while holding the
proper rwsems. Maybe that was what Alison was referring to about it
being "ok"?
However, the problem is that cxl_dpa_to_region() is sometimes called
from paths where the memdev has unknown association to a region like
cxl_{inject,clear}_poison().
There are two ways to solve that problem. Hold the cxlmd lock, or move
the registration of debugfs *after* creation of cxlmd->endpoint. With
that ordering it would ensure that debugfs is only usable in the
interval here ->endpoint is known to be stable.
If that ends up simpler, we should go that route instead.
> To keep lock sequence(cxlmd.dev -> cxl_rwsem.region -> cxl_rwsem.dpa)
> for avoiding deadlock. the interfaces have to find out the correct CXL
> memdev at first, holding lock in the sequence then checking if the DPA
> data has been changed before holding locks.
>
> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Li Ming <ming.li@zohomail.com>
> ---
> drivers/cxl/core/region.c | 112 ++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 88 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index f24b7e754727..1a509acc52a3 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -4101,12 +4101,70 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset)
> return 0;
> }
>
> +static int __cxl_region_poison_lookup(struct cxl_region *cxlr, u64 offset,
> + struct dpa_result *res)
> +{
> + int rc;
> +
> + *res = (struct dpa_result){ .dpa = ULLONG_MAX, .cxlmd = NULL };
> +
> + if (validate_region_offset(cxlr, offset))
> + return -EINVAL;
> +
> + offset -= cxlr->params.cache_size;
> + rc = region_offset_to_dpa_result(cxlr, offset, res);
> + if (rc || !res->cxlmd || res->dpa == ULLONG_MAX) {
> + dev_dbg(&cxlr->dev,
> + "Failed to resolve DPA for region offset %#llx rc %d\n",
> + offset, rc);
> +
> + return rc ? rc : -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static int cxl_region_poison_lookup(struct cxl_region *cxlr, u64 offset,
> + struct dpa_result *res)
> +{
> + int rc;
> +
> + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
> + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
> + return rc;
> +
> + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
> + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
> + return rc;
> +
> + rc = __cxl_region_poison_lookup(cxlr, offset, res);
> + if (rc)
> + return rc;
> +
> + /*
> + * Hold the device reference in case
> + * the device is destroyed after that.
> + */
> + get_device(&res->cxlmd->dev);
This is what made me take another look at alternate approaches because
this reference count is messy.
A region always holds an implicit reference on attached memdevs by
holding its endpoint decoders. The only value of a reference is making
sure that if devices are removed while the lock is dropped you can be
sure that no new memdev aliases the allocation of the old memdev. The
pointer does not necessarily need to be live for that.
> + return 0;
> +}
> +
> static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
> {
> - struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
> struct cxl_region *cxlr = data;
> + struct dpa_result res1, res2;
> int rc;
>
> + /* To retrieve the correct memdev */
> + rc = cxl_region_poison_lookup(cxlr, offset, &res1);
> + if (rc)
> + return rc;
> +
> + struct device *dev __free(put_device) = &res1.cxlmd->dev;
I do not like magic mid-function scopes that are not generated by actual
allocation functions.
> + ACQUIRE(device_intr, devlock)(dev);
> + if ((rc = ACQUIRE_ERR(device_intr, &devlock)))
> + return rc;
> +
> ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
> if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
> return rc;
> @@ -4115,20 +4173,18 @@ static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
> if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
> return rc;
>
> - if (validate_region_offset(cxlr, offset))
> - return -EINVAL;
> -
> - offset -= cxlr->params.cache_size;
> - rc = region_offset_to_dpa_result(cxlr, offset, &result);
> - if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
> + /*
> + * Retrieve memdev and DPA data again in case that the data
> + * has been changed before holding locks.
> + */
> + rc = __cxl_region_poison_lookup(cxlr, offset, &res2);
> + if (rc || res2.cxlmd != res1.cxlmd || res2.dpa != res1.dpa) {
Nothing does a put_device() on res2.cxlmd->dev.
So, it seems it would indeed be altogether cleaner to eliminate the
cxlmd->endpoint validity checking altogether by reordering
initialization and teardown.
在 2026/3/20 10:30, Dan Williams 写道:
> Li Ming wrote:
>> cxl_dpa_to_region() has expectations that cxlmd->endpoint remains valid
>> for the duration of the call. When userspace performs poison injection
>> or clearing on a region via debugfs, holding cxl_rwsem.region and
>> cxl_rwsem.dpa alone is insufficient, these locks do not prevent the
>> retrieved CXL memdev from being destroyed, nor do they protect against
>> concurrent driver detachment. Therefore, hold CXL memdev lock in the
>> debugfs callbacks to ensure the cxlmd->dev.driver remains stable for the
>> entire execution of the callback functions.
> I took another look at this and there may be a simpler way to ensure
> this safety.
>
> In the case where we already have a region you can be assured that
> the device is not going to detach from the region while holding the
> proper rwsems. Maybe that was what Alison was referring to about it
> being "ok"?
>
> However, the problem is that cxl_dpa_to_region() is sometimes called
> from paths where the memdev has unknown association to a region like
> cxl_{inject,clear}_poison().
>
> There are two ways to solve that problem. Hold the cxlmd lock, or move
> the registration of debugfs *after* creation of cxlmd->endpoint. With
> that ordering it would ensure that debugfs is only usable in the
> interval here ->endpoint is known to be stable.
>
> If that ends up simpler, we should go that route instead.
Hi Dan,
CXL subsystem has memdev debugfs interfaces and region debugfs
interfaces for poison injection and clearing.
Memdev debugfs interfaces are possible to access cxl_dpa_to_region()
before cxlmd->endpoint initialization. So the solution I used as you
mentioned, holding the memdev lock in memdev debugfs interfaces[1]. It
can solve this problem.
For region debugfs interfaces, as Alison and you mentioned,
cxl_rwsem.region lock can ensure the retrieved memdev is not going to
detach from the region. So I think region poison injection and clearing
interfaces are safety, we don't need any changes for them.
So my understanding is that the patch[1] is enough for the issue, no
more changes we need for region poison injection/clearing. is it
correct? Or I missed some scenarios?
[1]:
https://lore.kernel.org/linux-cxl/20260314-fix_access_endpoint_without_drv_check-v2-0-4c09edf2e1db@zohomail.com/T/#m06e52a3473395598ab6dd482a6c01b1568ef8b9a
>
>> To keep lock sequence(cxlmd.dev -> cxl_rwsem.region -> cxl_rwsem.dpa)
>> for avoiding deadlock. the interfaces have to find out the correct CXL
>> memdev at first, holding lock in the sequence then checking if the DPA
>> data has been changed before holding locks.
>>
>> Suggested-by: Dan Williams <dan.j.williams@intel.com>
>> Signed-off-by: Li Ming <ming.li@zohomail.com>
>> ---
>> drivers/cxl/core/region.c | 112 ++++++++++++++++++++++++++++++++++++----------
>> 1 file changed, 88 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>> index f24b7e754727..1a509acc52a3 100644
>> --- a/drivers/cxl/core/region.c
>> +++ b/drivers/cxl/core/region.c
>> @@ -4101,12 +4101,70 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset)
>> return 0;
>> }
>>
>> +static int __cxl_region_poison_lookup(struct cxl_region *cxlr, u64 offset,
>> + struct dpa_result *res)
>> +{
>> + int rc;
>> +
>> + *res = (struct dpa_result){ .dpa = ULLONG_MAX, .cxlmd = NULL };
>> +
>> + if (validate_region_offset(cxlr, offset))
>> + return -EINVAL;
>> +
>> + offset -= cxlr->params.cache_size;
>> + rc = region_offset_to_dpa_result(cxlr, offset, res);
>> + if (rc || !res->cxlmd || res->dpa == ULLONG_MAX) {
>> + dev_dbg(&cxlr->dev,
>> + "Failed to resolve DPA for region offset %#llx rc %d\n",
>> + offset, rc);
>> +
>> + return rc ? rc : -EINVAL;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static int cxl_region_poison_lookup(struct cxl_region *cxlr, u64 offset,
>> + struct dpa_result *res)
>> +{
>> + int rc;
>> +
>> + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
>> + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
>> + return rc;
>> +
>> + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
>> + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
>> + return rc;
>> +
>> + rc = __cxl_region_poison_lookup(cxlr, offset, res);
>> + if (rc)
>> + return rc;
>> +
>> + /*
>> + * Hold the device reference in case
>> + * the device is destroyed after that.
>> + */
>> + get_device(&res->cxlmd->dev);
> This is what made me take another look at alternate approaches because
> this reference count is messy.
>
> A region always holds an implicit reference on attached memdevs by
> holding its endpoint decoders. The only value of a reference is making
> sure that if devices are removed while the lock is dropped you can be
> sure that no new memdev aliases the allocation of the old memdev. The
> pointer does not necessarily need to be live for that.
I consider that the cxl_rwsem.region and cxl_rwsem.dpa will be released
after the function, so it is possible that the memdev will be released
after this function returned and before trying to hold the memdev lock,
it will cause the holding memdev lock operation accessing a freed memdev.
>
>> + return 0;
>> +}
>> +
>> static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
>> {
>> - struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
>> struct cxl_region *cxlr = data;
>> + struct dpa_result res1, res2;
>> int rc;
>>
>> + /* To retrieve the correct memdev */
>> + rc = cxl_region_poison_lookup(cxlr, offset, &res1);
>> + if (rc)
>> + return rc;
>> +
>> + struct device *dev __free(put_device) = &res1.cxlmd->dev;
> I do not like magic mid-function scopes that are not generated by actual
> allocation functions.
>
>> + ACQUIRE(device_intr, devlock)(dev);
>> + if ((rc = ACQUIRE_ERR(device_intr, &devlock)))
>> + return rc;
>> +
>> ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
>> if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
>> return rc;
>> @@ -4115,20 +4173,18 @@ static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
>> if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
>> return rc;
>>
>> - if (validate_region_offset(cxlr, offset))
>> - return -EINVAL;
>> -
>> - offset -= cxlr->params.cache_size;
>> - rc = region_offset_to_dpa_result(cxlr, offset, &result);
>> - if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
>> + /*
>> + * Retrieve memdev and DPA data again in case that the data
>> + * has been changed before holding locks.
>> + */
>> + rc = __cxl_region_poison_lookup(cxlr, offset, &res2);
>> + if (rc || res2.cxlmd != res1.cxlmd || res2.dpa != res1.dpa) {
> Nothing does a put_device() on res2.cxlmd->dev.
>
> So, it seems it would indeed be altogether cleaner to eliminate the
> cxlmd->endpoint validity checking altogether by reordering
> initialization and teardown.
__cxl_region_poison_lookup() would not get the res2.cxlmd->dev reference.
Ming
On 3/19/26 7:12 AM, Li Ming wrote:
> cxl_dpa_to_region() has expectations that cxlmd->endpoint remains valid
> for the duration of the call. When userspace performs poison injection
> or clearing on a region via debugfs, holding cxl_rwsem.region and
> cxl_rwsem.dpa alone is insufficient, these locks do not prevent the
> retrieved CXL memdev from being destroyed, nor do they protect against
> concurrent driver detachment. Therefore, hold CXL memdev lock in the
> debugfs callbacks to ensure the cxlmd->dev.driver remains stable for the
> entire execution of the callback functions.
>
> To keep lock sequence(cxlmd.dev -> cxl_rwsem.region -> cxl_rwsem.dpa)
> for avoiding deadlock. the interfaces have to find out the correct CXL
> memdev at first, holding lock in the sequence then checking if the DPA
> data has been changed before holding locks.
>
> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Li Ming <ming.li@zohomail.com>
This is the patch I dropped right? I see the review tags are dropped. Does it need to be re-reviewed? Are there new changes?
DJ
> ---
> drivers/cxl/core/region.c | 112 ++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 88 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index f24b7e754727..1a509acc52a3 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -4101,12 +4101,70 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset)
> return 0;
> }
>
> +static int __cxl_region_poison_lookup(struct cxl_region *cxlr, u64 offset,
> + struct dpa_result *res)
> +{
> + int rc;
> +
> + *res = (struct dpa_result){ .dpa = ULLONG_MAX, .cxlmd = NULL };
> +
> + if (validate_region_offset(cxlr, offset))
> + return -EINVAL;
> +
> + offset -= cxlr->params.cache_size;
> + rc = region_offset_to_dpa_result(cxlr, offset, res);
> + if (rc || !res->cxlmd || res->dpa == ULLONG_MAX) {
> + dev_dbg(&cxlr->dev,
> + "Failed to resolve DPA for region offset %#llx rc %d\n",
> + offset, rc);
> +
> + return rc ? rc : -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static int cxl_region_poison_lookup(struct cxl_region *cxlr, u64 offset,
> + struct dpa_result *res)
> +{
> + int rc;
> +
> + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
> + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
> + return rc;
> +
> + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
> + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
> + return rc;
> +
> + rc = __cxl_region_poison_lookup(cxlr, offset, res);
> + if (rc)
> + return rc;
> +
> + /*
> + * Hold the device reference in case
> + * the device is destroyed after that.
> + */
> + get_device(&res->cxlmd->dev);
> + return 0;
> +}
> +
> static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
> {
> - struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
> struct cxl_region *cxlr = data;
> + struct dpa_result res1, res2;
> int rc;
>
> + /* To retrieve the correct memdev */
> + rc = cxl_region_poison_lookup(cxlr, offset, &res1);
> + if (rc)
> + return rc;
> +
> + struct device *dev __free(put_device) = &res1.cxlmd->dev;
> + ACQUIRE(device_intr, devlock)(dev);
> + if ((rc = ACQUIRE_ERR(device_intr, &devlock)))
> + return rc;
> +
> ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
> if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
> return rc;
> @@ -4115,20 +4173,18 @@ static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
> if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
> return rc;
>
> - if (validate_region_offset(cxlr, offset))
> - return -EINVAL;
> -
> - offset -= cxlr->params.cache_size;
> - rc = region_offset_to_dpa_result(cxlr, offset, &result);
> - if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
> + /*
> + * Retrieve memdev and DPA data again in case that the data
> + * has been changed before holding locks.
> + */
> + rc = __cxl_region_poison_lookup(cxlr, offset, &res2);
> + if (rc || res2.cxlmd != res1.cxlmd || res2.dpa != res1.dpa) {
> dev_dbg(&cxlr->dev,
> - "Failed to resolve DPA for region offset %#llx rc %d\n",
> - offset, rc);
> -
> - return rc ? rc : -EINVAL;
> + "Error injection raced region reconfiguration: %d", rc);
> + return -ENXIO;
> }
>
> - return cxl_inject_poison_locked(result.cxlmd, result.dpa);
> + return cxl_inject_poison_locked(res2.cxlmd, res2.dpa);
> }
>
> DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL,
> @@ -4136,10 +4192,20 @@ DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL,
>
> static int cxl_region_debugfs_poison_clear(void *data, u64 offset)
> {
> - struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
> struct cxl_region *cxlr = data;
> + struct dpa_result res1, res2;
> int rc;
>
> + /* To retrieve the correct memdev */
> + rc = cxl_region_poison_lookup(cxlr, offset, &res1);
> + if (rc)
> + return rc;
> +
> + struct device *dev __free(put_device) = &res1.cxlmd->dev;
> + ACQUIRE(device_intr, devlock)(dev);
> + if ((rc = ACQUIRE_ERR(device_intr, &devlock)))
> + return rc;
> +
> ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
> if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem)))
> return rc;
> @@ -4148,20 +4214,18 @@ static int cxl_region_debugfs_poison_clear(void *data, u64 offset)
> if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
> return rc;
>
> - if (validate_region_offset(cxlr, offset))
> - return -EINVAL;
> -
> - offset -= cxlr->params.cache_size;
> - rc = region_offset_to_dpa_result(cxlr, offset, &result);
> - if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
> + /*
> + * Retrieve memdev and DPA data again in case that the data
> + * has been changed before holding locks.
> + */
> + rc = __cxl_region_poison_lookup(cxlr, offset, &res2);
> + if (rc || res2.cxlmd != res1.cxlmd || res2.dpa != res1.dpa) {
> dev_dbg(&cxlr->dev,
> - "Failed to resolve DPA for region offset %#llx rc %d\n",
> - offset, rc);
> -
> - return rc ? rc : -EINVAL;
> + "Error clearing raced region reconfiguration: %d", rc);
> + return -ENXIO;
> }
>
> - return cxl_clear_poison_locked(result.cxlmd, result.dpa);
> + return cxl_clear_poison_locked(res2.cxlmd, res2.dpa);
> }
>
> DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
>
> ---
> base-commit: d5f9bfc37906bbb737790af11f1537593f8778a5
> change-id: 20260319-hold_memdev_lock_for_region_poison_inject-clear-4b8020d84662
>
> Best regards,
在 2026/3/19 22:47, Dave Jiang 写道: > > On 3/19/26 7:12 AM, Li Ming wrote: >> cxl_dpa_to_region() has expectations that cxlmd->endpoint remains valid >> for the duration of the call. When userspace performs poison injection >> or clearing on a region via debugfs, holding cxl_rwsem.region and >> cxl_rwsem.dpa alone is insufficient, these locks do not prevent the >> retrieved CXL memdev from being destroyed, nor do they protect against >> concurrent driver detachment. Therefore, hold CXL memdev lock in the >> debugfs callbacks to ensure the cxlmd->dev.driver remains stable for the >> entire execution of the callback functions. >> >> To keep lock sequence(cxlmd.dev -> cxl_rwsem.region -> cxl_rwsem.dpa) >> for avoiding deadlock. the interfaces have to find out the correct CXL >> memdev at first, holding lock in the sequence then checking if the DPA >> data has been changed before holding locks. >> >> Suggested-by: Dan Williams <dan.j.williams@intel.com> >> Signed-off-by: Li Ming <ming.li@zohomail.com> > This is the patch I dropped right? I see the review tags are dropped. Does it need to be re-reviewed? Are there new changes? > > DJ It is not, the patch you dropped is for ensuring memdev poison injection/clearing would not access an invalid cxlmd->endpoint. The changes in this patch is for region poison injection/clearing. But you can keep dropping the patch you mentioned at present, I will tell you after confirming the solution. Thanks Ming
© 2016 - 2026 Red Hat, Inc.