drivers/cxl/core/region.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-)
In cxl_cancel_auto_attach(), it assumes cxled->pos is a valid index for
accessing p->targets[]. However, cxled->pos can be set to -ENXIO in
cxl_region_sort_targets() if cxl_calc_interleave_pos() fails. This
causes the driver to use a negative index to access p->targets[],
resulting in out-of-bounds access.
Fix it by walking p->targets[] instead of using cxled->pos directly.
Fixes: 87805c32e6ad ("cxl/region: Fix use-after-free from auto assembly failure")
Signed-off-by: Li Ming <ming.li@zohomail.com>
---
drivers/cxl/core/region.c | 35 ++++++++++++++++-------------------
1 file changed, 16 insertions(+), 19 deletions(-)
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index e50dc716d4e8..551228bc91f5 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2202,18 +2202,30 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return 0;
}
-static int cxl_region_by_target(struct device *dev, const void *data)
+static int cxl_region_remove_target(struct device *dev, void *data)
{
- const struct cxl_endpoint_decoder *cxled = data;
+ struct cxl_endpoint_decoder *cxled = data;
struct cxl_region_params *p;
struct cxl_region *cxlr;
+ int i;
if (!is_cxl_region(dev))
return 0;
cxlr = to_cxl_region(dev);
p = &cxlr->params;
- return p->targets[cxled->pos] == cxled;
+ for (i = 0; i < p->nr_targets; i++) {
+ if (p->targets[i] == cxled) {
+ p->nr_targets--;
+ cxled->state = CXL_DECODER_STATE_AUTO;
+ cxled->pos = -1;
+ p->targets[i] = NULL;
+
+ return 1;
+ }
+ }
+
+ return 0;
}
/*
@@ -2222,25 +2234,10 @@ static int cxl_region_by_target(struct device *dev, const void *data)
*/
static void cxl_cancel_auto_attach(struct cxl_endpoint_decoder *cxled)
{
- struct cxl_region_params *p;
- struct cxl_region *cxlr;
- int pos = cxled->pos;
-
if (cxled->state != CXL_DECODER_STATE_AUTO_STAGED)
return;
- struct device *dev __free(put_device) =
- bus_find_device(&cxl_bus_type, NULL, cxled, cxl_region_by_target);
- if (!dev)
- return;
-
- cxlr = to_cxl_region(dev);
- p = &cxlr->params;
-
- p->nr_targets--;
- cxled->state = CXL_DECODER_STATE_AUTO;
- cxled->pos = -1;
- p->targets[pos] = NULL;
+ bus_for_each_dev(&cxl_bus_type, NULL, cxled, cxl_region_remove_target);
}
static struct cxl_region *
---
base-commit: 5200f5f493f79f14bbdc349e402a40dfb32f23c8
change-id: 20260519-fix_out_of_bounds_access-8838759ee5a6
Best regards,
--
Li Ming <ming.li@zohomail.com>
On 5/19/26 6:23 AM, Li Ming wrote:
> In cxl_cancel_auto_attach(), it assumes cxled->pos is a valid index for
> accessing p->targets[]. However, cxled->pos can be set to -ENXIO in
It can be set to other error codes I think? I would just s/-ENXIO/negative errno/
> cxl_region_sort_targets() if cxl_calc_interleave_pos() fails. This
> causes the driver to use a negative index to access p->targets[],
> resulting in out-of-bounds access.
>
> Fix it by walking p->targets[] instead of using cxled->pos directly.
Does the comment in cxl_region_sort_targets() need to be updated with the new changes?
>
> Fixes: 87805c32e6ad ("cxl/region: Fix use-after-free from auto assembly failure")
> Signed-off-by: Li Ming <ming.li@zohomail.com>
The rest LGTM
DJ
> ---
> drivers/cxl/core/region.c | 35 ++++++++++++++++-------------------
> 1 file changed, 16 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index e50dc716d4e8..551228bc91f5 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2202,18 +2202,30 @@ static int cxl_region_attach(struct cxl_region *cxlr,
> return 0;
> }
>
> -static int cxl_region_by_target(struct device *dev, const void *data)
> +static int cxl_region_remove_target(struct device *dev, void *data)
> {
> - const struct cxl_endpoint_decoder *cxled = data;
> + struct cxl_endpoint_decoder *cxled = data;
> struct cxl_region_params *p;
> struct cxl_region *cxlr;
> + int i;
>
> if (!is_cxl_region(dev))
> return 0;
>
> cxlr = to_cxl_region(dev);
> p = &cxlr->params;
> - return p->targets[cxled->pos] == cxled;
> + for (i = 0; i < p->nr_targets; i++) {
> + if (p->targets[i] == cxled) {
> + p->nr_targets--;
> + cxled->state = CXL_DECODER_STATE_AUTO;
> + cxled->pos = -1;
> + p->targets[i] = NULL;
> +
> + return 1;
> + }
> + }
> +
> + return 0;
> }
>
> /*
> @@ -2222,25 +2234,10 @@ static int cxl_region_by_target(struct device *dev, const void *data)
> */
> static void cxl_cancel_auto_attach(struct cxl_endpoint_decoder *cxled)
> {
> - struct cxl_region_params *p;
> - struct cxl_region *cxlr;
> - int pos = cxled->pos;
> -
> if (cxled->state != CXL_DECODER_STATE_AUTO_STAGED)
> return;
>
> - struct device *dev __free(put_device) =
> - bus_find_device(&cxl_bus_type, NULL, cxled, cxl_region_by_target);
> - if (!dev)
> - return;
> -
> - cxlr = to_cxl_region(dev);
> - p = &cxlr->params;
> -
> - p->nr_targets--;
> - cxled->state = CXL_DECODER_STATE_AUTO;
> - cxled->pos = -1;
> - p->targets[pos] = NULL;
> + bus_for_each_dev(&cxl_bus_type, NULL, cxled, cxl_region_remove_target);
> }
>
> static struct cxl_region *
>
> ---
> base-commit: 5200f5f493f79f14bbdc349e402a40dfb32f23c8
> change-id: 20260519-fix_out_of_bounds_access-8838759ee5a6
>
> Best regards,
在 2026/5/20 01:18, Dave Jiang 写道:
>
> On 5/19/26 6:23 AM, Li Ming wrote:
>> In cxl_cancel_auto_attach(), it assumes cxled->pos is a valid index for
>> accessing p->targets[]. However, cxled->pos can be set to -ENXIO in
> It can be set to other error codes I think? I would just s/-ENXIO/negative errno/
Sure, Will do that.
>
>> cxl_region_sort_targets() if cxl_calc_interleave_pos() fails. This
>> causes the driver to use a negative index to access p->targets[],
>> resulting in out-of-bounds access.
>>
>> Fix it by walking p->targets[] instead of using cxled->pos directly.
> Does the comment in cxl_region_sort_targets() need to be updated with the new changes?
I'm not sure how to update the comment in cxl_region_sort_targets(). Any
suggestion?
Ming
>
>> Fixes: 87805c32e6ad ("cxl/region: Fix use-after-free from auto assembly failure")
>> Signed-off-by: Li Ming <ming.li@zohomail.com>
> The rest LGTM
>
> DJ
>
>> ---
>> drivers/cxl/core/region.c | 35 ++++++++++++++++-------------------
>> 1 file changed, 16 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>> index e50dc716d4e8..551228bc91f5 100644
>> --- a/drivers/cxl/core/region.c
>> +++ b/drivers/cxl/core/region.c
>> @@ -2202,18 +2202,30 @@ static int cxl_region_attach(struct cxl_region *cxlr,
>> return 0;
>> }
>>
>> -static int cxl_region_by_target(struct device *dev, const void *data)
>> +static int cxl_region_remove_target(struct device *dev, void *data)
>> {
>> - const struct cxl_endpoint_decoder *cxled = data;
>> + struct cxl_endpoint_decoder *cxled = data;
>> struct cxl_region_params *p;
>> struct cxl_region *cxlr;
>> + int i;
>>
>> if (!is_cxl_region(dev))
>> return 0;
>>
>> cxlr = to_cxl_region(dev);
>> p = &cxlr->params;
>> - return p->targets[cxled->pos] == cxled;
>> + for (i = 0; i < p->nr_targets; i++) {
>> + if (p->targets[i] == cxled) {
>> + p->nr_targets--;
>> + cxled->state = CXL_DECODER_STATE_AUTO;
>> + cxled->pos = -1;
>> + p->targets[i] = NULL;
>> +
>> + return 1;
>> + }
>> + }
>> +
>> + return 0;
>> }
>>
>> /*
>> @@ -2222,25 +2234,10 @@ static int cxl_region_by_target(struct device *dev, const void *data)
>> */
>> static void cxl_cancel_auto_attach(struct cxl_endpoint_decoder *cxled)
>> {
>> - struct cxl_region_params *p;
>> - struct cxl_region *cxlr;
>> - int pos = cxled->pos;
>> -
>> if (cxled->state != CXL_DECODER_STATE_AUTO_STAGED)
>> return;
>>
>> - struct device *dev __free(put_device) =
>> - bus_find_device(&cxl_bus_type, NULL, cxled, cxl_region_by_target);
>> - if (!dev)
>> - return;
>> -
>> - cxlr = to_cxl_region(dev);
>> - p = &cxlr->params;
>> -
>> - p->nr_targets--;
>> - cxled->state = CXL_DECODER_STATE_AUTO;
>> - cxled->pos = -1;
>> - p->targets[pos] = NULL;
>> + bus_for_each_dev(&cxl_bus_type, NULL, cxled, cxl_region_remove_target);
>> }
>>
>> static struct cxl_region *
>>
>> ---
>> base-commit: 5200f5f493f79f14bbdc349e402a40dfb32f23c8
>> change-id: 20260519-fix_out_of_bounds_access-8838759ee5a6
>>
>> Best regards,
On 5/20/26 5:30 AM, Li Ming wrote:
>
> 在 2026/5/20 01:18, Dave Jiang 写道:
>>
>> On 5/19/26 6:23 AM, Li Ming wrote:
>>> In cxl_cancel_auto_attach(), it assumes cxled->pos is a valid index for
>>> accessing p->targets[]. However, cxled->pos can be set to -ENXIO in
>> It can be set to other error codes I think? I would just s/-ENXIO/negative errno/
> Sure, Will do that.
>>
>>> cxl_region_sort_targets() if cxl_calc_interleave_pos() fails. This
>>> causes the driver to use a negative index to access p->targets[],
>>> resulting in out-of-bounds access.
>>>
>>> Fix it by walking p->targets[] instead of using cxled->pos directly.
>> Does the comment in cxl_region_sort_targets() need to be updated with the new changes?
>
> I'm not sure how to update the comment in cxl_region_sort_targets(). Any suggestion?
idk if we should just drop it entirely since the comment is no longer true. At least that second part. Alison?
>
>
> Ming
>
>>
>>> Fixes: 87805c32e6ad ("cxl/region: Fix use-after-free from auto assembly failure")
>>> Signed-off-by: Li Ming <ming.li@zohomail.com>
>> The rest LGTM
>>
>> DJ
>>
>>> ---
>>> drivers/cxl/core/region.c | 35 ++++++++++++++++-------------------
>>> 1 file changed, 16 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>>> index e50dc716d4e8..551228bc91f5 100644
>>> --- a/drivers/cxl/core/region.c
>>> +++ b/drivers/cxl/core/region.c
>>> @@ -2202,18 +2202,30 @@ static int cxl_region_attach(struct cxl_region *cxlr,
>>> return 0;
>>> }
>>> -static int cxl_region_by_target(struct device *dev, const void *data)
>>> +static int cxl_region_remove_target(struct device *dev, void *data)
>>> {
>>> - const struct cxl_endpoint_decoder *cxled = data;
>>> + struct cxl_endpoint_decoder *cxled = data;
>>> struct cxl_region_params *p;
>>> struct cxl_region *cxlr;
>>> + int i;
>>> if (!is_cxl_region(dev))
>>> return 0;
>>> cxlr = to_cxl_region(dev);
>>> p = &cxlr->params;
>>> - return p->targets[cxled->pos] == cxled;
>>> + for (i = 0; i < p->nr_targets; i++) {
>>> + if (p->targets[i] == cxled) {
>>> + p->nr_targets--;
>>> + cxled->state = CXL_DECODER_STATE_AUTO;
>>> + cxled->pos = -1;
>>> + p->targets[i] = NULL;
>>> +
>>> + return 1;
>>> + }
>>> + }
>>> +
>>> + return 0;
>>> }
>>> /*
>>> @@ -2222,25 +2234,10 @@ static int cxl_region_by_target(struct device *dev, const void *data)
>>> */
>>> static void cxl_cancel_auto_attach(struct cxl_endpoint_decoder *cxled)
>>> {
>>> - struct cxl_region_params *p;
>>> - struct cxl_region *cxlr;
>>> - int pos = cxled->pos;
>>> -
>>> if (cxled->state != CXL_DECODER_STATE_AUTO_STAGED)
>>> return;
>>> - struct device *dev __free(put_device) =
>>> - bus_find_device(&cxl_bus_type, NULL, cxled, cxl_region_by_target);
>>> - if (!dev)
>>> - return;
>>> -
>>> - cxlr = to_cxl_region(dev);
>>> - p = &cxlr->params;
>>> -
>>> - p->nr_targets--;
>>> - cxled->state = CXL_DECODER_STATE_AUTO;
>>> - cxled->pos = -1;
>>> - p->targets[pos] = NULL;
>>> + bus_for_each_dev(&cxl_bus_type, NULL, cxled, cxl_region_remove_target);
>>> }
>>> static struct cxl_region *
>>>
>>> ---
>>> base-commit: 5200f5f493f79f14bbdc349e402a40dfb32f23c8
>>> change-id: 20260519-fix_out_of_bounds_access-8838759ee5a6
>>>
>>> Best regards,
On Wed, May 20, 2026 at 07:59:21AM -0700, Dave Jiang wrote:
>
>
> On 5/20/26 5:30 AM, Li Ming wrote:
> >
> > 在 2026/5/20 01:18, Dave Jiang 写道:
> >>
> >> On 5/19/26 6:23 AM, Li Ming wrote:
> >>> In cxl_cancel_auto_attach(), it assumes cxled->pos is a valid index for
> >>> accessing p->targets[]. However, cxled->pos can be set to -ENXIO in
> >> It can be set to other error codes I think? I would just s/-ENXIO/negative errno/
> > Sure, Will do that.
> >>
> >>> cxl_region_sort_targets() if cxl_calc_interleave_pos() fails. This
> >>> causes the driver to use a negative index to access p->targets[],
> >>> resulting in out-of-bounds access.
> >>>
> >>> Fix it by walking p->targets[] instead of using cxled->pos directly.
> >> Does the comment in cxl_region_sort_targets() need to be updated with the new changes?
> >
> > I'm not sure how to update the comment in cxl_region_sort_targets(). Any suggestion?
>
> idk if we should just drop it entirely since the comment is no longer true. At least that second part. Alison?
I'd like to see it replaced w this so we continue to have the
debug info, but stop the lie that led to this issue.
/*
* Record that sorting failed, but still continue to calc
* cxled->pos so that cxl_calc_interleave_pos() emits its
* dev_dbg() for every member, which is useful for auto
* discovery debug.
*/
snip
> >>> +static int cxl_region_remove_target(struct device *dev, void *data)
> >>> {
> >>> - const struct cxl_endpoint_decoder *cxled = data;
> >>> + struct cxl_endpoint_decoder *cxled = data;
> >>> struct cxl_region_params *p;
> >>> struct cxl_region *cxlr;
> >>> + int i;
> >>> if (!is_cxl_region(dev))
> >>> return 0;
> >>> cxlr = to_cxl_region(dev);
> >>> p = &cxlr->params;
> >>> - return p->targets[cxled->pos] == cxled;
> >>> + for (i = 0; i < p->nr_targets; i++) {
> >>> + if (p->targets[i] == cxled) {
> >>> + p->nr_targets--;
> >>> + cxled->state = CXL_DECODER_STATE_AUTO;
> >>> + cxled->pos = -1;
> >>> + p->targets[i] = NULL;
> >>> +
> >>> + return 1;
> >>> + }
> >>> + }
Sashiko review looks like it is calling out a valid 'hole' issue above.
Does the array need to be compacted when we remove an entry that is not
the last. That would keep nr_targets same as 'first free slot', so
there are no NULL holes. I think that fix goes in a separate patch.
在 2026/5/21 14:52, Alison Schofield 写道:
> On Wed, May 20, 2026 at 07:59:21AM -0700, Dave Jiang wrote:
>>
>> On 5/20/26 5:30 AM, Li Ming wrote:
>>> 在 2026/5/20 01:18, Dave Jiang 写道:
>>>> On 5/19/26 6:23 AM, Li Ming wrote:
>>>>> In cxl_cancel_auto_attach(), it assumes cxled->pos is a valid index for
>>>>> accessing p->targets[]. However, cxled->pos can be set to -ENXIO in
>>>> It can be set to other error codes I think? I would just s/-ENXIO/negative errno/
>>> Sure, Will do that.
>>>>> cxl_region_sort_targets() if cxl_calc_interleave_pos() fails. This
>>>>> causes the driver to use a negative index to access p->targets[],
>>>>> resulting in out-of-bounds access.
>>>>>
>>>>> Fix it by walking p->targets[] instead of using cxled->pos directly.
>>>> Does the comment in cxl_region_sort_targets() need to be updated with the new changes?
>>> I'm not sure how to update the comment in cxl_region_sort_targets(). Any suggestion?
>> idk if we should just drop it entirely since the comment is no longer true. At least that second part. Alison?
> I'd like to see it replaced w this so we continue to have the
> debug info, but stop the lie that led to this issue.
>
> /*
> * Record that sorting failed, but still continue to calc
> * cxled->pos so that cxl_calc_interleave_pos() emits its
> * dev_dbg() for every member, which is useful for auto
> * discovery debug.
> */
>
> snip
>
>>>>> +static int cxl_region_remove_target(struct device *dev, void *data)
>>>>> {
>>>>> - const struct cxl_endpoint_decoder *cxled = data;
>>>>> + struct cxl_endpoint_decoder *cxled = data;
>>>>> struct cxl_region_params *p;
>>>>> struct cxl_region *cxlr;
>>>>> + int i;
>>>>> if (!is_cxl_region(dev))
>>>>> return 0;
>>>>> cxlr = to_cxl_region(dev);
>>>>> p = &cxlr->params;
>>>>> - return p->targets[cxled->pos] == cxled;
>>>>> + for (i = 0; i < p->nr_targets; i++) {
>>>>> + if (p->targets[i] == cxled) {
>>>>> + p->nr_targets--;
>>>>> + cxled->state = CXL_DECODER_STATE_AUTO;
>>>>> + cxled->pos = -1;
>>>>> + p->targets[i] = NULL;
>>>>> +
>>>>> + return 1;
>>>>> + }
>>>>> + }
> Sashiko review looks like it is calling out a valid 'hole' issue above.
> Does the array need to be compacted when we remove an entry that is not
> the last. That would keep nr_targets same as 'first free slot', so
> there are no NULL holes. I think that fix goes in a separate patch.
Good catch, how about using p->interleave_ways instead of p->nr_targets
as the loop condition? I think it can solve the problem.
BTW, where did you get the Sashiko review? I didn't get any email from it.
Ming
On 5/22/26 1:49 AM, Li Ming wrote:
>
> 在 2026/5/21 14:52, Alison Schofield 写道:
>> On Wed, May 20, 2026 at 07:59:21AM -0700, Dave Jiang wrote:
>>>
>>> On 5/20/26 5:30 AM, Li Ming wrote:
>>>> 在 2026/5/20 01:18, Dave Jiang 写道:
>>>>> On 5/19/26 6:23 AM, Li Ming wrote:
>>>>>> In cxl_cancel_auto_attach(), it assumes cxled->pos is a valid index for
>>>>>> accessing p->targets[]. However, cxled->pos can be set to -ENXIO in
>>>>> It can be set to other error codes I think? I would just s/-ENXIO/negative errno/
>>>> Sure, Will do that.
>>>>>> cxl_region_sort_targets() if cxl_calc_interleave_pos() fails. This
>>>>>> causes the driver to use a negative index to access p->targets[],
>>>>>> resulting in out-of-bounds access.
>>>>>>
>>>>>> Fix it by walking p->targets[] instead of using cxled->pos directly.
>>>>> Does the comment in cxl_region_sort_targets() need to be updated with the new changes?
>>>> I'm not sure how to update the comment in cxl_region_sort_targets(). Any suggestion?
>>> idk if we should just drop it entirely since the comment is no longer true. At least that second part. Alison?
>> I'd like to see it replaced w this so we continue to have the
>> debug info, but stop the lie that led to this issue.
>>
>> /*
>> * Record that sorting failed, but still continue to calc
>> * cxled->pos so that cxl_calc_interleave_pos() emits its
>> * dev_dbg() for every member, which is useful for auto
>> * discovery debug.
>> */
>>
>> snip
>>
>>>>>> +static int cxl_region_remove_target(struct device *dev, void *data)
>>>>>> {
>>>>>> - const struct cxl_endpoint_decoder *cxled = data;
>>>>>> + struct cxl_endpoint_decoder *cxled = data;
>>>>>> struct cxl_region_params *p;
>>>>>> struct cxl_region *cxlr;
>>>>>> + int i;
>>>>>> if (!is_cxl_region(dev))
>>>>>> return 0;
>>>>>> cxlr = to_cxl_region(dev);
>>>>>> p = &cxlr->params;
>>>>>> - return p->targets[cxled->pos] == cxled;
>>>>>> + for (i = 0; i < p->nr_targets; i++) {
>>>>>> + if (p->targets[i] == cxled) {
>>>>>> + p->nr_targets--;
>>>>>> + cxled->state = CXL_DECODER_STATE_AUTO;
>>>>>> + cxled->pos = -1;
>>>>>> + p->targets[i] = NULL;
>>>>>> +
>>>>>> + return 1;
>>>>>> + }
>>>>>> + }
>> Sashiko review looks like it is calling out a valid 'hole' issue above.
>> Does the array need to be compacted when we remove an entry that is not
>> the last. That would keep nr_targets same as 'first free slot', so
>> there are no NULL holes. I think that fix goes in a separate patch.
>
> Good catch, how about using p->interleave_ways instead of p->nr_targets as the loop condition? I think it can solve the problem.
>
> BTW, where did you get the Sashiko review? I didn't get any email from it.
You'll have to look, unless you cc Sachiko with your patch submission. I've asked to have linux-cxl added to Sachiko review.
https://sashiko.dev/#/?list=org.kernel.vger.linux-cxl
>
>
> Ming
>
© 2016 - 2026 Red Hat, Inc.