Both detach_target() and cxld_unregister() want to tear down a cxl_region
when an endpoint decoder is either detached or destroyed.
When a region is to be destroyed, cxl_region_detach() releases
cxl_region_rwsem, unbinds the cxl_region driver, and re-acquires the rwsem.
This "reverse" locking pattern is difficult to reason about, not amenable
to scope-based cleanup, and the minor differences in the calling context of
detach_target() and cxld_unregister() currently result in the
cxl_decoder_kill_region() wrapper.
Introduce cxl_decoder_detach() to wrap a core __cxl_decoder_detach() that
serves both cases. I.e. either detaching a known position in a region
(interruptible), or detaching an endpoint decoder if it is found to be a
member of a region (uninterruptible).
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Jonathan Cameron <jonathan.cameron@huawei.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Alison Schofield <alison.schofield@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
drivers/cxl/core/core.h | 15 +++++-
drivers/cxl/core/port.c | 9 ++--
drivers/cxl/core/region.c | 103 ++++++++++++++++++++++----------------
3 files changed, 75 insertions(+), 52 deletions(-)
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 29b61828a847..2be37084409f 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -12,6 +12,11 @@ extern const struct device_type cxl_pmu_type;
extern struct attribute_group cxl_base_attribute_group;
+enum cxl_detach_mode {
+ DETACH_ONLY,
+ DETACH_INVALIDATE,
+};
+
#ifdef CONFIG_CXL_REGION
extern struct device_attribute dev_attr_create_pmem_region;
extern struct device_attribute dev_attr_create_ram_region;
@@ -20,7 +25,11 @@ extern struct device_attribute dev_attr_region;
extern const struct device_type cxl_pmem_region_type;
extern const struct device_type cxl_dax_region_type;
extern const struct device_type cxl_region_type;
-void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
+
+int cxl_decoder_detach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ enum cxl_detach_mode mode);
+
#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
#define CXL_REGION_TYPE(x) (&cxl_region_type)
#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
@@ -48,7 +57,11 @@ static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
{
return 0;
}
-static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
+/* stub: nothing to detach, report success like the other int stubs here */
+static inline int cxl_decoder_detach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled,
+ int pos, enum cxl_detach_mode mode)
{
+ return 0;
}
static inline int cxl_region_init(void)
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index eb46c6764d20..087a20a9ee1c 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -2001,12 +2001,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL");
static void cxld_unregister(void *dev)
{
- struct cxl_endpoint_decoder *cxled;
-
- if (is_endpoint_decoder(dev)) {
- cxled = to_cxl_endpoint_decoder(dev);
- cxl_decoder_kill_region(cxled);
- }
+ if (is_endpoint_decoder(dev))
+ cxl_decoder_detach(NULL, to_cxl_endpoint_decoder(dev), -1,
+ DETACH_INVALIDATE);
device_unregister(dev);
}
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 2a97fa9a394f..4314aaed8ad8 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2135,27 +2135,43 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return 0;
}
-static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
+static struct cxl_region *
+__cxl_decoder_detach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ enum cxl_detach_mode mode)
{
- struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
- struct cxl_region *cxlr = cxled->cxld.region;
struct cxl_region_params *p;
- int rc = 0;
lockdep_assert_held_write(&cxl_region_rwsem);
- if (!cxlr)
- return 0;
+ if (!cxled) {
+ p = &cxlr->params;
- p = &cxlr->params;
- get_device(&cxlr->dev);
+ if (pos >= p->interleave_ways) {
+ dev_dbg(&cxlr->dev, "position %d out of range %d\n",
+ pos, p->interleave_ways);
+ return ERR_PTR(-ENXIO);
+ }
+
+ if (!p->targets[pos])
+ return NULL;
+ cxled = p->targets[pos];
+ } else {
+ cxlr = cxled->cxld.region;
+ if (!cxlr)
+ return NULL;
+ p = &cxlr->params;
+ }
+
+ if (mode == DETACH_INVALIDATE)
+ cxled->part = -1;
if (p->state > CXL_CONFIG_ACTIVE) {
cxl_region_decode_reset(cxlr, p->interleave_ways);
p->state = CXL_CONFIG_ACTIVE;
}
- for (iter = ep_port; !is_cxl_root(iter);
+ for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter);
iter = to_cxl_port(iter->dev.parent))
cxl_port_detach_region(iter, cxlr, cxled);
@@ -2166,7 +2182,7 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
cxled->pos);
- goto out;
+ return NULL;
}
if (p->state == CXL_CONFIG_ACTIVE) {
@@ -2180,21 +2196,50 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
.end = -1,
};
- /* notify the region driver that one of its targets has departed */
- up_write(&cxl_region_rwsem);
- device_release_driver(&cxlr->dev);
- down_write(&cxl_region_rwsem);
-out:
- put_device(&cxlr->dev);
- return rc;
+ get_device(&cxlr->dev);
+ return cxlr;
}
-void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
+/*
+ * Clean up a decoder's interest in a region. There are 2 cases to
+ * handle, removing an unknown @cxled from a known position in a region
+ * (detach_target()) or removing a known @cxled from an unknown @cxlr
+ * (cxld_unregister())
+ *
+ * When the detachment finds a region release the region driver.
+ *
+ * Returns 0 on success or when there was nothing to detach, otherwise
+ * the error from down_write_killable() or the error encoded in the
+ * __cxl_decoder_detach() result (e.g. -ENXIO for an out-of-range @pos).
+ */
+int cxl_decoder_detach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ enum cxl_detach_mode mode)
{
- down_write(&cxl_region_rwsem);
- cxled->part = -1;
- cxl_region_detach(cxled);
+ struct cxl_region *detach;
+
+ /* when the decoder is being destroyed lock unconditionally */
+ if (mode == DETACH_INVALIDATE)
+ down_write(&cxl_region_rwsem);
+ else {
+ int rc = down_write_killable(&cxl_region_rwsem);
+
+ if (rc)
+ return rc;
+ }
+
+ detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
up_write(&cxl_region_rwsem);
+
+ /* an ERR_PTR() is an error to report, not a region to release */
+ if (IS_ERR(detach))
+ return PTR_ERR(detach);
+
+ if (detach) {
+ device_release_driver(&detach->dev);
+ put_device(&detach->dev);
+ }
+ return 0;
}
static int attach_target(struct cxl_region *cxlr,
@@ -2225,29 +2270,7 @@ static int attach_target(struct cxl_region *cxlr,
static int detach_target(struct cxl_region *cxlr, int pos)
{
- struct cxl_region_params *p = &cxlr->params;
- int rc;
-
- rc = down_write_killable(&cxl_region_rwsem);
- if (rc)
- return rc;
-
- if (pos >= p->interleave_ways) {
- dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
- p->interleave_ways);
- rc = -ENXIO;
- goto out;
- }
-
- if (!p->targets[pos]) {
- rc = 0;
- goto out;
- }
-
- rc = cxl_region_detach(p->targets[pos]);
-out:
- up_write(&cxl_region_rwsem);
- return rc;
+ return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY);
}
static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
--
2.50.0
On Fri, 11 Jul 2025 16:49:31 -0700 Dan Williams <dan.j.williams@intel.com> wrote: > Both detach_target() and cxld_unregister() want to tear down a cxl_region > when an endpoint decoder is either detached or destroyed. > > When a region is to be destroyed cxl_region_detach() releases > cxl_region_rwsem unbinds the cxl_region driver and re-acquires the rwsem. > > This "reverse" locking pattern is difficult to reason about, not amenable > to scope-based cleanup, and the minor differences in the calling context of > detach_target() and cxld_unregister() currently results in the > cxl_decoder_kill_region() wrapper. > > Introduce cxl_decoder_detach() to wrap a core __cxl_decoder_detach() that > serves both cases. I.e. either detaching a known position in a region > (interruptible), or detaching an endpoint decoder if it is found to be a > member of a region (uninterruptible). > > Cc: Davidlohr Bueso <dave@stgolabs.net> > Cc: Jonathan Cameron <jonathan.cameron@huawei.com> > Cc: Dave Jiang <dave.jiang@intel.com> > Cc: Alison Schofield <alison.schofield@intel.com> > Cc: Vishal Verma <vishal.l.verma@intel.com> > Cc: Ira Weiny <ira.weiny@intel.com> > Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org> > Signed-off-by: Dan Williams <dan.j.williams@intel.com> One query inline about what I think is a change in when a reference count is held on the region device. I'm struggling to reason about whether that change would have always been safe or if there is another change here that makes it fine now? (or whether I'm just misreading the change). 
Jonathan > --- > drivers/cxl/core/core.h | 15 +++++- > drivers/cxl/core/port.c | 9 ++-- > drivers/cxl/core/region.c | 103 ++++++++++++++++++++++---------------- > 3 files changed, 75 insertions(+), 52 deletions(-) > > diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h > index 29b61828a847..2be37084409f 100644 > --- a/drivers/cxl/core/core.h > +++ b/drivers/cxl/core/core.h > @@ -12,6 +12,11 @@ extern const struct device_type cxl_pmu_type; > > extern struct attribute_group cxl_base_attribute_group; > > +enum cxl_detach_mode { > + DETACH_ONLY, > + DETACH_INVALIDATE, > +}; > + > #ifdef CONFIG_CXL_REGION > extern struct device_attribute dev_attr_create_pmem_region; > extern struct device_attribute dev_attr_create_ram_region; > @@ -20,7 +25,11 @@ extern struct device_attribute dev_attr_region; > extern const struct device_type cxl_pmem_region_type; > extern const struct device_type cxl_dax_region_type; > extern const struct device_type cxl_region_type; > -void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); > + > +int cxl_decoder_detach(struct cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, int pos, > + enum cxl_detach_mode mode); > + > #define CXL_REGION_ATTR(x) (&dev_attr_##x.attr) > #define CXL_REGION_TYPE(x) (&cxl_region_type) > #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr), > @@ -48,7 +57,9 @@ static inline int cxl_get_poison_by_endpoint(struct cxl_port *port) > { > return 0; > } > -static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) > +static inline int cxl_decoder_detach(struct cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, > + int pos, enum cxl_detach_mode mode) > { > } > static inline int cxl_region_init(void) > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > index eb46c6764d20..087a20a9ee1c 100644 > --- a/drivers/cxl/core/port.c > +++ b/drivers/cxl/core/port.c > @@ -2001,12 +2001,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL"); > > static void cxld_unregister(void 
*dev) > { > - struct cxl_endpoint_decoder *cxled; > - > - if (is_endpoint_decoder(dev)) { > - cxled = to_cxl_endpoint_decoder(dev); > - cxl_decoder_kill_region(cxled); > - } > + if (is_endpoint_decoder(dev)) > + cxl_decoder_detach(NULL, to_cxl_endpoint_decoder(dev), -1, > + DETACH_INVALIDATE); > > device_unregister(dev); > } > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c > index 2a97fa9a394f..4314aaed8ad8 100644 > --- a/drivers/cxl/core/region.c > +++ b/drivers/cxl/core/region.c > @@ -2135,27 +2135,43 @@ static int cxl_region_attach(struct cxl_region *cxlr, > return 0; > } > > -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) > +static struct cxl_region * > +__cxl_decoder_detach(struct cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, int pos, > + enum cxl_detach_mode mode) > { > - struct cxl_port *iter, *ep_port = cxled_to_port(cxled); > - struct cxl_region *cxlr = cxled->cxld.region; > struct cxl_region_params *p; > - int rc = 0; > > lockdep_assert_held_write(&cxl_region_rwsem); > > - if (!cxlr) > - return 0; > + if (!cxled) { > + p = &cxlr->params; > > - p = &cxlr->params; > - get_device(&cxlr->dev); This is a fairly nasty patch to unwind and fully understand but I'm nervous that in the existing we have a get_device(&cxlr->dev) before potential cxl_region_decode_reset(cxlr, ...) and now we don't. I'm not sure if that is a real problem though, it just makes me nervous. 
> + if (pos >= p->interleave_ways) { > + dev_dbg(&cxlr->dev, "position %d out of range %d\n", > + pos, p->interleave_ways); > + return ERR_PTR(-ENXIO); > + } > + > + if (!p->targets[pos]) > + return NULL; > + cxled = p->targets[pos]; > + } else { > + cxlr = cxled->cxld.region; > + if (!cxlr) > + return NULL; > + p = &cxlr->params; > + } > + > + if (mode == DETACH_INVALIDATE) > + cxled->part = -1; > > if (p->state > CXL_CONFIG_ACTIVE) { > cxl_region_decode_reset(cxlr, p->interleave_ways); > p->state = CXL_CONFIG_ACTIVE; > } > > - for (iter = ep_port; !is_cxl_root(iter); > + for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter); > iter = to_cxl_port(iter->dev.parent)) > cxl_port_detach_region(iter, cxlr, cxled); > > @@ -2166,7 +2182,7 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) > dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n", > dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), > cxled->pos); > - goto out; > + return NULL; > } > > if (p->state == CXL_CONFIG_ACTIVE) { > @@ -2180,21 +2196,42 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) > .end = -1, > }; > > - /* notify the region driver that one of its targets has departed */ > - up_write(&cxl_region_rwsem); > - device_release_driver(&cxlr->dev); > - down_write(&cxl_region_rwsem); > -out: > - put_device(&cxlr->dev); > - return rc; > + get_device(&cxlr->dev); > + return cxlr; > } > > -void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) > +/* > + * Cleanup a decoder's interest in a region. There are 2 cases to > + * handle, removing an unknown @cxled from a known position in a region > + * (detach_target()) or removing a known @cxled from an unknown @cxlr > + * (cxld_unregister()) > + * > + * When the detachment finds a region release the region driver. 
> + */ > +int cxl_decoder_detach(struct cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, int pos, > + enum cxl_detach_mode mode) > { > - down_write(&cxl_region_rwsem); > - cxled->part = -1; > - cxl_region_detach(cxled); > + struct cxl_region *detach; > + > + /* when the decoder is being destroyed lock unconditionally */ > + if (mode == DETACH_INVALIDATE) > + down_write(&cxl_region_rwsem); > + else { > + int rc = down_write_killable(&cxl_region_rwsem); > + > + if (rc) > + return rc; > + } > + > + detach = __cxl_decoder_detach(cxlr, cxled, pos, mode); > up_write(&cxl_region_rwsem); > + > + if (detach) { > + device_release_driver(&detach->dev); > + put_device(&detach->dev); > + } > + return 0; > } > > static int attach_target(struct cxl_region *cxlr, > @@ -2225,29 +2262,7 @@ static int attach_target(struct cxl_region *cxlr, > > static int detach_target(struct cxl_region *cxlr, int pos) > { > - struct cxl_region_params *p = &cxlr->params; > - int rc; > - > - rc = down_write_killable(&cxl_region_rwsem); > - if (rc) > - return rc; > - > - if (pos >= p->interleave_ways) { > - dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, > - p->interleave_ways); > - rc = -ENXIO; > - goto out; > - } > - > - if (!p->targets[pos]) { > - rc = 0; > - goto out; > - } > - > - rc = cxl_region_detach(p->targets[pos]); > -out: > - up_write(&cxl_region_rwsem); > - return rc; > + return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY); > } > > static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
Jonathan Cameron wrote: > On Fri, 11 Jul 2025 16:49:31 -0700 > Dan Williams <dan.j.williams@intel.com> wrote: > > > Both detach_target() and cxld_unregister() want to tear down a cxl_region > > when an endpoint decoder is either detached or destroyed. > > > > When a region is to be destroyed cxl_region_detach() releases > > cxl_region_rwsem unbinds the cxl_region driver and re-acquires the rwsem. > > > > This "reverse" locking pattern is difficult to reason about, not amenable > > to scope-based cleanup, and the minor differences in the calling context of > > detach_target() and cxld_unregister() currently results in the > > cxl_decoder_kill_region() wrapper. > > > > Introduce cxl_decoder_detach() to wrap a core __cxl_decoder_detach() that > > serves both cases. I.e. either detaching a known position in a region > > (interruptible), or detaching an endpoint decoder if it is found to be a > > member of a region (uninterruptible). > > > > Cc: Davidlohr Bueso <dave@stgolabs.net> > > Cc: Jonathan Cameron <jonathan.cameron@huawei.com> > > Cc: Dave Jiang <dave.jiang@intel.com> > > Cc: Alison Schofield <alison.schofield@intel.com> > > Cc: Vishal Verma <vishal.l.verma@intel.com> > > Cc: Ira Weiny <ira.weiny@intel.com> > > Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org> > > Signed-off-by: Dan Williams <dan.j.williams@intel.com> > One query inline about what I think is a change in when a reference count is > held on the region device. I'm struggling to reason about whether that change > would have always been safe or if there is another change here that makes > it fine now? > > (or whether I'm just misreading the change). > > Jonathan [..] 
> > > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > > index eb46c6764d20..087a20a9ee1c 100644 > > --- a/drivers/cxl/core/port.c > > +++ b/drivers/cxl/core/port.c > > @@ -2001,12 +2001,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL"); > > > > static void cxld_unregister(void *dev) > > { > > - struct cxl_endpoint_decoder *cxled; > > - > > - if (is_endpoint_decoder(dev)) { > > - cxled = to_cxl_endpoint_decoder(dev); > > - cxl_decoder_kill_region(cxled); > > - } > > + if (is_endpoint_decoder(dev)) > > + cxl_decoder_detach(NULL, to_cxl_endpoint_decoder(dev), -1, > > + DETACH_INVALIDATE); > > > > device_unregister(dev); > > } > > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c > > index 2a97fa9a394f..4314aaed8ad8 100644 > > --- a/drivers/cxl/core/region.c > > +++ b/drivers/cxl/core/region.c > > @@ -2135,27 +2135,43 @@ static int cxl_region_attach(struct cxl_region *cxlr, > > return 0; > > } > > > > -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) > > +static struct cxl_region * > > +__cxl_decoder_detach(struct cxl_region *cxlr, > > + struct cxl_endpoint_decoder *cxled, int pos, > > + enum cxl_detach_mode mode) > > { > > - struct cxl_port *iter, *ep_port = cxled_to_port(cxled); > > - struct cxl_region *cxlr = cxled->cxld.region; > > struct cxl_region_params *p; > > - int rc = 0; > > > > lockdep_assert_held_write(&cxl_region_rwsem); > > > > - if (!cxlr) > > - return 0; > > + if (!cxled) { > > + p = &cxlr->params; > > > > - p = &cxlr->params; > > - get_device(&cxlr->dev); > > This is a fairly nasty patch to unwind and fully understand but > I'm nervous that in the existing we have a get_device(&cxlr->dev) > before potential cxl_region_decode_reset(cxlr, ...) > and now we don't. I'm not sure if that is a real problem though, > it just makes me nervous. The reference count is not for cxl_region_decode_reset(). 
The reference count is to keep the region from being freed when the lock is dropped so that device_release_driver() can see if it has work to do. The lookup result from endpoint decoder to region is only stable while the lock is held and the region could be freed at any point after that. The pin holds that off until we are done with the potential follow-on work from detaching a decoder. The reference is not taken in other paths like region sysfs because userspace activity in sysfs attributes holds off attribute unregistration.
On Tue, 15 Jul 2025 09:44:29 -0700 dan.j.williams@intel.com wrote: > Jonathan Cameron wrote: > > On Fri, 11 Jul 2025 16:49:31 -0700 > > Dan Williams <dan.j.williams@intel.com> wrote: > > > > > Both detach_target() and cxld_unregister() want to tear down a cxl_region > > > when an endpoint decoder is either detached or destroyed. > > > > > > When a region is to be destroyed cxl_region_detach() releases > > > cxl_region_rwsem unbinds the cxl_region driver and re-acquires the rwsem. > > > > > > This "reverse" locking pattern is difficult to reason about, not amenable > > > to scope-based cleanup, and the minor differences in the calling context of > > > detach_target() and cxld_unregister() currently results in the > > > cxl_decoder_kill_region() wrapper. > > > > > > Introduce cxl_decoder_detach() to wrap a core __cxl_decoder_detach() that > > > serves both cases. I.e. either detaching a known position in a region > > > (interruptible), or detaching an endpoint decoder if it is found to be a > > > member of a region (uninterruptible). > > > > > > Cc: Davidlohr Bueso <dave@stgolabs.net> > > > Cc: Jonathan Cameron <jonathan.cameron@huawei.com> > > > Cc: Dave Jiang <dave.jiang@intel.com> > > > Cc: Alison Schofield <alison.schofield@intel.com> > > > Cc: Vishal Verma <vishal.l.verma@intel.com> > > > Cc: Ira Weiny <ira.weiny@intel.com> > > > Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org> > > > Signed-off-by: Dan Williams <dan.j.williams@intel.com> > > One query inline about what I think is a change in when a reference count is > > held on the region device. I'm struggling to reason about whether that change > > would have always been safe or if there is another change here that makes > > it fine now? > > > > (or whether I'm just misreading the change). > > > > Jonathan > [..] 
> > > > > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > > > index eb46c6764d20..087a20a9ee1c 100644 > > > --- a/drivers/cxl/core/port.c > > > +++ b/drivers/cxl/core/port.c > > > @@ -2001,12 +2001,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL"); > > > > > > static void cxld_unregister(void *dev) > > > { > > > - struct cxl_endpoint_decoder *cxled; > > > - > > > - if (is_endpoint_decoder(dev)) { > > > - cxled = to_cxl_endpoint_decoder(dev); > > > - cxl_decoder_kill_region(cxled); > > > - } > > > + if (is_endpoint_decoder(dev)) > > > + cxl_decoder_detach(NULL, to_cxl_endpoint_decoder(dev), -1, > > > + DETACH_INVALIDATE); > > > > > > device_unregister(dev); > > > } > > > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c > > > index 2a97fa9a394f..4314aaed8ad8 100644 > > > --- a/drivers/cxl/core/region.c > > > +++ b/drivers/cxl/core/region.c > > > @@ -2135,27 +2135,43 @@ static int cxl_region_attach(struct cxl_region *cxlr, > > > return 0; > > > } > > > > > > -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) > > > +static struct cxl_region * > > > +__cxl_decoder_detach(struct cxl_region *cxlr, > > > + struct cxl_endpoint_decoder *cxled, int pos, > > > + enum cxl_detach_mode mode) > > > { > > > - struct cxl_port *iter, *ep_port = cxled_to_port(cxled); > > > - struct cxl_region *cxlr = cxled->cxld.region; > > > struct cxl_region_params *p; > > > - int rc = 0; > > > > > > lockdep_assert_held_write(&cxl_region_rwsem); > > > > > > - if (!cxlr) > > > - return 0; > > > + if (!cxled) { > > > + p = &cxlr->params; > > > > > > - p = &cxlr->params; > > > - get_device(&cxlr->dev); > > > > This is a fairly nasty patch to unwind and fully understand but > > I'm nervous that in the existing we have a get_device(&cxlr->dev) > > before potential cxl_region_decode_reset(cxlr, ...) > > and now we don't. I'm not sure if that is a real problem though, > > it just makes me nervous. 
> > The reference count is not for cxl_region_decode_reset(). The reference > count is to keep the region from being freed when the lock is dropped so > that device_release_driver() can see if it has work to do. > > The lookup result from endpoint decoder to region is only stable while > the lock is held and the region could be freed at any point after that. > The pin holds that off until we are done with the potential follow-on > work from detaching a decoder. > > The reference is not taken in other paths like region sysfs because > userspace activity in sysfs attributes holds off attribute > unregistration. Fair enough and thanks for the explanation. Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> >
On 7/11/25 4:49 PM, Dan Williams wrote: > Both detach_target() and cxld_unregister() want to tear down a cxl_region > when an endpoint decoder is either detached or destroyed. > > When a region is to be destroyed cxl_region_detach() releases > cxl_region_rwsem unbinds the cxl_region driver and re-acquires the rwsem. > I think this one needs some commas? When a region is to be destroyed cxl_region_detach() releases cxl_region_rwsem, unbinds the cxl_region driver, and re-acquires the rwsem. > This "reverse" locking pattern is difficult to reason about, not amenable > to scope-based cleanup, and the minor differences in the calling context of > detach_target() and cxld_unregister() currently results in the > cxl_decoder_kill_region() wrapper. > > Introduce cxl_decoder_detach() to wrap a core __cxl_decoder_detach() that > serves both cases. I.e. either detaching a known position in a region > (interruptible), or detaching an endpoint decoder if it is found to be a > member of a region (uninterruptible). 
> > Cc: Davidlohr Bueso <dave@stgolabs.net> > Cc: Jonathan Cameron <jonathan.cameron@huawei.com> > Cc: Dave Jiang <dave.jiang@intel.com> > Cc: Alison Schofield <alison.schofield@intel.com> > Cc: Vishal Verma <vishal.l.verma@intel.com> > Cc: Ira Weiny <ira.weiny@intel.com> > Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org> > Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> > --- > drivers/cxl/core/core.h | 15 +++++- > drivers/cxl/core/port.c | 9 ++-- > drivers/cxl/core/region.c | 103 ++++++++++++++++++++++---------------- > 3 files changed, 75 insertions(+), 52 deletions(-) > > diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h > index 29b61828a847..2be37084409f 100644 > --- a/drivers/cxl/core/core.h > +++ b/drivers/cxl/core/core.h > @@ -12,6 +12,11 @@ extern const struct device_type cxl_pmu_type; > > extern struct attribute_group cxl_base_attribute_group; > > +enum cxl_detach_mode { > + DETACH_ONLY, > + DETACH_INVALIDATE, > +}; > + > #ifdef CONFIG_CXL_REGION > extern struct device_attribute dev_attr_create_pmem_region; > extern struct device_attribute dev_attr_create_ram_region; > @@ -20,7 +25,11 @@ extern struct device_attribute dev_attr_region; > extern const struct device_type cxl_pmem_region_type; > extern const struct device_type cxl_dax_region_type; > extern const struct device_type cxl_region_type; > -void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); > + > +int cxl_decoder_detach(struct cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, int pos, > + enum cxl_detach_mode mode); > + > #define CXL_REGION_ATTR(x) (&dev_attr_##x.attr) > #define CXL_REGION_TYPE(x) (&cxl_region_type) > #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr), > @@ -48,7 +57,9 @@ static inline int cxl_get_poison_by_endpoint(struct cxl_port *port) > { > return 0; > } > -static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) > +static inline int cxl_decoder_detach(struct 
cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, > + int pos, enum cxl_detach_mode mode) > { > } > static inline int cxl_region_init(void) > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > index eb46c6764d20..087a20a9ee1c 100644 > --- a/drivers/cxl/core/port.c > +++ b/drivers/cxl/core/port.c > @@ -2001,12 +2001,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL"); > > static void cxld_unregister(void *dev) > { > - struct cxl_endpoint_decoder *cxled; > - > - if (is_endpoint_decoder(dev)) { > - cxled = to_cxl_endpoint_decoder(dev); > - cxl_decoder_kill_region(cxled); > - } > + if (is_endpoint_decoder(dev)) > + cxl_decoder_detach(NULL, to_cxl_endpoint_decoder(dev), -1, > + DETACH_INVALIDATE); > > device_unregister(dev); > } > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c > index 2a97fa9a394f..4314aaed8ad8 100644 > --- a/drivers/cxl/core/region.c > +++ b/drivers/cxl/core/region.c > @@ -2135,27 +2135,43 @@ static int cxl_region_attach(struct cxl_region *cxlr, > return 0; > } > > -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) > +static struct cxl_region * > +__cxl_decoder_detach(struct cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, int pos, > + enum cxl_detach_mode mode) > { > - struct cxl_port *iter, *ep_port = cxled_to_port(cxled); > - struct cxl_region *cxlr = cxled->cxld.region; > struct cxl_region_params *p; > - int rc = 0; > > lockdep_assert_held_write(&cxl_region_rwsem); > > - if (!cxlr) > - return 0; > + if (!cxled) { > + p = &cxlr->params; > > - p = &cxlr->params; > - get_device(&cxlr->dev); > + if (pos >= p->interleave_ways) { > + dev_dbg(&cxlr->dev, "position %d out of range %d\n", > + pos, p->interleave_ways); > + return ERR_PTR(-ENXIO); > + } > + > + if (!p->targets[pos]) > + return NULL; > + cxled = p->targets[pos]; > + } else { > + cxlr = cxled->cxld.region; > + if (!cxlr) > + return NULL; > + p = &cxlr->params; > + } > + > + if (mode == DETACH_INVALIDATE) > + cxled->part = -1; > > 
if (p->state > CXL_CONFIG_ACTIVE) { > cxl_region_decode_reset(cxlr, p->interleave_ways); > p->state = CXL_CONFIG_ACTIVE; > } > > - for (iter = ep_port; !is_cxl_root(iter); > + for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter); > iter = to_cxl_port(iter->dev.parent)) > cxl_port_detach_region(iter, cxlr, cxled); > > @@ -2166,7 +2182,7 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) > dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n", > dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), > cxled->pos); > - goto out; > + return NULL; > } > > if (p->state == CXL_CONFIG_ACTIVE) { > @@ -2180,21 +2196,42 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) > .end = -1, > }; > > - /* notify the region driver that one of its targets has departed */ > - up_write(&cxl_region_rwsem); > - device_release_driver(&cxlr->dev); > - down_write(&cxl_region_rwsem); > -out: > - put_device(&cxlr->dev); > - return rc; > + get_device(&cxlr->dev); > + return cxlr; > } > > -void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) > +/* > + * Cleanup a decoder's interest in a region. There are 2 cases to > + * handle, removing an unknown @cxled from a known position in a region > + * (detach_target()) or removing a known @cxled from an unknown @cxlr > + * (cxld_unregister()) > + * > + * When the detachment finds a region release the region driver. 
> + */ > +int cxl_decoder_detach(struct cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, int pos, > + enum cxl_detach_mode mode) > { > - down_write(&cxl_region_rwsem); > - cxled->part = -1; > - cxl_region_detach(cxled); > + struct cxl_region *detach; > + > + /* when the decoder is being destroyed lock unconditionally */ > + if (mode == DETACH_INVALIDATE) > + down_write(&cxl_region_rwsem); > + else { > + int rc = down_write_killable(&cxl_region_rwsem); > + > + if (rc) > + return rc; > + } > + > + detach = __cxl_decoder_detach(cxlr, cxled, pos, mode); > up_write(&cxl_region_rwsem); > + > + if (detach) { > + device_release_driver(&detach->dev); > + put_device(&detach->dev); > + } > + return 0; > } > > static int attach_target(struct cxl_region *cxlr, > @@ -2225,29 +2262,7 @@ static int attach_target(struct cxl_region *cxlr, > > static int detach_target(struct cxl_region *cxlr, int pos) > { > - struct cxl_region_params *p = &cxlr->params; > - int rc; > - > - rc = down_write_killable(&cxl_region_rwsem); > - if (rc) > - return rc; > - > - if (pos >= p->interleave_ways) { > - dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, > - p->interleave_ways); > - rc = -ENXIO; > - goto out; > - } > - > - if (!p->targets[pos]) { > - rc = 0; > - goto out; > - } > - > - rc = cxl_region_detach(p->targets[pos]); > -out: > - up_write(&cxl_region_rwsem); > - return rc; > + return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY); > } > > static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
Dave Jiang wrote: > > > On 7/11/25 4:49 PM, Dan Williams wrote: > > Both detach_target() and cxld_unregister() want to tear down a cxl_region > > when an endpoint decoder is either detached or destroyed. > > > > When a region is to be destroyed cxl_region_detach() releases > > cxl_region_rwsem unbinds the cxl_region driver and re-acquires the rwsem. > > > > I think this one needs some commas? > > When a region is to be destroyed cxl_region_detach() releases > cxl_region_rwsem, unbinds the cxl_region driver, and re-acquires the rwsem. Yes, modulo whether you believe in that last Oxford comma.
On Saturday, July 12, 2025 1:49:31 AM Central European Summer Time Dan Williams wrote: > Both detach_target() and cxld_unregister() want to tear down a cxl_region > when an endpoint decoder is either detached or destroyed. > > When a region is to be destroyed cxl_region_detach() releases > cxl_region_rwsem unbinds the cxl_region driver and re-acquires the rwsem. > > This "reverse" locking pattern is difficult to reason about, not amenable > to scope-based cleanup, and the minor differences in the calling context of > detach_target() and cxld_unregister() currently results in the > cxl_decoder_kill_region() wrapper. > > Introduce cxl_decoder_detach() to wrap a core __cxl_decoder_detach() that > serves both cases. I.e. either detaching a known position in a region > (interruptible), or detaching an endpoint decoder if it is found to be a > member of a region (uninterruptible). > > Cc: Davidlohr Bueso <dave@stgolabs.net> > Cc: Jonathan Cameron <jonathan.cameron@huawei.com> > Cc: Dave Jiang <dave.jiang@intel.com> > Cc: Alison Schofield <alison.schofield@intel.com> > Cc: Vishal Verma <vishal.l.verma@intel.com> > Cc: Ira Weiny <ira.weiny@intel.com> > Acked-by: "Peter Zijlstra (Intel)" <peterz@infradead.org> > Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Fabio M. 
De Francesco <fabio.m.de.francesco@linux.intel.com> > --- > drivers/cxl/core/core.h | 15 +++++- > drivers/cxl/core/port.c | 9 ++-- > drivers/cxl/core/region.c | 103 ++++++++++++++++++++++---------------- > 3 files changed, 75 insertions(+), 52 deletions(-) > > diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h > index 29b61828a847..2be37084409f 100644 > --- a/drivers/cxl/core/core.h > +++ b/drivers/cxl/core/core.h > @@ -12,6 +12,11 @@ extern const struct device_type cxl_pmu_type; > > extern struct attribute_group cxl_base_attribute_group; > > +enum cxl_detach_mode { > + DETACH_ONLY, > + DETACH_INVALIDATE, > +}; > + > #ifdef CONFIG_CXL_REGION > extern struct device_attribute dev_attr_create_pmem_region; > extern struct device_attribute dev_attr_create_ram_region; > @@ -20,7 +25,11 @@ extern struct device_attribute dev_attr_region; > extern const struct device_type cxl_pmem_region_type; > extern const struct device_type cxl_dax_region_type; > extern const struct device_type cxl_region_type; > -void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); > + > +int cxl_decoder_detach(struct cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, int pos, > + enum cxl_detach_mode mode); > + > #define CXL_REGION_ATTR(x) (&dev_attr_##x.attr) > #define CXL_REGION_TYPE(x) (&cxl_region_type) > #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr), > @@ -48,7 +57,9 @@ static inline int cxl_get_poison_by_endpoint(struct cxl_port *port) > { > return 0; > } > -static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) > +static inline int cxl_decoder_detach(struct cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, > + int pos, enum cxl_detach_mode mode) > { > } > static inline int cxl_region_init(void) > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c > index eb46c6764d20..087a20a9ee1c 100644 > --- a/drivers/cxl/core/port.c > +++ b/drivers/cxl/core/port.c > @@ -2001,12 +2001,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, 
"CXL"); > > static void cxld_unregister(void *dev) > { > - struct cxl_endpoint_decoder *cxled; > - > - if (is_endpoint_decoder(dev)) { > - cxled = to_cxl_endpoint_decoder(dev); > - cxl_decoder_kill_region(cxled); > - } > + if (is_endpoint_decoder(dev)) > + cxl_decoder_detach(NULL, to_cxl_endpoint_decoder(dev), -1, > + DETACH_INVALIDATE); > > device_unregister(dev); > } > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c > index 2a97fa9a394f..4314aaed8ad8 100644 > --- a/drivers/cxl/core/region.c > +++ b/drivers/cxl/core/region.c > @@ -2135,27 +2135,43 @@ static int cxl_region_attach(struct cxl_region *cxlr, > return 0; > } > > -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) > +static struct cxl_region * > +__cxl_decoder_detach(struct cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, int pos, > + enum cxl_detach_mode mode) > { > - struct cxl_port *iter, *ep_port = cxled_to_port(cxled); > - struct cxl_region *cxlr = cxled->cxld.region; > struct cxl_region_params *p; > - int rc = 0; > > lockdep_assert_held_write(&cxl_region_rwsem); > > - if (!cxlr) > - return 0; > + if (!cxled) { > + p = &cxlr->params; > > - p = &cxlr->params; > - get_device(&cxlr->dev); > + if (pos >= p->interleave_ways) { > + dev_dbg(&cxlr->dev, "position %d out of range %d\n", > + pos, p->interleave_ways); > + return ERR_PTR(-ENXIO); > + } > + > + if (!p->targets[pos]) > + return NULL; > + cxled = p->targets[pos]; > + } else { > + cxlr = cxled->cxld.region; > + if (!cxlr) > + return NULL; > + p = &cxlr->params; > + } > + > + if (mode == DETACH_INVALIDATE) > + cxled->part = -1; > > if (p->state > CXL_CONFIG_ACTIVE) { > cxl_region_decode_reset(cxlr, p->interleave_ways); > p->state = CXL_CONFIG_ACTIVE; > } > > - for (iter = ep_port; !is_cxl_root(iter); > + for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter); > iter = to_cxl_port(iter->dev.parent)) > cxl_port_detach_region(iter, cxlr, cxled); > > @@ -2166,7 +2182,7 @@ static int 
cxl_region_detach(struct cxl_endpoint_decoder *cxled) > dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n", > dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), > cxled->pos); > - goto out; > + return NULL; > } > > if (p->state == CXL_CONFIG_ACTIVE) { > @@ -2180,21 +2196,42 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) > .end = -1, > }; > > - /* notify the region driver that one of its targets has departed */ > - up_write(&cxl_region_rwsem); > - device_release_driver(&cxlr->dev); > - down_write(&cxl_region_rwsem); > -out: > - put_device(&cxlr->dev); > - return rc; > + get_device(&cxlr->dev); > + return cxlr; > } > > -void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled) > +/* > + * Cleanup a decoder's interest in a region. There are 2 cases to > + * handle, removing an unknown @cxled from a known position in a region > + * (detach_target()) or removing a known @cxled from an unknown @cxlr > + * (cxld_unregister()) > + * > + * When the detachment finds a region release the region driver. 
> + */ > +int cxl_decoder_detach(struct cxl_region *cxlr, > + struct cxl_endpoint_decoder *cxled, int pos, > + enum cxl_detach_mode mode) > { > - down_write(&cxl_region_rwsem); > - cxled->part = -1; > - cxl_region_detach(cxled); > + struct cxl_region *detach; > + > + /* when the decoder is being destroyed lock unconditionally */ > + if (mode == DETACH_INVALIDATE) > + down_write(&cxl_region_rwsem); > + else { > + int rc = down_write_killable(&cxl_region_rwsem); > + > + if (rc) > + return rc; > + } > + > + detach = __cxl_decoder_detach(cxlr, cxled, pos, mode); > up_write(&cxl_region_rwsem); > + > + if (detach) { > + device_release_driver(&detach->dev); > + put_device(&detach->dev); > + } > + return 0; > } > > static int attach_target(struct cxl_region *cxlr, > @@ -2225,29 +2262,7 @@ static int attach_target(struct cxl_region *cxlr, > > static int detach_target(struct cxl_region *cxlr, int pos) > { > - struct cxl_region_params *p = &cxlr->params; > - int rc; > - > - rc = down_write_killable(&cxl_region_rwsem); > - if (rc) > - return rc; > - > - if (pos >= p->interleave_ways) { > - dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos, > - p->interleave_ways); > - rc = -ENXIO; > - goto out; > - } > - > - if (!p->targets[pos]) { > - rc = 0; > - goto out; > - } > - > - rc = cxl_region_detach(p->targets[pos]); > -out: > - up_write(&cxl_region_rwsem); > - return rc; > + return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY); > } > > static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos, > -- > 2.50.0 > > >
© 2016 - 2025 Red Hat, Inc.