When trying to run perf and sysfs mode simultaneously, the WARN_ON()
in tmc_etr_enable_hw() is triggered sometimes:
WARNING: CPU: 42 PID: 3911571 at drivers/hwtracing/coresight/coresight-tmc-etr.c:1060 tmc_etr_enable_hw+0xc0/0xd8 [coresight_tmc]
[..snip..]
Call trace:
tmc_etr_enable_hw+0xc0/0xd8 [coresight_tmc] (P)
tmc_enable_etr_sink+0x11c/0x250 [coresight_tmc] (L)
tmc_enable_etr_sink+0x11c/0x250 [coresight_tmc]
coresight_enable_path+0x1c8/0x218 [coresight]
coresight_enable_sysfs+0xa4/0x228 [coresight]
enable_source_store+0x58/0xa8 [coresight]
dev_attr_store+0x20/0x40
sysfs_kf_write+0x4c/0x68
kernfs_fop_write_iter+0x120/0x1b8
vfs_write+0x2c8/0x388
ksys_write+0x74/0x108
__arm64_sys_write+0x24/0x38
el0_svc_common.constprop.0+0x64/0x148
do_el0_svc+0x24/0x38
el0_svc+0x3c/0x130
el0t_64_sync_handler+0xc8/0xd0
el0t_64_sync+0x1ac/0x1b0
---[ end trace 0000000000000000 ]---
Since the sysfs buffer allocation and the hardware enablement is not
in the same critical region, it's possible to race with the perf
mode:
[sysfs mode] [perf mode]
tmc_etr_get_sysfs_buffer()
spin_lock(&drvdata->spinlock)
[sysfs buffer allocation]
spin_unlock(&drvdata->spinlock)
spin_lock(&drvdata->spinlock)
tmc_etr_enable_hw()
drvdata->etr_buf = etr_perf->etr_buf
spin_unlock(&drvdata->spinlock)
spin_lock(&drvdata->spinlock)
tmc_etr_enable_hw()
WARN_ON(drvdata->etr_buf) // WARN sicne etr_buf initialized at
the perf side
spin_unlock(&drvdata->spinlock)
A race condition is introduced here, perf always prioritizes execution, and
warnings can lead to concerns about potential hidden bugs, such as getting
out of sync.
To fix this, configure the tmc-etr mode before invoking enable_etr_perf()
or enable_etr_sysfs(), explicitly check if the tmc-etr sink is already
enabled in a different mode, and simplily the setup and checks for "mode".
To prevent race conditions between mode transitions.
Fixes: 296b01fd106e ("coresight: Refactor out buffer allocation function for ETR")
Reported-by: Yicong Yang <yangyicong@hisilicon.com>
Closes: https://lore.kernel.org/linux-arm-kernel/20241202092419.11777-2-yangyicong@huawei.com/
Signed-off-by: Junhao He <hejunhao3@huawei.com>
---
.../hwtracing/coresight/coresight-tmc-etr.c | 73 +++++++++++--------
1 file changed, 43 insertions(+), 30 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index b07fcdb3fe1a..252a57a8e94e 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -1263,11 +1263,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev)
raw_spin_lock_irqsave(&drvdata->spinlock, flags);
}
- if (drvdata->reading || coresight_get_mode(csdev) == CS_MODE_PERF) {
- ret = -EBUSY;
- goto out;
- }
-
/*
* If we don't have a buffer or it doesn't match the requested size,
* use the buffer allocated above. Otherwise reuse the existing buffer.
@@ -1278,7 +1273,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev)
drvdata->sysfs_buf = new_buf;
}
-out:
raw_spin_unlock_irqrestore(&drvdata->spinlock, flags);
/* Free memory outside the spinlock if need be */
@@ -1289,7 +1283,7 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev)
static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
{
- int ret = 0;
+ int ret;
unsigned long flags;
struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
struct etr_buf *sysfs_buf = tmc_etr_get_sysfs_buffer(csdev);
@@ -1299,23 +1293,10 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
raw_spin_lock_irqsave(&drvdata->spinlock, flags);
- /*
- * In sysFS mode we can have multiple writers per sink. Since this
- * sink is already enabled no memory is needed and the HW need not be
- * touched, even if the buffer size has changed.
- */
- if (coresight_get_mode(csdev) == CS_MODE_SYSFS) {
- csdev->refcnt++;
- goto out;
- }
-
ret = tmc_etr_enable_hw(drvdata, sysfs_buf);
- if (!ret) {
- coresight_set_mode(csdev, CS_MODE_SYSFS);
+ if (!ret)
csdev->refcnt++;
- }
-out:
raw_spin_unlock_irqrestore(&drvdata->spinlock, flags);
if (!ret)
@@ -1735,11 +1716,6 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle);
raw_spin_lock_irqsave(&drvdata->spinlock, flags);
- /* Don't use this sink if it is already claimed by sysFS */
- if (coresight_get_mode(csdev) == CS_MODE_SYSFS) {
- rc = -EBUSY;
- goto unlock_out;
- }
if (WARN_ON(!etr_perf || !etr_perf->etr_buf)) {
rc = -EINVAL;
@@ -1768,7 +1744,6 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
if (!rc) {
/* Associate with monitored process. */
drvdata->pid = pid;
- coresight_set_mode(csdev, CS_MODE_PERF);
drvdata->perf_buf = etr_perf->etr_buf;
csdev->refcnt++;
}
@@ -1781,14 +1756,52 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
static int tmc_enable_etr_sink(struct coresight_device *csdev,
enum cs_mode mode, void *data)
{
+ struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+ enum cs_mode old_mode;
+ int rc = -EINVAL;
+
+ scoped_guard(spinlock_irqsave, &drvdata->spinlock) {
+ old_mode = coresight_get_mode(csdev);
+ if (old_mode != CS_MODE_DISABLED && old_mode != mode)
+ return -EBUSY;
+
+ if (drvdata->reading)
+ return -EBUSY;
+
+ /* In sysFS mode we can have multiple writers per sink. */
+ if (old_mode == CS_MODE_SYSFS) {
+ csdev->refcnt++;
+ return 0;
+ }
+
+ /*
+ * minor note: In sysFS mode, the task1 get locked first, it setup
+ * etr mode to SYSFS. Then task2 get locked,it will directly return
+ * success even when the tmc-etr is not enabled at this moment.
+ * Ultimately, task1 will still successfully enable tmc-etr.
+ * This is a transient state and does not cause an anomaly.
+ */
+ coresight_set_mode(csdev, mode);
+ }
+
switch (mode) {
case CS_MODE_SYSFS:
- return tmc_enable_etr_sink_sysfs(csdev);
+ rc = tmc_enable_etr_sink_sysfs(csdev);
+ break;
case CS_MODE_PERF:
- return tmc_enable_etr_sink_perf(csdev, data);
+ rc = tmc_enable_etr_sink_perf(csdev, data);
+ break;
default:
- return -EINVAL;
+ rc = -EINVAL;
}
+
+ if (rc && old_mode != mode) {
+ scoped_guard(spinlock_irqsave, &drvdata->spinlock) {
+ coresight_set_mode(csdev, old_mode);
+ }
+ }
+
+ return rc;
}
static int tmc_disable_etr_sink(struct coresight_device *csdev)
--
2.33.0
On Fri, Jun 20, 2025 at 03:54:11PM +0800, Junhao He wrote: [...] > To fix this, configure the tmc-etr mode before invoking enable_etr_perf() > or enable_etr_sysfs(), explicitly check if the tmc-etr sink is already > enabled in a different mode, and simplily the setup and checks for "mode". > To prevent race conditions between mode transitions. I have a similiar fixing for CTI driver, see: https://lore.kernel.org/linux-arm-kernel/20250701-arm_cs_pm_fix_v3-v2-0-23ebb864fcc1@arm.com/T/#maccd68b460fb8d74ccdd3504163d8f486f04178b [...] > @@ -1781,14 +1756,52 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) > static int tmc_enable_etr_sink(struct coresight_device *csdev, > enum cs_mode mode, void *data) > { > + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); > + enum cs_mode old_mode; > + int rc = -EINVAL; > + > + scoped_guard(spinlock_irqsave, &drvdata->spinlock) { scoped_guard(raw_spinlock_irqsave, &drvdata->spinlock) { I am concerned that the spinlock is acquired and released for several times in a single flow. It is possible to hit some corner case, e.g., - CPU0 acquires the lock, set sink as SYSFS mode, and releases the lock; - CPU1 acquires the lock, detects the SYSFS mode has been set, directly bail out, then enable ETM. The problem is the sink has not been enabled yet. This leads to CPU1 to enable the tracer but prior to sink's enabling. The key piont is we should ensure the mode is consistent to the hardware state. I will take a bit time for if we can have a better idea for this. > + old_mode = coresight_get_mode(csdev); > + if (old_mode != CS_MODE_DISABLED && old_mode != mode) > + return -EBUSY; > + > + if (drvdata->reading) > + return -EBUSY; > + > + /* In sysFS mode we can have multiple writers per sink. */ > + if (old_mode == CS_MODE_SYSFS) { > + csdev->refcnt++; I am just wandering if we can unify the "refcnt" code for both SYSFS mode and Perf mode, and as a result, we can consolidate the code cross different drivers. Something like: if (!csdev->refcnt) { coresight_set_mode(csdev, mode); } else { /* The device has been configured with an incompatible mode */ if (coresight_get_mode(csdev) != mode) return -EBUSY; csdev->refcnt++; } Then when disable the device: csdev->refcnt--; if (!csdev->refcnt) coresight_set_mode(csdev, CS_MODE_DISABLED); We should not check "drvdata->reading" when enable or disable sink, as it is a flag to track if reading the trace buffer, it is irrelevant to the sink device's enabling or disabling. Please verify perf mode (especially for system wide session) to avoid anything missed. Thanks, Leo > + return 0; > + } > + > + /* > + * minor note: In sysFS mode, the task1 get locked first, it setup > + * etr mode to SYSFS. Then task2 get locked,it will directly return > + * success even when the tmc-etr is not enabled at this moment. > + * Ultimately, task1 will still successfully enable tmc-etr. > + * This is a transient state and does not cause an anomaly. > + */ > + coresight_set_mode(csdev, mode); > + } > + > switch (mode) { > case CS_MODE_SYSFS: > - return tmc_enable_etr_sink_sysfs(csdev); > + rc = tmc_enable_etr_sink_sysfs(csdev); > + break; > case CS_MODE_PERF: > - return tmc_enable_etr_sink_perf(csdev, data); > + rc = tmc_enable_etr_sink_perf(csdev, data); > + break; > default: > - return -EINVAL; > + rc = -EINVAL; > } > + > + if (rc && old_mode != mode) { > + scoped_guard(spinlock_irqsave, &drvdata->spinlock) { > + coresight_set_mode(csdev, old_mode); > + } > + } > + > + return rc; > } > > static int tmc_disable_etr_sink(struct coresight_device *csdev) > -- > 2.33.0 >
On 2025/7/3 0:47, Leo Yan wrote: > On Fri, Jun 20, 2025 at 03:54:11PM +0800, Junhao He wrote: > > [...] > >> To fix this, configure the tmc-etr mode before invoking enable_etr_perf() >> or enable_etr_sysfs(), explicitly check if the tmc-etr sink is already >> enabled in a different mode, and simplily the setup and checks for "mode". >> To prevent race conditions between mode transitions. > I have a similiar fixing for CTI driver, see: > https://lore.kernel.org/linux-arm-kernel/20250701-arm_cs_pm_fix_v3-v2-0-23ebb864fcc1@arm.com/T/#maccd68b460fb8d74ccdd3504163d8f486f04178b > > [...] > >> @@ -1781,14 +1756,52 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) >> static int tmc_enable_etr_sink(struct coresight_device *csdev, >> enum cs_mode mode, void *data) >> { >> + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); >> + enum cs_mode old_mode; >> + int rc = -EINVAL; >> + >> + scoped_guard(spinlock_irqsave, &drvdata->spinlock) { > scoped_guard(raw_spinlock_irqsave, &drvdata->spinlock) { Ok, will fix it. > I am concerned that the spinlock is acquired and released for several > times in a single flow. It is possible to hit some corner case, e.g., > > - CPU0 acquires the lock, set sink as SYSFS mode, and releases the lock; > - CPU1 acquires the lock, detects the SYSFS mode has been set, > directly bail out, then enable ETM. > > The problem is the sink has not been enabled yet. This leads to CPU1 > to enable the tracer but prior to sink's enabling. Yes, you are right. So I added a minor note for that. The impact of this corner case may not have been fully considered. > The key piont is we should ensure the mode is consistent to the > hardware state. I will take a bit time for if we can have a better idea > for this. > >> + old_mode = coresight_get_mode(csdev); >> + if (old_mode != CS_MODE_DISABLED && old_mode != mode) >> + return -EBUSY; >> + >> + if (drvdata->reading) >> + return -EBUSY; >> + >> + /* In sysFS mode we can have multiple writers per sink. */ >> + if (old_mode == CS_MODE_SYSFS) { >> + csdev->refcnt++; > I am just wandering if we can unify the "refcnt" code for both SYSFS > mode and Perf mode, and as a result, we can consolidate the code cross > different drivers. > > Something like: > > if (!csdev->refcnt) { > coresight_set_mode(csdev, mode); > } else { > /* The device has been configured with an incompatible mode */ > if (coresight_get_mode(csdev) != mode) > return -EBUSY; > > csdev->refcnt++; we need to check the mode, cannot directly increase the reference count In performance mode. > } > > Then when disable the device: > > csdev->refcnt--; > if (!csdev->refcnt) > coresight_set_mode(csdev, CS_MODE_DISABLED); > > We should not check "drvdata->reading" when enable or disable sink, as > it is a flag to track if reading the trace buffer, it is irrelevant to > the sink device's enabling or disabling. In sysfs mode, it is necessary to check drvdata->reading. > > Please verify perf mode (especially for system wide session) to avoid > anything missed. I will refactor this code and upload a new version that includes all of your suggested solutions. Thanks, Junhao > Thanks, > Leo > >> + return 0; >> + } >> + >> + /* >> + * minor note: In sysFS mode, the task1 get locked first, it setup >> + * etr mode to SYSFS. Then task2 get locked,it will directly return >> + * success even when the tmc-etr is not enabled at this moment. >> + * Ultimately, task1 will still successfully enable tmc-etr. >> + * This is a transient state and does not cause an anomaly. >> + */ >> + coresight_set_mode(csdev, mode); >> + } >> + >> switch (mode) { >> case CS_MODE_SYSFS: >> - return tmc_enable_etr_sink_sysfs(csdev); >> + rc = tmc_enable_etr_sink_sysfs(csdev); >> + break; >> case CS_MODE_PERF: >> - return tmc_enable_etr_sink_perf(csdev, data); >> + rc = tmc_enable_etr_sink_perf(csdev, data); >> + break; >> default: >> - return -EINVAL; >> + rc = -EINVAL; >> } >> + >> + if (rc && old_mode != mode) { >> + scoped_guard(spinlock_irqsave, &drvdata->spinlock) { >> + coresight_set_mode(csdev, old_mode); >> + } >> + } >> + >> + return rc; >> } >> >> static int tmc_disable_etr_sink(struct coresight_device *csdev) >> -- >> 2.33.0 >> > . >
On Fri, 20 Jun 2025 15:54:11 +0800 Junhao He <hejunhao3@huawei.com> wrote: > When trying to run perf and sysfs mode simultaneously, the WARN_ON() > in tmc_etr_enable_hw() is triggered sometimes: > > WARNING: CPU: 42 PID: 3911571 at drivers/hwtracing/coresight/coresight-tmc-etr.c:1060 tmc_etr_enable_hw+0xc0/0xd8 [coresight_tmc] > [..snip..] > Call trace: > tmc_etr_enable_hw+0xc0/0xd8 [coresight_tmc] (P) > tmc_enable_etr_sink+0x11c/0x250 [coresight_tmc] (L) > tmc_enable_etr_sink+0x11c/0x250 [coresight_tmc] > coresight_enable_path+0x1c8/0x218 [coresight] > coresight_enable_sysfs+0xa4/0x228 [coresight] > enable_source_store+0x58/0xa8 [coresight] > dev_attr_store+0x20/0x40 > sysfs_kf_write+0x4c/0x68 > kernfs_fop_write_iter+0x120/0x1b8 > vfs_write+0x2c8/0x388 > ksys_write+0x74/0x108 > __arm64_sys_write+0x24/0x38 > el0_svc_common.constprop.0+0x64/0x148 > do_el0_svc+0x24/0x38 > el0_svc+0x3c/0x130 > el0t_64_sync_handler+0xc8/0xd0 > el0t_64_sync+0x1ac/0x1b0 > ---[ end trace 0000000000000000 ]--- > > Since the sysfs buffer allocation and the hardware enablement is not > in the same critical region, it's possible to race with the perf > > mode: > [sysfs mode] [perf mode] > tmc_etr_get_sysfs_buffer() > spin_lock(&drvdata->spinlock) > [sysfs buffer allocation] > spin_unlock(&drvdata->spinlock) > spin_lock(&drvdata->spinlock) > tmc_etr_enable_hw() > drvdata->etr_buf = etr_perf->etr_buf > spin_unlock(&drvdata->spinlock) > spin_lock(&drvdata->spinlock) > tmc_etr_enable_hw() > WARN_ON(drvdata->etr_buf) // WARN sicne etr_buf initialized at > the perf side > spin_unlock(&drvdata->spinlock) > > A race condition is introduced here, perf always prioritizes execution, and > warnings can lead to concerns about potential hidden bugs, such as getting > out of sync. > > To fix this, configure the tmc-etr mode before invoking enable_etr_perf() > or enable_etr_sysfs(), explicitly check if the tmc-etr sink is already > enabled in a different mode, and simplily the setup and checks for "mode". > To prevent race conditions between mode transitions. > > Fixes: 296b01fd106e ("coresight: Refactor out buffer allocation function for ETR") > Reported-by: Yicong Yang <yangyicong@hisilicon.com> > Closes: https://lore.kernel.org/linux-arm-kernel/20241202092419.11777-2-yangyicong@huawei.com/ > Signed-off-by: Junhao He <hejunhao3@huawei.com> > --- > .../hwtracing/coresight/coresight-tmc-etr.c | 73 +++++++++++-------- > 1 file changed, 43 insertions(+), 30 deletions(-) > > diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c > index b07fcdb3fe1a..252a57a8e94e 100644 > --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c > +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c > @@ -1263,11 +1263,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev) > raw_spin_lock_irqsave(&drvdata->spinlock, flags); > } > > - if (drvdata->reading || coresight_get_mode(csdev) == CS_MODE_PERF) { > - ret = -EBUSY; > - goto out; > - } > - > /* > * If we don't have a buffer or it doesn't match the requested size, > * use the buffer allocated above. Otherwise reuse the existing buffer. > @@ -1278,7 +1273,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev) > drvdata->sysfs_buf = new_buf; > } > > -out: > raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); > > /* Free memory outside the spinlock if need be */ > @@ -1289,7 +1283,7 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev) > > static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) > { > - int ret = 0; > + int ret; > unsigned long flags; > struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); > struct etr_buf *sysfs_buf = tmc_etr_get_sysfs_buffer(csdev); > @@ -1299,23 +1293,10 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) > > raw_spin_lock_irqsave(&drvdata->spinlock, flags); > > - /* > - * In sysFS mode we can have multiple writers per sink. Since this > - * sink is already enabled no memory is needed and the HW need not be > - * touched, even if the buffer size has changed. > - */ > - if (coresight_get_mode(csdev) == CS_MODE_SYSFS) { > - csdev->refcnt++; > - goto out; > - } > - > ret = tmc_etr_enable_hw(drvdata, sysfs_buf); > - if (!ret) { > - coresight_set_mode(csdev, CS_MODE_SYSFS); > + if (!ret) > csdev->refcnt++; > - } > > -out: > raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); > > if (!ret) > @@ -1735,11 +1716,6 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) > struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle); > > raw_spin_lock_irqsave(&drvdata->spinlock, flags); > - /* Don't use this sink if it is already claimed by sysFS */ > - if (coresight_get_mode(csdev) == CS_MODE_SYSFS) { > - rc = -EBUSY; > - goto unlock_out; > - } > > if (WARN_ON(!etr_perf || !etr_perf->etr_buf)) { > rc = -EINVAL; > @@ -1768,7 +1744,6 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) > if (!rc) { > /* Associate with monitored process. */ > drvdata->pid = pid; > - coresight_set_mode(csdev, CS_MODE_PERF); > drvdata->perf_buf = etr_perf->etr_buf; > csdev->refcnt++; > } > @@ -1781,14 +1756,52 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) > static int tmc_enable_etr_sink(struct coresight_device *csdev, > enum cs_mode mode, void *data) > { > + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); > + enum cs_mode old_mode; > + int rc = -EINVAL; > + > + scoped_guard(spinlock_irqsave, &drvdata->spinlock) { > + old_mode = coresight_get_mode(csdev); > + if (old_mode != CS_MODE_DISABLED && old_mode != mode) > + return -EBUSY; > + > + if (drvdata->reading) > + return -EBUSY; > + > + /* In sysFS mode we can have multiple writers per sink. */ > + if (old_mode == CS_MODE_SYSFS) { This seems odd. You are incrementing the reference count when potentially changing away from CS_MODE_SYSFS? Is this meant to only occur if old_mode == mode && old_mode == CS_MODE_SYSFS? In the code prior to this patch this bit only ran in tmc_enable_etr_sink_sysfs() which was only called based on the mode being configured (mode here I think) being sysfs. That no longer looks to be the case. Maybe I'm missing something as the flows around this are complex. > + csdev->refcnt++; > + return 0; > + } > + > + /* > + * minor note: In sysFS mode, the task1 get locked first, it setup > + * etr mode to SYSFS. Then task2 get locked,it will directly return > + * success even when the tmc-etr is not enabled at this moment. > + * Ultimately, task1 will still successfully enable tmc-etr. > + * This is a transient state and does not cause an anomaly. > + */ > + coresight_set_mode(csdev, mode); > + } > + > switch (mode) { > case CS_MODE_SYSFS: > - return tmc_enable_etr_sink_sysfs(csdev); > + rc = tmc_enable_etr_sink_sysfs(csdev); > + break; > case CS_MODE_PERF: > - return tmc_enable_etr_sink_perf(csdev, data); > + rc = tmc_enable_etr_sink_perf(csdev, data); > + break; > default: > - return -EINVAL; > + rc = -EINVAL; > } > + > + if (rc && old_mode != mode) { > + scoped_guard(spinlock_irqsave, &drvdata->spinlock) { Might be a local style matching thing but if not the scope is tight anyway so could use the unscoped version. guard(spinlock_irqsave)(&drvdata->spinlock); coresight_set_mode(csdev, old_mode); > + coresight_set_mode(csdev, old_mode); > + } > + } > + > + return rc; > } > > static int tmc_disable_etr_sink(struct coresight_device *csdev)
On 2025/6/20 20:45, Jonathan Cameron wrote: > On Fri, 20 Jun 2025 15:54:11 +0800 > Junhao He <hejunhao3@huawei.com> wrote: > >> When trying to run perf and sysfs mode simultaneously, the WARN_ON() >> in tmc_etr_enable_hw() is triggered sometimes: >> >> WARNING: CPU: 42 PID: 3911571 at drivers/hwtracing/coresight/coresight-tmc-etr.c:1060 tmc_etr_enable_hw+0xc0/0xd8 [coresight_tmc] >> [..snip..] >> Call trace: >> tmc_etr_enable_hw+0xc0/0xd8 [coresight_tmc] (P) >> tmc_enable_etr_sink+0x11c/0x250 [coresight_tmc] (L) >> tmc_enable_etr_sink+0x11c/0x250 [coresight_tmc] >> coresight_enable_path+0x1c8/0x218 [coresight] >> coresight_enable_sysfs+0xa4/0x228 [coresight] >> enable_source_store+0x58/0xa8 [coresight] >> dev_attr_store+0x20/0x40 >> sysfs_kf_write+0x4c/0x68 >> kernfs_fop_write_iter+0x120/0x1b8 >> vfs_write+0x2c8/0x388 >> ksys_write+0x74/0x108 >> __arm64_sys_write+0x24/0x38 >> el0_svc_common.constprop.0+0x64/0x148 >> do_el0_svc+0x24/0x38 >> el0_svc+0x3c/0x130 >> el0t_64_sync_handler+0xc8/0xd0 >> el0t_64_sync+0x1ac/0x1b0 >> ---[ end trace 0000000000000000 ]--- >> >> Since the sysfs buffer allocation and the hardware enablement is not >> in the same critical region, it's possible to race with the perf >> >> mode: >> [sysfs mode] [perf mode] >> tmc_etr_get_sysfs_buffer() >> spin_lock(&drvdata->spinlock) >> [sysfs buffer allocation] >> spin_unlock(&drvdata->spinlock) >> spin_lock(&drvdata->spinlock) >> tmc_etr_enable_hw() >> drvdata->etr_buf = etr_perf->etr_buf >> spin_unlock(&drvdata->spinlock) >> spin_lock(&drvdata->spinlock) >> tmc_etr_enable_hw() >> WARN_ON(drvdata->etr_buf) // WARN sicne etr_buf initialized at >> the perf side >> spin_unlock(&drvdata->spinlock) >> >> A race condition is introduced here, perf always prioritizes execution, and >> warnings can lead to concerns about potential hidden bugs, such as getting >> out of sync. >> >> To fix this, configure the tmc-etr mode before invoking enable_etr_perf() >> or enable_etr_sysfs(), explicitly check if the tmc-etr sink is already >> enabled in a different mode, and simplily the setup and checks for "mode". >> To prevent race conditions between mode transitions. >> >> Fixes: 296b01fd106e ("coresight: Refactor out buffer allocation function for ETR") >> Reported-by: Yicong Yang <yangyicong@hisilicon.com> >> Closes: https://lore.kernel.org/linux-arm-kernel/20241202092419.11777-2-yangyicong@huawei.com/ >> Signed-off-by: Junhao He <hejunhao3@huawei.com> >> --- >> .../hwtracing/coresight/coresight-tmc-etr.c | 73 +++++++++++-------- >> 1 file changed, 43 insertions(+), 30 deletions(-) >> >> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c >> index b07fcdb3fe1a..252a57a8e94e 100644 >> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c >> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c >> @@ -1263,11 +1263,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev) >> raw_spin_lock_irqsave(&drvdata->spinlock, flags); >> } >> >> - if (drvdata->reading || coresight_get_mode(csdev) == CS_MODE_PERF) { >> - ret = -EBUSY; >> - goto out; >> - } >> - >> /* >> * If we don't have a buffer or it doesn't match the requested size, >> * use the buffer allocated above. Otherwise reuse the existing buffer. >> @@ -1278,7 +1273,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev) >> drvdata->sysfs_buf = new_buf; >> } >> >> -out: >> raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); >> >> /* Free memory outside the spinlock if need be */ >> @@ -1289,7 +1283,7 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev) >> >> static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) >> { >> - int ret = 0; >> + int ret; >> unsigned long flags; >> struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); >> struct etr_buf *sysfs_buf = tmc_etr_get_sysfs_buffer(csdev); >> @@ -1299,23 +1293,10 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) >> >> raw_spin_lock_irqsave(&drvdata->spinlock, flags); >> >> - /* >> - * In sysFS mode we can have multiple writers per sink. Since this >> - * sink is already enabled no memory is needed and the HW need not be >> - * touched, even if the buffer size has changed. >> - */ >> - if (coresight_get_mode(csdev) == CS_MODE_SYSFS) { >> - csdev->refcnt++; >> - goto out; >> - } >> - >> ret = tmc_etr_enable_hw(drvdata, sysfs_buf); >> - if (!ret) { >> - coresight_set_mode(csdev, CS_MODE_SYSFS); >> + if (!ret) >> csdev->refcnt++; >> - } >> >> -out: >> raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); >> >> if (!ret) >> @@ -1735,11 +1716,6 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) >> struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle); >> >> raw_spin_lock_irqsave(&drvdata->spinlock, flags); >> - /* Don't use this sink if it is already claimed by sysFS */ >> - if (coresight_get_mode(csdev) == CS_MODE_SYSFS) { >> - rc = -EBUSY; >> - goto unlock_out; >> - } >> >> if (WARN_ON(!etr_perf || !etr_perf->etr_buf)) { >> rc = -EINVAL; >> @@ -1768,7 +1744,6 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) >> if (!rc) { >> /* Associate with monitored process. */ >> drvdata->pid = pid; >> - coresight_set_mode(csdev, CS_MODE_PERF); >> drvdata->perf_buf = etr_perf->etr_buf; >> csdev->refcnt++; >> } >> @@ -1781,14 +1756,52 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) >> static int tmc_enable_etr_sink(struct coresight_device *csdev, >> enum cs_mode mode, void *data) >> { >> + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); >> + enum cs_mode old_mode; >> + int rc = -EINVAL; >> + >> + scoped_guard(spinlock_irqsave, &drvdata->spinlock) { >> + old_mode = coresight_get_mode(csdev); >> + if (old_mode != CS_MODE_DISABLED && old_mode != mode) >> + return -EBUSY; >> + >> + if (drvdata->reading) >> + return -EBUSY; >> + >> + /* In sysFS mode we can have multiple writers per sink. */ >> + if (old_mode == CS_MODE_SYSFS) { > This seems odd. You are incrementing the reference count when potentially changing > away from CS_MODE_SYSFS? > Is this meant to only occur if old_mode == mode && old_mode == CS_MODE_SYSFS? Yes, old_mode == CS_MODE_SYSFS means tmc_etr is occupied by sysfs, and old_mode == mode, so can add reference counting directly. When tmc_etr is idle its mode is CS_MODE_DISABLED. > In the code prior to this patch this bit only ran in tmc_enable_etr_sink_sysfs() > which was only called based on the mode being configured (mode here I think) being > sysfs. That no longer looks to be the case. before the patch, first check (mode == CS_MODE_SYSFS), then check (coresight_get_mode(csdev) == CS_MODE_SYSFS), and finally csdev->refcnt++. This is the same as the current functionality. > Maybe I'm missing something as the flows around this are complex. > > >> + csdev->refcnt++; >> + return 0; >> + } >> + >> + /* >> + * minor note: In sysFS mode, the task1 get locked first, it setup >> + * etr mode to SYSFS. Then task2 get locked,it will directly return >> + * success even when the tmc-etr is not enabled at this moment. >> + * Ultimately, task1 will still successfully enable tmc-etr. >> + * This is a transient state and does not cause an anomaly. >> + */ >> + coresight_set_mode(csdev, mode); >> + } >> + >> switch (mode) { >> case CS_MODE_SYSFS: >> - return tmc_enable_etr_sink_sysfs(csdev); >> + rc = tmc_enable_etr_sink_sysfs(csdev); >> + break; >> case CS_MODE_PERF: >> - return tmc_enable_etr_sink_perf(csdev, data); >> + rc = tmc_enable_etr_sink_perf(csdev, data); >> + break; >> default: >> - return -EINVAL; >> + rc = -EINVAL; >> } >> + >> + if (rc && old_mode != mode) { >> + scoped_guard(spinlock_irqsave, &drvdata->spinlock) { > Might be a local style matching thing but if not the scope is tight anyway > so could use the unscoped version. > > guard(spinlock_irqsave)(&drvdata->spinlock); > coresight_set_mode(csdev, old_mode); Sure, Will fix in next version. >> + coresight_set_mode(csdev, old_mode); >> + } >> + } >> + >> + return rc; >> } >> >> static int tmc_disable_etr_sink(struct coresight_device *csdev) > . >
© 2016 - 2025 Red Hat, Inc.