From: Junhao He <hejunhao3@huawei.com>
When trying to run perf and sysfs mode simultaneously, the WARN_ON()
in tmc_etr_enable_hw() is triggered sometimes:
WARNING: CPU: 42 PID: 3911571 at drivers/hwtracing/coresight/coresight-tmc-etr.c:1060 tmc_etr_enable_hw+0xc0/0xd8 [coresight_tmc]
[..snip..]
Call trace:
tmc_etr_enable_hw+0xc0/0xd8 [coresight_tmc] (P)
tmc_enable_etr_sink+0x11c/0x250 [coresight_tmc] (L)
tmc_enable_etr_sink+0x11c/0x250 [coresight_tmc]
coresight_enable_path+0x1c8/0x218 [coresight]
coresight_enable_sysfs+0xa4/0x228 [coresight]
enable_source_store+0x58/0xa8 [coresight]
dev_attr_store+0x20/0x40
sysfs_kf_write+0x4c/0x68
kernfs_fop_write_iter+0x120/0x1b8
vfs_write+0x2c8/0x388
ksys_write+0x74/0x108
__arm64_sys_write+0x24/0x38
el0_svc_common.constprop.0+0x64/0x148
do_el0_svc+0x24/0x38
el0_svc+0x3c/0x130
el0t_64_sync_handler+0xc8/0xd0
el0t_64_sync+0x1ac/0x1b0
---[ end trace 0000000000000000 ]---
Since the sysfs buffer allocation and the hardware enablement is not
in the same critical region, it's possible to race with the perf
mode:
[sysfs mode] [perf mode]
tmc_etr_get_sysfs_buffer()
spin_lock(&drvdata->spinlock)
[sysfs buffer allocation]
spin_unlock(&drvdata->spinlock)
spin_lock(&drvdata->spinlock)
tmc_etr_enable_hw()
drvdata->etr_buf = etr_perf->etr_buf
spin_unlock(&drvdata->spinlock)
spin_lock(&drvdata->spinlock)
tmc_etr_enable_hw()
WARN_ON(drvdata->etr_buf) // WARN sicne etr_buf initialized at
the perf side
spin_unlock(&drvdata->spinlock)
A race condition is introduced here, perf always prioritizes execution, and
warnings can lead to concerns about potential hidden bugs, such as getting
out of sync.
To fix this, configure the tmc-etr mode before invoking enable_etr_perf()
or enable_etr_sysfs(), explicitly check if the tmc-etr sink is already
enabled in a different mode, and simplily the setup and checks for "mode".
To prevent race conditions between mode transitions.
Fixes: 296b01fd106e ("coresight: Refactor out buffer allocation function for ETR")
Reported-by: Yicong Yang <yangyicong@hisilicon.com>
Closes: https://lore.kernel.org/linux-arm-kernel/20241202092419.11777-2-yangyicong@huawei.com/
Signed-off-by: Junhao He <hejunhao3@huawei.com>
Tested-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Junhao He <hejunhao3@h-partners.com>
---
.../hwtracing/coresight/coresight-tmc-etr.c | 106 +++++++++---------
1 file changed, 55 insertions(+), 51 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index b07fcdb3fe1a..fe1d5e8a0d2b 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -1263,11 +1263,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev)
raw_spin_lock_irqsave(&drvdata->spinlock, flags);
}
- if (drvdata->reading || coresight_get_mode(csdev) == CS_MODE_PERF) {
- ret = -EBUSY;
- goto out;
- }
-
/*
* If we don't have a buffer or it doesn't match the requested size,
* use the buffer allocated above. Otherwise reuse the existing buffer.
@@ -1278,7 +1273,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev)
drvdata->sysfs_buf = new_buf;
}
-out:
raw_spin_unlock_irqrestore(&drvdata->spinlock, flags);
/* Free memory outside the spinlock if need be */
@@ -1289,7 +1283,7 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev)
static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
{
- int ret = 0;
+ int ret;
unsigned long flags;
struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
struct etr_buf *sysfs_buf = tmc_etr_get_sysfs_buffer(csdev);
@@ -1299,23 +1293,10 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
raw_spin_lock_irqsave(&drvdata->spinlock, flags);
- /*
- * In sysFS mode we can have multiple writers per sink. Since this
- * sink is already enabled no memory is needed and the HW need not be
- * touched, even if the buffer size has changed.
- */
- if (coresight_get_mode(csdev) == CS_MODE_SYSFS) {
- csdev->refcnt++;
- goto out;
- }
-
ret = tmc_etr_enable_hw(drvdata, sysfs_buf);
- if (!ret) {
- coresight_set_mode(csdev, CS_MODE_SYSFS);
+ if (!ret)
csdev->refcnt++;
- }
-out:
raw_spin_unlock_irqrestore(&drvdata->spinlock, flags);
if (!ret)
@@ -1729,39 +1710,24 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
{
int rc = 0;
pid_t pid;
- unsigned long flags;
struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
struct perf_output_handle *handle = data;
struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle);
- raw_spin_lock_irqsave(&drvdata->spinlock, flags);
- /* Don't use this sink if it is already claimed by sysFS */
- if (coresight_get_mode(csdev) == CS_MODE_SYSFS) {
- rc = -EBUSY;
- goto unlock_out;
- }
-
- if (WARN_ON(!etr_perf || !etr_perf->etr_buf)) {
- rc = -EINVAL;
- goto unlock_out;
- }
+ if (WARN_ON(!etr_perf || !etr_perf->etr_buf))
+ return -EINVAL;
/* Get a handle on the pid of the session owner */
pid = etr_perf->pid;
/* Do not proceed if this device is associated with another session */
- if (drvdata->pid != -1 && drvdata->pid != pid) {
- rc = -EBUSY;
- goto unlock_out;
- }
+ if (drvdata->pid != -1 && drvdata->pid != pid)
+ return -EBUSY;
- /*
- * No HW configuration is needed if the sink is already in
- * use for this session.
- */
+ /* The sink is already in use for this session */
if (drvdata->pid == pid) {
csdev->refcnt++;
- goto unlock_out;
+ return rc;
}
rc = tmc_etr_enable_hw(drvdata, etr_perf->etr_buf);
@@ -1773,22 +1739,60 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
csdev->refcnt++;
}
-unlock_out:
- raw_spin_unlock_irqrestore(&drvdata->spinlock, flags);
return rc;
}
static int tmc_enable_etr_sink(struct coresight_device *csdev,
enum cs_mode mode, void *data)
{
- switch (mode) {
- case CS_MODE_SYSFS:
- return tmc_enable_etr_sink_sysfs(csdev);
- case CS_MODE_PERF:
- return tmc_enable_etr_sink_perf(csdev, data);
- default:
- return -EINVAL;
+ struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+ int rc;
+
+retry:
+ scoped_guard(raw_spinlock_irqsave, &drvdata->spinlock) {
+ if (coresight_get_mode(csdev) != CS_MODE_DISABLED &&
+ coresight_get_mode(csdev) != mode)
+ return -EBUSY;
+
+ switch (mode) {
+ case CS_MODE_SYSFS:
+ if (drvdata->reading)
+ return -EBUSY;
+
+ if (csdev->refcnt) {
+ /* The sink is already enabled via sysfs */
+ csdev->refcnt++;
+ return 0;
+ }
+
+ /*
+ * A sysfs session is currently enabling ETR, preventing
+ * a second sysfs process from repeatedly triggering the
+ * enable procedure.
+ */
+ if (coresight_get_mode(csdev) == CS_MODE_SYSFS && !csdev->refcnt)
+ goto retry;
+
+ /*
+ * Set ETR to sysfs mode before it is fully enabled, to
+ * prevent race conditions during mode transitions.
+ */
+ coresight_set_mode(csdev, mode);
+ break;
+ case CS_MODE_PERF:
+ return tmc_enable_etr_sink_perf(csdev, data);
+ default:
+ return -EINVAL;
+ }
+ }
+
+ rc = tmc_enable_etr_sink_sysfs(csdev);
+ if (rc) {
+ guard(raw_spinlock_irqsave)(&drvdata->spinlock);
+ coresight_set_mode(csdev, CS_MODE_DISABLED);
}
+
+ return rc;
}
static int tmc_disable_etr_sink(struct coresight_device *csdev)
--
2.33.0
Hi Junhao,
While your patch fixes the problem it introduces imbalance in the
way perf vs sysfs modes are handled.
On 11/11/2025 12:21, Junhao He wrote:
> From: Junhao He <hejunhao3@huawei.com>
>
> When trying to run perf and sysfs mode simultaneously, the WARN_ON()
> in tmc_etr_enable_hw() is triggered sometimes:
>
> WARNING: CPU: 42 PID: 3911571 at drivers/hwtracing/coresight/coresight-tmc-etr.c:1060 tmc_etr_enable_hw+0xc0/0xd8 [coresight_tmc]
> [..snip..]
> Call trace:
> tmc_etr_enable_hw+0xc0/0xd8 [coresight_tmc] (P)
> tmc_enable_etr_sink+0x11c/0x250 [coresight_tmc] (L)
> tmc_enable_etr_sink+0x11c/0x250 [coresight_tmc]
> coresight_enable_path+0x1c8/0x218 [coresight]
> coresight_enable_sysfs+0xa4/0x228 [coresight]
> enable_source_store+0x58/0xa8 [coresight]
> dev_attr_store+0x20/0x40
> sysfs_kf_write+0x4c/0x68
> kernfs_fop_write_iter+0x120/0x1b8
> vfs_write+0x2c8/0x388
> ksys_write+0x74/0x108
> __arm64_sys_write+0x24/0x38
> el0_svc_common.constprop.0+0x64/0x148
> do_el0_svc+0x24/0x38
> el0_svc+0x3c/0x130
> el0t_64_sync_handler+0xc8/0xd0
> el0t_64_sync+0x1ac/0x1b0
> ---[ end trace 0000000000000000 ]---
>
> Since the sysfs buffer allocation and the hardware enablement is not
> in the same critical region, it's possible to race with the perf
>
> mode:
> [sysfs mode] [perf mode]
> tmc_etr_get_sysfs_buffer()
> spin_lock(&drvdata->spinlock)
> [sysfs buffer allocation]
> spin_unlock(&drvdata->spinlock)
> spin_lock(&drvdata->spinlock)
> tmc_etr_enable_hw()
> drvdata->etr_buf = etr_perf->etr_buf
> spin_unlock(&drvdata->spinlock)
> spin_lock(&drvdata->spinlock)
^^ Could we not double check the mode here and break out ?
Something like this :
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c
b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index e0d83ee01b77..2097c57d19d0 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -1314,6 +1314,9 @@ static int tmc_enable_etr_sink_sysfs(struct
coresight_device *csdev)
if (coresight_get_mode(csdev) == CS_MODE_SYSFS) {
csdev->refcnt++;
goto out;
+ } else if (coresight_get_mode(csdev) != CS_MODE_DISABLED) {
+ ret = -EBUSY;
+ goto out;
}
ret = tmc_etr_enable_hw(drvdata, sysfs_buf);
Suzuki
> tmc_etr_enable_hw()
> WARN_ON(drvdata->etr_buf) // WARN sicne etr_buf initialized at
> the perf side
> spin_unlock(&drvdata->spinlock)
>
> A race condition is introduced here, perf always prioritizes execution, and
> warnings can lead to concerns about potential hidden bugs, such as getting
> out of sync.
>
> To fix this, configure the tmc-etr mode before invoking enable_etr_perf()
> or enable_etr_sysfs(), explicitly check if the tmc-etr sink is already
> enabled in a different mode, and simplily the setup and checks for "mode".
> To prevent race conditions between mode transitions.
>
> Fixes: 296b01fd106e ("coresight: Refactor out buffer allocation function for ETR")
> Reported-by: Yicong Yang <yangyicong@hisilicon.com>
> Closes: https://lore.kernel.org/linux-arm-kernel/20241202092419.11777-2-yangyicong@huawei.com/
> Signed-off-by: Junhao He <hejunhao3@huawei.com>
> Tested-by: Yicong Yang <yangyicong@hisilicon.com>
> Signed-off-by: Junhao He <hejunhao3@h-partners.com>
> ---
> .../hwtracing/coresight/coresight-tmc-etr.c | 106 +++++++++---------
> 1 file changed, 55 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> index b07fcdb3fe1a..fe1d5e8a0d2b 100644
> --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
> +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
> @@ -1263,11 +1263,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev)
> raw_spin_lock_irqsave(&drvdata->spinlock, flags);
> }
>
> - if (drvdata->reading || coresight_get_mode(csdev) == CS_MODE_PERF) {
> - ret = -EBUSY;
> - goto out;
> - }
> -
> /*
> * If we don't have a buffer or it doesn't match the requested size,
> * use the buffer allocated above. Otherwise reuse the existing buffer.
> @@ -1278,7 +1273,6 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev)
> drvdata->sysfs_buf = new_buf;
> }
>
> -out:
> raw_spin_unlock_irqrestore(&drvdata->spinlock, flags);
>
> /* Free memory outside the spinlock if need be */
> @@ -1289,7 +1283,7 @@ static struct etr_buf *tmc_etr_get_sysfs_buffer(struct coresight_device *csdev)
>
> static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
> {
> - int ret = 0;
> + int ret;
> unsigned long flags;
> struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
> struct etr_buf *sysfs_buf = tmc_etr_get_sysfs_buffer(csdev);
> @@ -1299,23 +1293,10 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
>
> raw_spin_lock_irqsave(&drvdata->spinlock, flags);
>
> - /*
> - * In sysFS mode we can have multiple writers per sink. Since this
> - * sink is already enabled no memory is needed and the HW need not be
> - * touched, even if the buffer size has changed.
> - */
> - if (coresight_get_mode(csdev) == CS_MODE_SYSFS) {
> - csdev->refcnt++;
> - goto out;
> - }
> -
> ret = tmc_etr_enable_hw(drvdata, sysfs_buf);
> - if (!ret) {
> - coresight_set_mode(csdev, CS_MODE_SYSFS);
> + if (!ret)
> csdev->refcnt++;
> - }
>
> -out:
> raw_spin_unlock_irqrestore(&drvdata->spinlock, flags);
>
> if (!ret)
> @@ -1729,39 +1710,24 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
> {
> int rc = 0;
> pid_t pid;
> - unsigned long flags;
> struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
> struct perf_output_handle *handle = data;
> struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle);
>
> - raw_spin_lock_irqsave(&drvdata->spinlock, flags);
> - /* Don't use this sink if it is already claimed by sysFS */
> - if (coresight_get_mode(csdev) == CS_MODE_SYSFS) {
> - rc = -EBUSY;
> - goto unlock_out;
> - }
> -
> - if (WARN_ON(!etr_perf || !etr_perf->etr_buf)) {
> - rc = -EINVAL;
> - goto unlock_out;
> - }
> + if (WARN_ON(!etr_perf || !etr_perf->etr_buf))
> + return -EINVAL;
>
> /* Get a handle on the pid of the session owner */
> pid = etr_perf->pid;
>
> /* Do not proceed if this device is associated with another session */
> - if (drvdata->pid != -1 && drvdata->pid != pid) {
> - rc = -EBUSY;
> - goto unlock_out;
> - }
> + if (drvdata->pid != -1 && drvdata->pid != pid)
> + return -EBUSY;
>
> - /*
> - * No HW configuration is needed if the sink is already in
> - * use for this session.
> - */
> + /* The sink is already in use for this session */
> if (drvdata->pid == pid) {
> csdev->refcnt++;
> - goto unlock_out;
> + return rc;
> }
>
> rc = tmc_etr_enable_hw(drvdata, etr_perf->etr_buf);
> @@ -1773,22 +1739,60 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
> csdev->refcnt++;
> }
>
> -unlock_out:
> - raw_spin_unlock_irqrestore(&drvdata->spinlock, flags);
> return rc;
> }
>
> static int tmc_enable_etr_sink(struct coresight_device *csdev,
> enum cs_mode mode, void *data)
> {
> - switch (mode) {
> - case CS_MODE_SYSFS:
> - return tmc_enable_etr_sink_sysfs(csdev);
> - case CS_MODE_PERF:
> - return tmc_enable_etr_sink_perf(csdev, data);
> - default:
> - return -EINVAL;
> + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
> + int rc;
> +
> +retry:
> + scoped_guard(raw_spinlock_irqsave, &drvdata->spinlock) {
> + if (coresight_get_mode(csdev) != CS_MODE_DISABLED &&
> + coresight_get_mode(csdev) != mode)
> + return -EBUSY;
> +
> + switch (mode) {
> + case CS_MODE_SYSFS:
> + if (drvdata->reading)
> + return -EBUSY;
> +
> + if (csdev->refcnt) {
> + /* The sink is already enabled via sysfs */
> + csdev->refcnt++;
> + return 0;
> + }
> +
> + /*
> + * A sysfs session is currently enabling ETR, preventing
> + * a second sysfs process from repeatedly triggering the
> + * enable procedure.
> + */
> + if (coresight_get_mode(csdev) == CS_MODE_SYSFS && !csdev->refcnt)
> + goto retry;
> +
> + /*
> + * Set ETR to sysfs mode before it is fully enabled, to
> + * prevent race conditions during mode transitions.
> + */
> + coresight_set_mode(csdev, mode);
> + break;
> + case CS_MODE_PERF:
> + return tmc_enable_etr_sink_perf(csdev, data);
> + default:
> + return -EINVAL;
> + }
> + }
> +
> + rc = tmc_enable_etr_sink_sysfs(csdev);
> + if (rc) {
> + guard(raw_spinlock_irqsave)(&drvdata->spinlock);
> + coresight_set_mode(csdev, CS_MODE_DISABLED);
> }
> +
> + return rc;
> }
>
> static int tmc_disable_etr_sink(struct coresight_device *csdev)
On Thu, Nov 13, 2025 at 03:02:45PM +0000, Suzuki Kuruppassery Poulose wrote: > Hi Junhao, > > While your patch fixes the problem it introduces imbalance in the > way perf vs sysfs modes are handled. Previously, I suggested setting the mode in a unified way in tmc_enable_etr_sink() [1]. After looking into the code again, it seems that both my suggestion and this patch set the perf mode is too late. Maybe we need to set the sink's device mode once a session starts to use it. So we should set the perf mode when allocating sink's buffer (tmc_alloc_etr_buffer()), as this is the first callback invoked for a perf session. As a result, we can keep to set the sysfs mode in tmc_enable_etr_sink_sysfs(). A side topic is we need to refactor the tmc_etr_get_sysfs_buffer() function, ideally this function is purely for allocating buffer without any locking. We can defer to assign "drvdata->sysfs_buf" until acquired spin lock in tmc_enable_etr_sink_sysfs(). Essetionally, we can enable sink in one go rather than acquire-release lock for several times. Thanks, Leo [1] https://lore.kernel.org/linux-arm-kernel/20251020143718.GH281971@e132581.arm.com/
On 19/11/2025 18:04, Leo Yan wrote: > On Thu, Nov 13, 2025 at 03:02:45PM +0000, Suzuki Kuruppassery Poulose wrote: >> Hi Junhao, >> >> While your patch fixes the problem it introduces imbalance in the >> way perf vs sysfs modes are handled. > > Previously, I suggested setting the mode in a unified way in > tmc_enable_etr_sink() [1]. After looking into the code again, it seems > that both my suggestion and this patch set the perf mode is too late. > > Maybe we need to set the sink's device mode once a session starts to use > it. So we should set the perf mode when allocating sink's buffer > (tmc_alloc_etr_buffer()), as this is the first callback invoked for a > perf session. Thats too early, in setup_aux(), when the event is not scheduled in. We don't know when that would be. > > As a result, we can keep to set the sysfs mode in > tmc_enable_etr_sink_sysfs(). > > A side topic is we need to refactor the tmc_etr_get_sysfs_buffer() > function, ideally this function is purely for allocating buffer without > any locking. We can defer to assign "drvdata->sysfs_buf" until > acquired spin lock in tmc_enable_etr_sink_sysfs(). Essetionally, we > can enable sink in one go rather than acquire-release lock for several > times. Patche welcome :-) Cheers Suzuki > > Thanks, > Leo > > [1] https://lore.kernel.org/linux-arm-kernel/20251020143718.GH281971@e132581.arm.com/
© 2016 - 2026 Red Hat, Inc.