Exposing a simple counter to userspace for monitoring tools.
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
v1 -> v2: added documentation
---
.../ABI/testing/sysfs-kernel-rcu_stall_count | 6 +++++
kernel/rcu/tree_stall.h | 26 +++++++++++++++++++
2 files changed, 32 insertions(+)
create mode 100644 Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
diff --git a/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count b/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
new file mode 100644
index 000000000000..a4a97a7f4a4d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
@@ -0,0 +1,6 @@
+What: /sys/kernel/rcu_stall_count
+Date: May 2025
+KernelVersion: 6.16
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ Shows how many times the system has detected an RCU stall since last boot.
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 925fcdad5dea..158330524795 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -20,6 +20,28 @@
int sysctl_panic_on_rcu_stall __read_mostly;
int sysctl_max_rcu_stall_to_panic __read_mostly;
+#ifdef CONFIG_SYSFS
+
+static unsigned int rcu_stall_count;
+
+static ssize_t rcu_stall_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *page)
+{
+ return sysfs_emit(page, "%u\n", rcu_stall_count);
+}
+
+static struct kobj_attribute rcu_stall_count_attr = __ATTR_RO(rcu_stall_count);
+
+static __init int kernel_rcu_stall_sysfs_init(void)
+{
+ sysfs_add_file_to_group(kernel_kobj, &rcu_stall_count_attr.attr, NULL);
+ return 0;
+}
+
+late_initcall(kernel_rcu_stall_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
#ifdef CONFIG_PROVE_RCU
#define RCU_STALL_DELAY_DELTA (5 * HZ)
#else
@@ -784,6 +806,10 @@ static void check_cpu_stall(struct rcu_data *rdp)
if (kvm_check_and_clear_guest_paused())
return;
+#ifdef CONFIG_SYSFS
+ ++rcu_stall_count;
+#endif
+
rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
--
2.47.2
Hello Andrew,
On 04/05/25 23:38, Max Kellermann wrote:
> Exposing a simple counter to userspace for monitoring tools.
>
> Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> ---
> v1 -> v2: added documentation
> ---
> .../ABI/testing/sysfs-kernel-rcu_stall_count | 6 +++++
> kernel/rcu/tree_stall.h | 26 +++++++++++++++++++
> 2 files changed, 32 insertions(+)
> create mode 100644 Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
>
> diff --git a/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count b/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
> new file mode 100644
> index 000000000000..a4a97a7f4a4d
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-kernel-rcu_stall_count
> @@ -0,0 +1,6 @@
> +What: /sys/kernel/rcu_stall_count
> +Date: May 2025
> +KernelVersion: 6.16
> +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
> +Description:
> + Shows how many times the system has detected an RCU stall since last boot.
> diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
> index 925fcdad5dea..158330524795 100644
> --- a/kernel/rcu/tree_stall.h
> +++ b/kernel/rcu/tree_stall.h
> @@ -20,6 +20,28 @@
> int sysctl_panic_on_rcu_stall __read_mostly;
> int sysctl_max_rcu_stall_to_panic __read_mostly;
>
> +#ifdef CONFIG_SYSFS
> +
> +static unsigned int rcu_stall_count;
> +
> +static ssize_t rcu_stall_count_show(struct kobject *kobj, struct kobj_attribute *attr,
> + char *page)
> +{
> + return sysfs_emit(page, "%u\n", rcu_stall_count);
> +}
> +
> +static struct kobj_attribute rcu_stall_count_attr = __ATTR_RO(rcu_stall_count);
> +
> +static __init int kernel_rcu_stall_sysfs_init(void)
> +{
> + sysfs_add_file_to_group(kernel_kobj, &rcu_stall_count_attr.attr, NULL);
> + return 0;
> +}
> +
> +late_initcall(kernel_rcu_stall_sysfs_init);
> +
> +#endif // CONFIG_SYSFS
> +
> #ifdef CONFIG_PROVE_RCU
> #define RCU_STALL_DELAY_DELTA (5 * HZ)
> #else
> @@ -784,6 +806,10 @@ static void check_cpu_stall(struct rcu_data *rdp)
> if (kvm_check_and_clear_guest_paused())
> return;
>
> +#ifdef CONFIG_SYSFS
> + ++rcu_stall_count;
> +#endif
> +
> rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
> if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
> pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
It seems like this patch was not applied properly to the upstream tree.
Out of the three hunks in this patch, only the first one is applied; the
second and third hunks are missing.
commit 2536c5c7d6ae5e1d844aa21f28b326b5e7f815ef
Author: Max Kellermann <max.kellermann@ionos.com>
Date: Sun May 4 20:08:31 2025 +0200
kernel/rcu/tree_stall: add /sys/kernel/rcu_stall_count
Expose a simple counter to userspace for monitoring tools.
Thanks,
Sourabh Jain
On Tue, 3 Jun 2025 22:09:30 +0530 Sourabh Jain <sourabhjain@linux.ibm.com> wrote:
> Hello Andrew,
>
> > +#endif
> > +
> > rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
> > if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
> > pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
>
> It seems like this patch was not applied properly to the upstream tree.
>
> Out of the three hunks in this patch, only the first one is applied; the
> second and third hunks are missing.
>
> commit 2536c5c7d6ae5e1d844aa21f28b326b5e7f815ef
> Author: Max Kellermann <max.kellermann@ionos.com>
> Date: Sun May 4 20:08:31 2025 +0200
>
> kernel/rcu/tree_stall: add /sys/kernel/rcu_stall_count
>
> Expose a simple counter to userspace for monitoring tools.
OK. iirc there was quite a lot of churn and conflicts here :)
Please send a fixup against latest -linus?
On 04/06/25 05:46, Andrew Morton wrote:
> On Tue, 3 Jun 2025 22:09:30 +0530 Sourabh Jain <sourabhjain@linux.ibm.com> wrote:
>
>> Hello Andrew,
>>
>>> +#endif
>>> +
>>> rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
>>> if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
>>> pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
>> It seems like this patch was not applied properly to the upstream tree.
>>
>> Out of the three hunks in this patch, only the first one is applied; the
>> second and third hunks are missing.
>>
>> commit 2536c5c7d6ae5e1d844aa21f28b326b5e7f815ef
>> Author: Max Kellermann <max.kellermann@ionos.com>
>> Date: Sun May 4 20:08:31 2025 +0200
>>
>> kernel/rcu/tree_stall: add /sys/kernel/rcu_stall_count
>>
>> Expose a simple counter to userspace for monitoring tools.
> OK. iirc there was quite a lot of churn and conflicts here :)
>
> Please send a fixup against latest -linus?
Sure, I will wait for a day or two to see if Max is interested in
sending the fix-up patch. Otherwise, I will send it.
Thanks,
Sourabh Jain
© 2016 - 2026 Red Hat, Inc.