[PATCH v4 1/2] watchdog: move arm64 watchdog_hld into common code

Yunhui Cui posted 2 patches 2 months ago
There is a newer version of this series
[PATCH v4 1/2] watchdog: move arm64 watchdog_hld into common code
Posted by Yunhui Cui 2 months ago
Move the contents of arch/arm64/watchdog_hld.c to kernel/watchdog_perf.c
to create a generic implementation that can be reused by other arch,
such as RISC-V.

Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
---
 arch/arm64/Kconfig               |  1 +
 arch/arm64/kernel/Makefile       |  1 -
 arch/arm64/kernel/watchdog_hld.c | 94 --------------------------------
 drivers/perf/arm_pmu.c           | 10 +++-
 include/linux/perf/arm_pmu.h     |  2 -
 kernel/watchdog_perf.c           | 83 ++++++++++++++++++++++++++++
 lib/Kconfig.debug                |  8 +++
 7 files changed, 101 insertions(+), 98 deletions(-)
 delete mode 100644 arch/arm64/kernel/watchdog_hld.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b5438ff4772ce..33072adc88332 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -231,6 +231,7 @@ config ARM64
 	select HAVE_GCC_PLUGINS
 	select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \
 		HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+	select WATCHDOG_PERF_ADJUST_PERIOD if HARDLOCKUP_DETECTOR_PERF && CPU_FREQ
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_TIME_ACCOUNTING
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 76f32e424065e..12d77f373fea4 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -44,7 +44,6 @@ obj-$(CONFIG_KUSER_HELPERS)		+= kuser32.o
 obj-$(CONFIG_FUNCTION_TRACER)		+= ftrace.o entry-ftrace.o
 obj-$(CONFIG_MODULES)			+= module.o module-plts.o
 obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o perf_callchain.o
-obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF)	+= watchdog_hld.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_CPU_PM)			+= sleep.o suspend.o
 obj-$(CONFIG_KGDB)			+= kgdb.o
diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c
deleted file mode 100644
index 3093037dcb7be..0000000000000
--- a/arch/arm64/kernel/watchdog_hld.c
+++ /dev/null
@@ -1,94 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/nmi.h>
-#include <linux/cpufreq.h>
-#include <linux/perf/arm_pmu.h>
-
-/*
- * Safe maximum CPU frequency in case a particular platform doesn't implement
- * cpufreq driver. Although, architecture doesn't put any restrictions on
- * maximum frequency but 5 GHz seems to be safe maximum given the available
- * Arm CPUs in the market which are clocked much less than 5 GHz. On the other
- * hand, we can't make it much higher as it would lead to a large hard-lockup
- * detection timeout on parts which are running slower (eg. 1GHz on
- * Developerbox) and doesn't possess a cpufreq driver.
- */
-#define SAFE_MAX_CPU_FREQ	5000000000UL // 5 GHz
-u64 hw_nmi_get_sample_period(int watchdog_thresh)
-{
-	unsigned int cpu = smp_processor_id();
-	unsigned long max_cpu_freq;
-
-	max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
-	if (!max_cpu_freq)
-		max_cpu_freq = SAFE_MAX_CPU_FREQ;
-
-	return (u64)max_cpu_freq * watchdog_thresh;
-}
-
-bool __init arch_perf_nmi_is_available(void)
-{
-	/*
-	 * hardlockup_detector_perf_init() will success even if Pseudo-NMI turns off,
-	 * however, the pmu interrupts will act like a normal interrupt instead of
-	 * NMI and the hardlockup detector would be broken.
-	 */
-	return arm_pmu_irq_is_nmi();
-}
-
-static int watchdog_perf_update_period(void *data)
-{
-	int cpu = smp_processor_id();
-	u64 max_cpu_freq, new_period;
-
-	max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
-	if (!max_cpu_freq)
-		return 0;
-
-	new_period = watchdog_thresh * max_cpu_freq;
-	hardlockup_detector_perf_adjust_period(new_period);
-
-	return 0;
-}
-
-static int watchdog_freq_notifier_callback(struct notifier_block *nb,
-					   unsigned long val, void *data)
-{
-	struct cpufreq_policy *policy = data;
-	int cpu;
-
-	if (val != CPUFREQ_CREATE_POLICY)
-		return NOTIFY_DONE;
-
-	/*
-	 * Let each online CPU related to the policy update the period by their
-	 * own. This will serialize with the framework on start/stop the lockup
-	 * detector (softlockup_{start,stop}_all) and avoid potential race
-	 * condition. Otherwise we may have below theoretical race condition:
-	 * (core 0/1 share the same policy)
-	 * [core 0]                      [core 1]
-	 *                               hardlockup_detector_event_create()
-	 *                                 hw_nmi_get_sample_period()
-	 * (cpufreq registered, notifier callback invoked)
-	 * watchdog_freq_notifier_callback()
-	 *   watchdog_perf_update_period()
-	 *   (since core 1's event's not yet created,
-	 *    the period is not set)
-	 *                                 perf_event_create_kernel_counter()
-	 *                                 (event's period is SAFE_MAX_CPU_FREQ)
-	 */
-	for_each_cpu(cpu, policy->cpus)
-		smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false);
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block watchdog_freq_notifier = {
-	.notifier_call = watchdog_freq_notifier_callback,
-};
-
-static int __init init_watchdog_freq_notifier(void)
-{
-	return cpufreq_register_notifier(&watchdog_freq_notifier,
-					 CPUFREQ_POLICY_NOTIFIER);
-}
-core_initcall(init_watchdog_freq_notifier);
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 5c310e803dd78..ca4e1d07b61cb 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -17,6 +17,7 @@
 #include <linux/cpu_pm.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
+#include <linux/nmi.h>
 #include <linux/perf/arm_pmu.h>
 #include <linux/slab.h>
 #include <linux/sched/clock.h>
@@ -696,10 +697,17 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
 	return per_cpu(hw_events->irq, cpu);
 }
 
-bool arm_pmu_irq_is_nmi(void)
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+bool arch_perf_nmi_is_available(void)
 {
+	/*
+	 * watchdog_hardlockup_probe() will success even if Pseudo-NMI turns off,
+	 * however, the pmu interrupts will act like a normal interrupt instead of
+	 * NMI and the hardlockup detector would be broken.
+	 */
 	return has_nmi;
 }
+#endif
 
 /*
  * PMU hardware loses all context when a CPU goes offline.
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 93c9a26492fcf..6b53fb453fd63 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -184,8 +184,6 @@ void kvm_host_pmu_init(struct arm_pmu *pmu);
 #define kvm_host_pmu_init(x)	do { } while(0)
 #endif
 
-bool arm_pmu_irq_is_nmi(void);
-
 /* Internal functions only for core arm_pmu code */
 struct arm_pmu *armpmu_alloc(void);
 void armpmu_free(struct arm_pmu *pmu);
diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c
index d3ca70e3c256a..a61e35fc9673e 100644
--- a/kernel/watchdog_perf.c
+++ b/kernel/watchdog_perf.c
@@ -15,6 +15,7 @@
 #include <linux/panic.h>
 #include <linux/nmi.h>
 #include <linux/atomic.h>
+#include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/sched/debug.h>
 
@@ -306,3 +307,85 @@ void __init hardlockup_config_perf_event(const char *str)
 	wd_hw_attr.type = PERF_TYPE_RAW;
 	wd_hw_attr.config = config;
 }
+
+#ifdef CONFIG_WATCHDOG_PERF_ADJUST_PERIOD
+/*
+ * Safe maximum CPU frequency in case a particular platform doesn't implement
+ * cpufreq driver. Although, architecture doesn't put any restrictions on
+ * maximum frequency but 5 GHz seems to be safe maximum given the available
+ * CPUs in the market which are clocked much less than 5 GHz. On the other
+ * hand, we can't make it much higher as it would lead to a large hard-lockup
+ * detection timeout on parts which are running slower (eg. 1GHz on
+ * Developerbox) and doesn't possess a cpufreq driver.
+ */
+#define SAFE_MAX_CPU_FREQ	5000000000UL // 5 GHz
+__weak u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+	unsigned int cpu = smp_processor_id();
+	unsigned long max_cpu_freq;
+
+	max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
+	if (!max_cpu_freq)
+		max_cpu_freq = SAFE_MAX_CPU_FREQ;
+
+	return (u64)max_cpu_freq * watchdog_thresh;
+}
+
+static int watchdog_perf_update_period(void *data)
+{
+	int cpu = smp_processor_id();
+	u64 max_cpu_freq, new_period;
+
+	max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
+	if (!max_cpu_freq)
+		return 0;
+
+	new_period = watchdog_thresh * max_cpu_freq;
+	hardlockup_detector_perf_adjust_period(new_period);
+
+	return 0;
+}
+
+static int watchdog_freq_notifier_callback(struct notifier_block *nb,
+					   unsigned long val, void *data)
+{
+	struct cpufreq_policy *policy = data;
+	int cpu;
+
+	if (val != CPUFREQ_CREATE_POLICY)
+		return NOTIFY_DONE;
+
+	/*
+	 * Let each online CPU related to the policy update the period by their
+	 * own. This will serialize with the framework on start/stop the lockup
+	 * detector (softlockup_{start,stop}_all) and avoid potential race
+	 * condition. Otherwise we may have below theoretical race condition:
+	 * (core 0/1 share the same policy)
+	 * [core 0]                      [core 1]
+	 *                               hardlockup_detector_event_create()
+	 *                                 hw_nmi_get_sample_period()
+	 * (cpufreq registered, notifier callback invoked)
+	 * watchdog_freq_notifier_callback()
+	 *   watchdog_perf_update_period()
+	 *   (since core 1's event's not yet created,
+	 *    the period is not set)
+	 *                                 perf_event_create_kernel_counter()
+	 *                                 (event's period is SAFE_MAX_CPU_FREQ)
+	 */
+	for_each_cpu(cpu, policy->cpus)
+		smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block watchdog_freq_notifier = {
+	.notifier_call = watchdog_freq_notifier_callback,
+};
+
+static int __init init_watchdog_freq_notifier(void)
+{
+	return cpufreq_register_notifier(&watchdog_freq_notifier,
+					 CPUFREQ_POLICY_NOTIFIER);
+}
+core_initcall(init_watchdog_freq_notifier);
+#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 099abf128ce67..ceba3e28cb0d0 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1200,6 +1200,14 @@ config HARDLOCKUP_DETECTOR_PERF
 	depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH
 	select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER
 
+config WATCHDOG_PERF_ADJUST_PERIOD
+	bool
+	help
+	  Adjust the watchdog hardlockup detector's period based on CPU max
+	  frequency. Uses cpufreq if available; falls back to a safe 5 GHz
+	  default otherwise. Registers a cpufreq notifier to update the period
+	  automatically.
+
 config HARDLOCKUP_DETECTOR_BUDDY
 	bool
 	depends on HARDLOCKUP_DETECTOR
-- 
2.39.5
Re: [PATCH v4 1/2] watchdog: move arm64 watchdog_hld into common code
Posted by Will Deacon 1 month, 2 weeks ago
On Tue, Oct 14, 2025 at 11:14:24AM +0800, Yunhui Cui wrote:
> @@ -306,3 +307,85 @@ void __init hardlockup_config_perf_event(const char *str)
>  	wd_hw_attr.type = PERF_TYPE_RAW;
>  	wd_hw_attr.config = config;
>  }
> +
> +#ifdef CONFIG_WATCHDOG_PERF_ADJUST_PERIOD
> +/*
> + * Safe maximum CPU frequency in case a particular platform doesn't implement
> + * cpufreq driver. Although, architecture doesn't put any restrictions on
> + * maximum frequency but 5 GHz seems to be safe maximum given the available
> + * CPUs in the market which are clocked much less than 5 GHz. On the other
> + * hand, we can't make it much higher as it would lead to a large hard-lockup
> + * detection timeout on parts which are running slower (eg. 1GHz on
> + * Developerbox) and doesn't possess a cpufreq driver.
> + */
> +#define SAFE_MAX_CPU_FREQ	5000000000UL // 5 GHz
> +__weak u64 hw_nmi_get_sample_period(int watchdog_thresh)
> +{
> +	unsigned int cpu = smp_processor_id();
> +	unsigned long max_cpu_freq;
> +
> +	max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
> +	if (!max_cpu_freq)
> +		max_cpu_freq = SAFE_MAX_CPU_FREQ;
> +
> +	return (u64)max_cpu_freq * watchdog_thresh;
> +}

Why does this function become __weak? Neither arm64 nor riscv override
it afaict.

Will
Re: [External] Re: [PATCH v4 1/2] watchdog: move arm64 watchdog_hld into common code
Posted by yunhui cui 1 month, 1 week ago
Hi Will,

On Mon, Nov 3, 2025 at 9:44 PM Will Deacon <will@kernel.org> wrote:
>
> On Tue, Oct 14, 2025 at 11:14:24AM +0800, Yunhui Cui wrote:
> > @@ -306,3 +307,85 @@ void __init hardlockup_config_perf_event(const char *str)
> >       wd_hw_attr.type = PERF_TYPE_RAW;
> >       wd_hw_attr.config = config;
> >  }
> > +
> > +#ifdef CONFIG_WATCHDOG_PERF_ADJUST_PERIOD
> > +/*
> > + * Safe maximum CPU frequency in case a particular platform doesn't implement
> > + * cpufreq driver. Although, architecture doesn't put any restrictions on
> > + * maximum frequency but 5 GHz seems to be safe maximum given the available
> > + * CPUs in the market which are clocked much less than 5 GHz. On the other
> > + * hand, we can't make it much higher as it would lead to a large hard-lockup
> > + * detection timeout on parts which are running slower (eg. 1GHz on
> > + * Developerbox) and doesn't possess a cpufreq driver.
> > + */
> > +#define SAFE_MAX_CPU_FREQ    5000000000UL // 5 GHz
> > +__weak u64 hw_nmi_get_sample_period(int watchdog_thresh)
> > +{
> > +     unsigned int cpu = smp_processor_id();
> > +     unsigned long max_cpu_freq;
> > +
> > +     max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
> > +     if (!max_cpu_freq)
> > +             max_cpu_freq = SAFE_MAX_CPU_FREQ;
> > +
> > +     return (u64)max_cpu_freq * watchdog_thresh;
> > +}
>
> Why does this function become __weak? Neither arm64 nor riscv override
> it afaict.

Would you say there’s any particular issue here? If some architectures
might need to override the hw_nmi_get_sample_period() function later
on, wouldn’t __weak be a more reasonable choice?

>
> Will

Thanks,
Yunhui
Re: [External] Re: [PATCH v4 1/2] watchdog: move arm64 watchdog_hld into common code
Posted by Will Deacon 1 month, 1 week ago
On Fri, Nov 07, 2025 at 10:42:25AM +0800, yunhui cui wrote:
> On Mon, Nov 3, 2025 at 9:44 PM Will Deacon <will@kernel.org> wrote:
> >
> > On Tue, Oct 14, 2025 at 11:14:24AM +0800, Yunhui Cui wrote:
> > > @@ -306,3 +307,85 @@ void __init hardlockup_config_perf_event(const char *str)
> > >       wd_hw_attr.type = PERF_TYPE_RAW;
> > >       wd_hw_attr.config = config;
> > >  }
> > > +
> > > +#ifdef CONFIG_WATCHDOG_PERF_ADJUST_PERIOD
> > > +/*
> > > + * Safe maximum CPU frequency in case a particular platform doesn't implement
> > > + * cpufreq driver. Although, architecture doesn't put any restrictions on
> > > + * maximum frequency but 5 GHz seems to be safe maximum given the available
> > > + * CPUs in the market which are clocked much less than 5 GHz. On the other
> > > + * hand, we can't make it much higher as it would lead to a large hard-lockup
> > > + * detection timeout on parts which are running slower (eg. 1GHz on
> > > + * Developerbox) and doesn't possess a cpufreq driver.
> > > + */
> > > +#define SAFE_MAX_CPU_FREQ    5000000000UL // 5 GHz
> > > +__weak u64 hw_nmi_get_sample_period(int watchdog_thresh)
> > > +{
> > > +     unsigned int cpu = smp_processor_id();
> > > +     unsigned long max_cpu_freq;
> > > +
> > > +     max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
> > > +     if (!max_cpu_freq)
> > > +             max_cpu_freq = SAFE_MAX_CPU_FREQ;
> > > +
> > > +     return (u64)max_cpu_freq * watchdog_thresh;
> > > +}
> >
> > Why does this function become __weak? Neither arm64 nor riscv override
> > it afaict.
> 
> Would you say there’s any particular issue here? If some architectures
> might need to override the hw_nmi_get_sample_period() function later
> on, wouldn’t __weak be a more reasonable choice?

__weak is pretty brittle (it can depend on link order if you have multiple
targets) and I suspect it prevents inlining when LTO isn't enabled. It's
cleaner and more robust for architectures to provide their hooks by
#defining the symbol, as is done commonly in other parts of the kernel.

But in this particular case, it's completely unnecessary because there
isn't an architectural override and so this function should simply be
static.

Will
Re: [External] Re: [PATCH v4 1/2] watchdog: move arm64 watchdog_hld into common code
Posted by yunhui cui 1 month, 1 week ago
Hi Will,

On Fri, Nov 7, 2025 at 9:11 PM Will Deacon <will@kernel.org> wrote:
>
> On Fri, Nov 07, 2025 at 10:42:25AM +0800, yunhui cui wrote:
> > On Mon, Nov 3, 2025 at 9:44 PM Will Deacon <will@kernel.org> wrote:
> > >
> > > On Tue, Oct 14, 2025 at 11:14:24AM +0800, Yunhui Cui wrote:
> > > > @@ -306,3 +307,85 @@ void __init hardlockup_config_perf_event(const char *str)
> > > >       wd_hw_attr.type = PERF_TYPE_RAW;
> > > >       wd_hw_attr.config = config;
> > > >  }
> > > > +
> > > > +#ifdef CONFIG_WATCHDOG_PERF_ADJUST_PERIOD
> > > > +/*
> > > > + * Safe maximum CPU frequency in case a particular platform doesn't implement
> > > > + * cpufreq driver. Although, architecture doesn't put any restrictions on
> > > > + * maximum frequency but 5 GHz seems to be safe maximum given the available
> > > > + * CPUs in the market which are clocked much less than 5 GHz. On the other
> > > > + * hand, we can't make it much higher as it would lead to a large hard-lockup
> > > > + * detection timeout on parts which are running slower (eg. 1GHz on
> > > > + * Developerbox) and doesn't possess a cpufreq driver.
> > > > + */
> > > > +#define SAFE_MAX_CPU_FREQ    5000000000UL // 5 GHz
> > > > +__weak u64 hw_nmi_get_sample_period(int watchdog_thresh)
> > > > +{
> > > > +     unsigned int cpu = smp_processor_id();
> > > > +     unsigned long max_cpu_freq;
> > > > +
> > > > +     max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
> > > > +     if (!max_cpu_freq)
> > > > +             max_cpu_freq = SAFE_MAX_CPU_FREQ;
> > > > +
> > > > +     return (u64)max_cpu_freq * watchdog_thresh;
> > > > +}
> > >
> > > Why does this function become __weak? Neither arm64 nor riscv override
> > > it afaict.
> >
> > Would you say there’s any particular issue here? If some architectures
> > might need to override the hw_nmi_get_sample_period() function later
> > on, wouldn’t __weak be a more reasonable choice?
>
> __weak is pretty brittle (it can depend on link order if you have multiple
> targets) and I suspect it prevents inlining when LTO isn't enabled. It's
> cleaner and more robust for architectures to provide their hooks by
> #defining the symbol, as is done commonly in other parts of the kernel.
>
> But in this particular case, it's completely unnecessary because there
> isn't an architectural override and so this function should simply be
> static.

Since the function is declared as u64 hw_nmi_get_sample_period(int
watchdog_thresh) in nmi.h, it cannot be static.
I will remove the __weak modifier in the next version.

>
> Will

Thanks,
Yunhui