[PATCH v2 07/18] mm/ksw: add atomic watch on/off operations

Jinchao Wang posted 18 patches 4 weeks, 1 day ago
There is a newer version of this series
[PATCH v2 07/18] mm/ksw: add atomic watch on/off operations
Posted by Jinchao Wang 4 weeks, 1 day ago
Add support to atomically turn the hardware watch on and off without
allocation overhead.

The watch is pre-allocated and later retargeted using hw_breakpoint_modify_local().
The current CPU is updated directly, while other CPUs are updated
asynchronously via smp_call_function_single_async().

This allows KStackWatch to switch the watch in kprobe/fprobe handlers.

Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
---
 mm/kstackwatch/kstackwatch.h |  2 +
 mm/kstackwatch/watch.c       | 97 ++++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)

diff --git a/mm/kstackwatch/kstackwatch.h b/mm/kstackwatch/kstackwatch.h
index 2318779bde70..13ef8c79f855 100644
--- a/mm/kstackwatch/kstackwatch.h
+++ b/mm/kstackwatch/kstackwatch.h
@@ -41,5 +41,7 @@ extern bool panic_on_catch;
 /* watch management */
 int ksw_watch_init(struct ksw_config *config);
 void ksw_watch_exit(void);
+int ksw_watch_on(u64 watch_addr, u64 watch_len);
+void ksw_watch_off(void);
 
 #endif /* _KSTACKWATCH_H */
diff --git a/mm/kstackwatch/watch.c b/mm/kstackwatch/watch.c
index e7ed88700b49..c563f8d17829 100644
--- a/mm/kstackwatch/watch.c
+++ b/mm/kstackwatch/watch.c
@@ -16,9 +16,21 @@
 
 struct perf_event *__percpu *watch_events;
 struct ksw_config *watch_config;
+static DEFINE_SPINLOCK(watch_lock);
 
 static unsigned long long watch_holder;
 
+static struct watch_info {
+	u64 addr;
+	u64 len;
+} watch_info;
+
+static void ksw_watch_on_local_cpu(void *info);
+
+static DEFINE_PER_CPU(call_single_data_t,
+		      watch_csd) = CSD_INIT(ksw_watch_on_local_cpu,
+					    &watch_info);
+
 static void ksw_watch_handler(struct perf_event *bp,
 			      struct perf_sample_data *data,
 			      struct pt_regs *regs)
@@ -32,6 +44,91 @@ static void ksw_watch_handler(struct perf_event *bp,
 		panic("KSW: Stack corruption detected");
 }
 
+/*
+ * Set up the watch on the current CPU.
+ *
+ * watch_info.addr and watch_info.len have already been updated by
+ * ksw_watch_on(); this only retargets the pre-allocated breakpoint.
+ * Called either directly on the initiating CPU or from IPI context
+ * via the per-CPU csd, so it must not sleep.
+ */
+static void ksw_watch_on_local_cpu(void *data)
+{
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+	struct watch_info *watch_info = data;
+
+	int cpu = smp_processor_id();
+	int ret;
+
+	bp = *per_cpu_ptr(watch_events, cpu);
+	if (!bp)
+		return;
+
+	/*
+	 * Start from the breakpoint's current attr so every field is
+	 * initialized; only the target address and length change.
+	 * Previously a stack attr with all other fields uninitialized
+	 * was handed to hw_breakpoint_modify_local().
+	 */
+	attr = bp->attr;
+	attr.bp_addr = watch_info->addr;
+	attr.bp_len = watch_info->len;
+
+	ret = hw_breakpoint_modify_local(bp, &attr);
+	if (ret) {
+		pr_err("KSW: failed to reinstall HWBP on CPU %d ret %d\n", cpu,
+		       ret);
+		return;
+	}
+
+	if (bp->attr.bp_addr == (unsigned long)&watch_holder) {
+		pr_debug("KSW: watch off CPU %d\n", cpu);
+	} else {
+		pr_debug("KSW: watch on CPU %d at 0x%px (len %llu)\n", cpu,
+			 (void *)bp->attr.bp_addr, bp->attr.bp_len);
+	}
+}
+
+/*
+ * Atomically retarget the pre-allocated hardware watch to
+ * @watch_addr / @watch_len on every online CPU.
+ *
+ * Returns 0 on success, -EINVAL for a NULL address, -EBUSY if a
+ * real target is already installed and this call is not restoring
+ * the holder.
+ */
+int ksw_watch_on(u64 watch_addr, u64 watch_len)
+{
+	unsigned long flags;
+	int cpu;
+	int ret;
+
+	if (!watch_addr) {
+		pr_err("KSW: watch with invalid address\n");
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&watch_lock, flags);
+
+	/*
+	 * Reject a new target while one is already installed, unless
+	 * this call parks the watch back on the holder (watch off):
+	 *   addr == 0                   -> never armed yet, allow
+	 *   addr == &watch_holder       -> currently parked, allow
+	 *   watch_addr == &watch_holder -> restoring, allow
+	 */
+	if (watch_info.addr != 0 &&
+	    watch_info.addr != (unsigned long)&watch_holder &&
+	    watch_addr != (unsigned long)&watch_holder) {
+		spin_unlock_irqrestore(&watch_lock, flags);
+		return -EBUSY;
+	}
+
+	watch_info.addr = watch_addr;
+	watch_info.len = watch_len;
+
+	if (watch_addr == (unsigned long)&watch_holder)
+		pr_debug("KSW: watch off starting\n");
+	else
+		pr_debug("KSW: watch on starting\n");
+
+	/*
+	 * Keep watch_lock held across the dispatch so that two
+	 * concurrent ksw_watch_on() calls cannot interleave updates
+	 * of watch_info or re-fire a per-CPU csd that the other
+	 * caller just queued.
+	 *
+	 * NOTE(review): a CPU going online/offline concurrently is
+	 * still unhandled; cpus_read_lock() sleeps and therefore
+	 * cannot be taken from the kprobe/fprobe contexts this runs
+	 * in - TODO address via a hotplug callback.
+	 */
+	for_each_online_cpu(cpu) {
+		if (cpu == raw_smp_processor_id()) {
+			ksw_watch_on_local_cpu(&watch_info);
+		} else {
+			call_single_data_t *csd = &per_cpu(watch_csd, cpu);
+
+			/*
+			 * A csd still in flight from a previous call
+			 * makes this fail; report instead of silently
+			 * leaving the CPU with a stale target.
+			 */
+			ret = smp_call_function_single_async(cpu, csd);
+			if (ret)
+				pr_warn("KSW: csd busy, CPU %d not updated, ret %d\n",
+					cpu, ret);
+		}
+	}
+
+	spin_unlock_irqrestore(&watch_lock, flags);
+
+	return 0;
+}
+
+/*
+ * Park the watch back on the module-private watch_holder variable,
+ * disabling it for practical purposes without releasing the
+ * pre-allocated per-CPU perf events.
+ */
+void ksw_watch_off(void)
+{
+	ksw_watch_on((unsigned long)&watch_holder, sizeof(watch_holder));
+}
+
 int ksw_watch_init(struct ksw_config *config)
 {
 	struct perf_event_attr attr;
-- 
2.43.0
Re: [PATCH v2 07/18] mm/ksw: add atomic watch on/off operations
Posted by Peter Zijlstra 4 weeks, 1 day ago
On Thu, Sep 04, 2025 at 08:21:04AM +0800, Jinchao Wang wrote:

> +static DEFINE_PER_CPU(call_single_data_t,
> +		      watch_csd) = CSD_INIT(ksw_watch_on_local_cpu,
> +					    &watch_info);
> +

> +int ksw_watch_on(u64 watch_addr, u64 watch_len)
> +{
> +	unsigned long flags;
> +	int cpu;
> +
> +	if (!watch_addr) {
> +		pr_err("KSW: watch with invalid address\n");
> +		return -EINVAL;
> +	}
> +
> +	spin_lock_irqsave(&watch_lock, flags);
> +
> +	/*
> +	 * check if already watched
> +	 */
> +	if (watch_info.addr != 0 && // not uninit
> +	    watch_info.addr != (unsigned long)&watch_holder && // installed
> +	    watch_addr != (unsigned long)&watch_holder) { //not restore
> +		spin_unlock_irqrestore(&watch_lock, flags);
> +		return -EBUSY;
> +	}
> +
> +	watch_info.addr = watch_addr;
> +	watch_info.len = watch_len;
> +
> +	spin_unlock_irqrestore(&watch_lock, flags);
> +
> +	if (watch_addr == (unsigned long)&watch_holder)
> +		pr_debug("KSW: watch off starting\n");
> +	else
> +		pr_debug("KSW: watch on starting\n");
> +
> +	for_each_online_cpu(cpu) {
> +		if (cpu == raw_smp_processor_id()) {
> +			ksw_watch_on_local_cpu(&watch_info);
> +		} else {
> +			call_single_data_t *csd = &per_cpu(watch_csd, cpu);
> +
> +			smp_call_function_single_async(cpu, csd);
> +		}
> +	}
> +
> +	return 0;
> +}

What do you think happens when two ksw_watch_on() instances run
concurrently?

What happens if a CPU comes online/offline concurrently with
ksw_watch_on()?
Re: [PATCH v2 07/18] mm/ksw: add atomic watch on/off operations
Posted by Jinchao Wang 3 weeks, 4 days ago
On Thu, Sep 04, 2025 at 08:46:59AM +0200, Peter Zijlstra wrote:
> On Thu, Sep 04, 2025 at 08:21:04AM +0800, Jinchao Wang wrote:
> 
> > +static DEFINE_PER_CPU(call_single_data_t,
> > +		      watch_csd) = CSD_INIT(ksw_watch_on_local_cpu,
> > +					    &watch_info);
> > +
> 
> > +int ksw_watch_on(u64 watch_addr, u64 watch_len)
> > +{
> > +	unsigned long flags;
> > +	int cpu;
> > +
> > +	if (!watch_addr) {
> > +		pr_err("KSW: watch with invalid address\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	spin_lock_irqsave(&watch_lock, flags);
> > +
> > +	/*
> > +	 * check if already watched
> > +	 */
> > +	if (watch_info.addr != 0 && // not uninit
> > +	    watch_info.addr != (unsigned long)&watch_holder && // installed
> > +	    watch_addr != (unsigned long)&watch_holder) { //not restore
> > +		spin_unlock_irqrestore(&watch_lock, flags);
> > +		return -EBUSY;
> > +	}
> > +
> > +	watch_info.addr = watch_addr;
> > +	watch_info.len = watch_len;
> > +
> > +	spin_unlock_irqrestore(&watch_lock, flags);
> > +
> > +	if (watch_addr == (unsigned long)&watch_holder)
> > +		pr_debug("KSW: watch off starting\n");
> > +	else
> > +		pr_debug("KSW: watch on starting\n");
> > +
> > +	for_each_online_cpu(cpu) {
> > +		if (cpu == raw_smp_processor_id()) {
> > +			ksw_watch_on_local_cpu(&watch_info);
> > +		} else {
> > +			call_single_data_t *csd = &per_cpu(watch_csd, cpu);
> > +
> > +			smp_call_function_single_async(cpu, csd);
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> 
> What do you think happens when two ksw_watch_on() instances run
> concurrently?
> 
> What happens if a CPU comes online/offline concurrently with
> ksw_watch_on()?
Thanks for pointing that out. I did not think much about it.
I will think more and address it in the next series.