[PATCH 05/10] perf/uprobe: SRCU-ify uprobe->consumer list

Peter Zijlstra posted 10 patches 1 year, 7 months ago
There is a newer version of this series
[PATCH 05/10] perf/uprobe: SRCU-ify uprobe->consumer list
Posted by Peter Zijlstra 1 year, 7 months ago
With handle_swbp() hitting concurrently on (all) CPUs the
uprobe->register_rwsem can get very contended. Replace it with
SRCU+mutex.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/events/uprobes.c |   37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -26,6 +26,7 @@
 #include <linux/task_work.h>
 #include <linux/shmem_fs.h>
 #include <linux/khugepaged.h>
+#include <linux/srcu.h>
 
 #include <linux/uprobes.h>
 
@@ -49,6 +50,8 @@ static struct mutex uprobes_mmap_mutex[U
 
 DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
 
+DEFINE_STATIC_SRCU(uprobes_srcu);
+
 /* Have a copy of original instruction */
 #define UPROBE_COPY_INSN	0
 
@@ -56,7 +59,7 @@ struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
 	refcount_t		ref;
 	struct rcu_head		rcu;
-	struct rw_semaphore	register_rwsem;
+	struct mutex		register_mutex;
 	struct rw_semaphore	consumer_rwsem;
 	struct list_head	pending_list;
 	struct uprobe_consumer	*consumers;
@@ -750,7 +753,7 @@ static struct uprobe *alloc_uprobe(struc
 	uprobe->inode = inode;
 	uprobe->offset = offset;
 	uprobe->ref_ctr_offset = ref_ctr_offset;
-	init_rwsem(&uprobe->register_rwsem);
+	mutex_init(&uprobe->register_mutex);
 	init_rwsem(&uprobe->consumer_rwsem);
 
 	/* add to uprobes_tree, sorted on inode:offset */
@@ -1129,10 +1132,12 @@ void uprobe_unregister(struct inode *ino
 	if (WARN_ON(!uprobe))
 		return;
 
-	down_write(&uprobe->register_rwsem);
+	mutex_lock(&uprobe->register_mutex);
 	__uprobe_unregister(uprobe, uc);
-	up_write(&uprobe->register_rwsem);
+	mutex_unlock(&uprobe->register_mutex);
 	put_uprobe(uprobe);
+
+	synchronize_srcu(&uprobes_srcu);
 }
 EXPORT_SYMBOL_GPL(uprobe_unregister);
 
@@ -1192,7 +1197,7 @@ static int __uprobe_register(struct inod
 	 * We can race with uprobe_unregister()->delete_uprobe().
 	 * Check uprobe_is_active() and retry if it is false.
 	 */
-	down_write(&uprobe->register_rwsem);
+	mutex_lock(&uprobe->register_mutex);
 	ret = -EAGAIN;
 	if (likely(uprobe_is_active(uprobe))) {
 		consumer_add(uprobe, uc);
@@ -1200,7 +1205,7 @@ static int __uprobe_register(struct inod
 		if (ret)
 			__uprobe_unregister(uprobe, uc);
 	}
-	up_write(&uprobe->register_rwsem);
+	mutex_unlock(&uprobe->register_mutex);
 	put_uprobe(uprobe);
 
 	if (unlikely(ret == -EAGAIN))
@@ -1240,12 +1245,12 @@ int uprobe_apply(struct inode *inode, lo
 	if (WARN_ON(!uprobe))
 		return ret;
 
-	down_write(&uprobe->register_rwsem);
+	mutex_lock(&uprobe->register_mutex);
 	for (con = uprobe->consumers; con && con != uc ; con = con->next)
 		;
 	if (con)
 		ret = register_for_each_vma(uprobe, add ? uc : NULL);
-	up_write(&uprobe->register_rwsem);
+	mutex_unlock(&uprobe->register_mutex);
 	put_uprobe(uprobe);
 
 	return ret;
@@ -2087,14 +2092,19 @@ static struct uprobe *find_active_uprobe
 	return uprobe;
 }
 
+#define for_each_consumer_rcu(pos, head) \
+	for (pos = rcu_dereference_raw(head); pos; \
+	     pos = rcu_dereference_raw(pos->next))
+
 static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 {
 	struct uprobe_consumer *uc;
 	int remove = UPROBE_HANDLER_REMOVE;
 	bool need_prep = false; /* prepare return uprobe, when needed */
 
-	down_read(&uprobe->register_rwsem);
-	for (uc = uprobe->consumers; uc; uc = uc->next) {
+	guard(srcu)(&uprobes_srcu);
+
+	for_each_consumer_rcu(uc, uprobe->consumers) {
 		int rc = 0;
 
 		if (uc->handler) {
@@ -2116,7 +2126,6 @@ static void handler_chain(struct uprobe
 		WARN_ON(!uprobe_is_active(uprobe));
 		unapply_uprobe(uprobe, current->mm);
 	}
-	up_read(&uprobe->register_rwsem);
 }
 
 static void
@@ -2125,12 +2134,12 @@ handle_uretprobe_chain(struct return_ins
 	struct uprobe *uprobe = ri->uprobe;
 	struct uprobe_consumer *uc;
 
-	down_read(&uprobe->register_rwsem);
-	for (uc = uprobe->consumers; uc; uc = uc->next) {
+	guard(srcu)(&uprobes_srcu);
+
+	for_each_consumer_rcu(uc, uprobe->consumers) {
 		if (uc->ret_handler)
 			uc->ret_handler(uc, ri->func, regs);
 	}
-	up_read(&uprobe->register_rwsem);
 }
 
 static struct return_instance *find_next_ret_chain(struct return_instance *ri)
Re: [PATCH 05/10] perf/uprobe: SRCU-ify uprobe->consumer list
Posted by Peter Zijlstra 1 year, 7 months ago
On Mon, Jul 08, 2024 at 11:12:46AM +0200, Peter Zijlstra wrote:
> @@ -2087,14 +2092,19 @@ static struct uprobe *find_active_uprobe
>  	return uprobe;
>  }
>  
> +#define for_each_consumer_rcu(pos, head) \
> +	for (pos = rcu_dereference_raw(head); pos; \
> +	     pos = rcu_dereference_raw(pos->next))
> +
>  static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
>  {
>  	struct uprobe_consumer *uc;
>  	int remove = UPROBE_HANDLER_REMOVE;
>  	bool need_prep = false; /* prepare return uprobe, when needed */
>  
> -	down_read(&uprobe->register_rwsem);
> -	for (uc = uprobe->consumers; uc; uc = uc->next) {
> +	guard(srcu)(&uprobes_srcu);
> +
> +	for_each_consumer_rcu(uc, uprobe->consumers) {
>  		int rc = 0;
>  
>  		if (uc->handler) {
> @@ -2116,7 +2126,6 @@ static void handler_chain(struct uprobe
>  		WARN_ON(!uprobe_is_active(uprobe));
>  		unapply_uprobe(uprobe, current->mm);

   ^^^ this remove case needs more thought.

>  	}
> -	up_read(&uprobe->register_rwsem);
>  }
Re: [PATCH 05/10] perf/uprobe: SRCU-ify uprobe->consumer list
Posted by Oleg Nesterov 1 year, 7 months ago
On 07/09, Peter Zijlstra wrote:
>
> > +	guard(srcu)(&uprobes_srcu);
> > +
> > +	for_each_consumer_rcu(uc, uprobe->consumers) {
> >  		int rc = 0;
> >
> >  		if (uc->handler) {
> > @@ -2116,7 +2126,6 @@ static void handler_chain(struct uprobe
> >  		WARN_ON(!uprobe_is_active(uprobe));
> >  		unapply_uprobe(uprobe, current->mm);
>
>    ^^^ this remove case needs more thought.

Yeah... that is why the current code doesn't use ->consumer_rwsem, iirc.

And consumer_add/del need some changes. Say, consumer_add() does

	uc->next = uprobe->consumers;
	uprobe->consumers = uc;

I guess it should do

	WRITE_ONCE(uc->next, uprobe->consumers);
	rcu_assign_pointer(uprobe->consumers, uc);

Oleg.
Re: [PATCH 05/10] perf/uprobe: SRCU-ify uprobe->consumer list
Posted by Peter Zijlstra 1 year, 7 months ago
On Tue, Jul 09, 2024 at 03:33:49PM +0200, Oleg Nesterov wrote:
> On 07/09, Peter Zijlstra wrote:
> >
> > > +	guard(srcu)(&uprobes_srcu);
> > > +
> > > +	for_each_consumer_rcu(uc, uprobe->consumers) {
> > >  		int rc = 0;
> > >
> > >  		if (uc->handler) {
> > > @@ -2116,7 +2126,6 @@ static void handler_chain(struct uprobe
> > >  		WARN_ON(!uprobe_is_active(uprobe));
> > >  		unapply_uprobe(uprobe, current->mm);
> >
> >    ^^^ this remove case needs more thought.
> 
> Yeah... that is why the current code doesn't use ->consumer_rwsem, iirc.

AFAICT something like the below should work. Concurrent
remove_breakpoint() should already be possible today and doesn't appear
to be a problem.

--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1260,6 +1260,10 @@ int uprobe_apply(struct inode *inode, lo
 	return ret;
 }
 
+/*
+ * Can race against uprobe_unregister() / register_for_each_vma(), and relies
+ * on duplicate remove_breakpoint() being a no-op.
+ */
 static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
 {
 	VMA_ITERATOR(vmi, mm, 0);
@@ -2101,6 +2105,7 @@ static void handler_chain(struct uprobe
 	struct uprobe_consumer *uc;
 	int remove = UPROBE_HANDLER_REMOVE;
 	bool need_prep = false; /* prepare return uprobe, when needed */
+	bool had_handler = false;
 
 	down_read(&uprobe->register_rwsem);
 	for (uc = uprobe->consumers; uc; uc = uc->next) {
@@ -2115,16 +2120,26 @@ static void handler_chain(struct uprobe
 		if (uc->ret_handler)
 			need_prep = true;
 
+		/*
+		 * A single handler that does not mask out REMOVE, means the
+		 * probe stays.
+		 */
+		had_handler = true;
 		remove &= rc;
 	}
 
+	/*
+	 * If there were no handlers called, nobody asked for it to be removed
+	 * but also nobody got to mask the value. Fix it up.
+	 */
+	if (!had_handler)
+		remove = 0;
+
 	if (need_prep && !remove)
 		prepare_uretprobe(uprobe, regs); /* put bp at return */
 
-	if (remove && uprobe->consumers) {
-		WARN_ON(!uprobe_is_active(uprobe));
+	if (remove)
 		unapply_uprobe(uprobe, current->mm);
-	}
 	up_read(&uprobe->register_rwsem);
 }
Re: [PATCH 05/10] perf/uprobe: SRCU-ify uprobe->consumer list
Posted by Oleg Nesterov 1 year, 7 months ago
On 07/09, Peter Zijlstra wrote:
>
> On Tue, Jul 09, 2024 at 03:33:49PM +0200, Oleg Nesterov wrote:
> > On 07/09, Peter Zijlstra wrote:
> > >
> > > > +	guard(srcu)(&uprobes_srcu);
> > > > +
> > > > +	for_each_consumer_rcu(uc, uprobe->consumers) {
> > > >  		int rc = 0;
> > > >
> > > >  		if (uc->handler) {
> > > > @@ -2116,7 +2126,6 @@ static void handler_chain(struct uprobe
> > > >  		WARN_ON(!uprobe_is_active(uprobe));
> > > >  		unapply_uprobe(uprobe, current->mm);
> > >
> > >    ^^^ this remove case needs more thought.
> >
> > Yeah... that is why the current code doesn't use ->consumer_rwsem, iirc.
>
> AFAICT something like the below should work. Concurrent
> remove_breakpoint() should already be possible today and doesn't appear
> to be a problem.

Sorry, I don't understand how can this patch help. Yes, it removes the
uprobe->consumers != NULL check, but this is minor.

To simplify, suppose we have a single consumer which is not interested
in this task/mm, it returns UPROBE_HANDLER_REMOVE.

For example, event->hw.target != NULL and the current task is the forked
child which hits the breakpoint copied by dup_mmap().

Now. We need to ensure that another (say system-wide) consumer can't come
and call register_for_each_vma() before unapply_uprobe().

But perhaps I missed your point...

Oleg.

> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -1260,6 +1260,10 @@ int uprobe_apply(struct inode *inode, lo
>  	return ret;
>  }
>
> +/*
> + * Can race against uprobe_unregister() / register_for_each_vma(), and relies
> + * on duplicate remove_breakpoint() being a no-op.
> + */
>  static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
>  {
>  	VMA_ITERATOR(vmi, mm, 0);
> @@ -2101,6 +2105,7 @@ static void handler_chain(struct uprobe
>  	struct uprobe_consumer *uc;
>  	int remove = UPROBE_HANDLER_REMOVE;
>  	bool need_prep = false; /* prepare return uprobe, when needed */
> +	bool had_handler = false;
>
>  	down_read(&uprobe->register_rwsem);
>  	for (uc = uprobe->consumers; uc; uc = uc->next) {
> @@ -2115,16 +2120,26 @@ static void handler_chain(struct uprobe
>  		if (uc->ret_handler)
>  			need_prep = true;
>
> +		/*
> +		 * A single handler that does not mask out REMOVE, means the
> +		 * probe stays.
> +		 */
> +		had_handler = true;
>  		remove &= rc;
>  	}
>
> +	/*
> +	 * If there were no handlers called, nobody asked for it to be removed
> +	 * but also nobody got to mask the value. Fix it up.
> +	 */
> +	if (!had_handler)
> +		remove = 0;
> +
>  	if (need_prep && !remove)
>  		prepare_uretprobe(uprobe, regs); /* put bp at return */
>
> -	if (remove && uprobe->consumers) {
> -		WARN_ON(!uprobe_is_active(uprobe));
> +	if (remove)
>  		unapply_uprobe(uprobe, current->mm);
> -	}
>  	up_read(&uprobe->register_rwsem);
>  }
>
>
Re: [PATCH 05/10] perf/uprobe: SRCU-ify uprobe->consumer list
Posted by Peter Zijlstra 1 year, 7 months ago
On Tue, Jul 09, 2024 at 05:05:04PM +0200, Oleg Nesterov wrote:


> To simplify, suppose we have a single consumer which is not interested
> in this task/mm, it returns UPROBE_HANDLER_REMOVE.
> 
> For example, event->hw.target != NULL and the current task is the forked
> child which hits the breakpoint copied by dup_mmap().
> 
> Now. We need to ensure that another (say system-wide) consumer can't come
> and call register_for_each_vma() before unapply_uprobe().
> 
> But perhaps I missed your point...

Ooh, I see. I failed to consider that particular case. This is
interleaving uprobe_register() and handler_chain(). Silly me only looked
at uprobe_unregister() and handler_chain().

Hmm, easiest would be to add a seqcount to register_mutex and simply
skip the remove case when odd.

Then the handler might get a few extra (unwanted) calls, but it should
be able to handle them, they're fundamentally not different from the
first one where it says REMOVE. Right?
Re: [PATCH 05/10] perf/uprobe: SRCU-ify uprobe->consumer list
Posted by Oleg Nesterov 1 year, 7 months ago
On 07/09, Peter Zijlstra wrote:
>
> Hmm, easiest would be to add a seqcount to register_mutex and simply
> skip the remove case when odd.

probably yes,

> Then the handler might get a few extra (unwanted) calls, but it should
> be able to handle them, they're fundamentally not different from the
> first one where it says REMOVE. Right?

plus the handler can never assume that UPROBE_HANDLER_REMOVE won't be
ignored, another consumer can want to trace this task.

Oleg.
Re: [PATCH 05/10] perf/uprobe: SRCU-ify uprobe->consumer list
Posted by Peter Zijlstra 1 year, 7 months ago
On Tue, Jul 09, 2024 at 03:33:49PM +0200, Oleg Nesterov wrote:
> I guess it should do
> 
> 	WRITE_ONCE(uc->next, uprobe->consumers);
> 	rcu_assign_pointer(uprobe->consumers, uc);

Yes...