From nobody Sat Feb 7 17:56:20 2026 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8741B558B7 for ; Mon, 8 Jul 2024 09:29:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430962; cv=none; b=TRmRkzArn5qHC10/U0ScWiqjqorpk0g+QPLihs0Y7eQ2QuhwG1WoAIN68Z0r+LXcfTrvulQvXtwiPHmtEyM2XrW/R4AAcqP7SLPUuIadJglcxySJOos8Pm4MmqYLB0R0WboB8gX1OTKJFPDHlhLIiCP83J2HxvLuFTOgphEB62M= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430962; c=relaxed/simple; bh=iPnzJDPYV69x97yqOMsVW8M8RvXBmzlPMZ6dojO6w9w=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=MedHdErqTeaABiaDnfG8CjJM2aonAQVyUhT8OUwyYy0gnVSK0x+Q6NSk4zHyUhCViJXdFjlDbydzV2r/Igad19FRjS1CCYLX9LJRTUgSc3cDUdu4wWBokPQqNeMHMAVTpF3dYrIKOhrztoVEdctE+WI3v0m7ADtgmn9MPer5Iyw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=ZhDoV64x; arc=none smtp.client-ip=90.155.50.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="ZhDoV64x" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=casper.20170209; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; 
bh=D/qnIt6lTHMe7dedD5Wou6cHbGCP/Pb/6K/ANgI22y8=; b=ZhDoV64xmEOYU3QOIuxBQCNP+Y Ynnoc9WLR+a6eFbtR7QM7jbooUAyBMP6wgKs87UjpKsmZs5PeyTJeOwJfFJw4TwX5+lA78oK0jZSz mVAgKDK5wkYirKH9OrxYh+temyYaENpOByTAXm/IpzLMJ03kXhR6kVYy6dDpN1R8WVP/vZgSvdbA8 e6mQOawMKct21VRt92YBJkam4rt3EjOv4K/z4Imue/1vbdQH/pRnhlz/TVY3BVsQvvYrdSdvaX0ly s1OZJ9fwdUA9pyRAT5WjWB3zYQsW0XdPAAIKK8bMotWEuj7fUkcqNW3SxG0T9oiQ2UoW88KA8ptPH YJ3LXvLQ==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by casper.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sQkgN-00000006hOX-0jnI; Mon, 08 Jul 2024 09:29:15 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id 044993006B7; Mon, 8 Jul 2024 11:29:13 +0200 (CEST) Message-Id: <20240708092415.248663071@infradead.org> User-Agent: quilt/0.65 Date: Mon, 08 Jul 2024 11:12:42 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, oleg@redhat.com, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH 01/10] perf/uprobe: Re-indent labels References: <20240708091241.544262971@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Remove the silly label indenting. 
s/^\ \([[:alnum:]]*\):$/\1:/g Signed-off-by: Peter Zijlstra (Intel) --- kernel/events/uprobes.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -205,7 +205,7 @@ static int __replace_page(struct vm_area folio_put(old_folio); =20 err =3D 0; - unlock: +unlock: mmu_notifier_invalidate_range_end(&range); folio_unlock(old_folio); return err; @@ -857,7 +857,7 @@ static int prepare_uprobe(struct uprobe smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */ set_bit(UPROBE_COPY_INSN, &uprobe->flags); =20 - out: +out: up_write(&uprobe->consumer_rwsem); =20 return ret; @@ -965,7 +965,7 @@ build_map_info(struct address_space *map struct map_info *info; int more =3D 0; =20 - again: +again: i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) @@ -1019,7 +1019,7 @@ build_map_info(struct address_space *map } while (--more); =20 goto again; - out: +out: while (prev) prev =3D free_map_info(prev); return curr; @@ -1068,13 +1068,13 @@ register_for_each_vma(struct uprobe *upr err |=3D remove_breakpoint(uprobe, mm, info->vaddr); } =20 - unlock: +unlock: mmap_write_unlock(mm); - free: +free: mmput(mm); info =3D free_map_info(info); } - out: +out: percpu_up_write(&dup_mmap_sem); return err; } @@ -1159,7 +1159,7 @@ static int __uprobe_register(struct inod if (!IS_ALIGNED(ref_ctr_offset, sizeof(short))) return -EINVAL; =20 - retry: +retry: uprobe =3D alloc_uprobe(inode, offset, ref_ctr_offset); if (!uprobe) return -ENOMEM; @@ -1468,7 +1468,7 @@ static int xol_add_vma(struct mm_struct ret =3D 0; /* pairs with get_xol_area() */ smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */ - fail: +fail: mmap_write_unlock(mm); =20 return ret; @@ -1512,7 +1512,7 @@ static struct xol_area *__create_xol_are kfree(area->bitmap); free_area: kfree(area); - out: +out: return NULL; } =20 @@ -1915,7 +1915,7 @@ static void 
prepare_uretprobe(struct upr utask->return_instances =3D ri; =20 return; - fail: +fail: kfree(ri); } =20 @@ -2031,7 +2031,7 @@ static int is_trap_at_addr(struct mm_str =20 copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); put_page(page); - out: +out: /* This needs to return true for any variant of the trap insn */ return is_trap_insn(&opcode); } @@ -2159,7 +2159,7 @@ static void handle_trampoline(struct pt_ utask->return_instances =3D ri; return; =20 - sigill: +sigill: uprobe_warn(current, "handle uretprobe, sending SIGILL."); force_sig(SIGILL); From nobody Sat Feb 7 17:56:20 2026 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 84E915466B for ; Mon, 8 Jul 2024 09:29:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430962; cv=none; b=r/OvdcLptoEOqP2lDs3/IiFGFU5lQBhgiejhIDTwEkTodXMFYDDdFAHwphIWy5SFEAEHo2LPspKJZDTHoaxLj63f70ap8cvpKNLnGXqM2JBufCrtdvyC/akJBO9nykKGt4fziCX1nulnPZWMpJ1RkpIwlN+c0Dl4Pgn8hWIlUOY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430962; c=relaxed/simple; bh=aHcz8je/K6D8qyRMNtQ30mbLo3y2E4SL+2Pr8zv5Wis=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=lSlOmkdJRQ58MLpdU6Jb4gwk7PO4riU1CwoFcuXX0h/fsVd16c9hfLEAmcReZSGJ9SoFqOJrDzQKRfWYqSuEdXtZ2FIWMwtib0J38A30k3Rg7rJorYhYR4XtWSzn3pMWd6sVRQBG71njpIxp+z37ukM9g+eZjogbRMM8aSalvH8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=Tdxe3ytV; arc=none smtp.client-ip=90.155.50.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=none 
(p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="Tdxe3ytV" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=casper.20170209; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=1bUUAiw4/p70dXaUe2S6wv8W5kuKSZvbGkuAwwX7C3c=; b=Tdxe3ytV0Sjef2aMwTMoaLQiKQ e381tmG00zuVrgDf5WXQmRZ3V3x+/DqTJ4igCE7xkO2IPbdSOth1nqLDMbjm4OwuCWLiiFfEOX258 KInKvbzWm0jRluM0IRq0ucF9HdJStnORzS2kghescouo9/36bXDdVre/EyOVhJl+C0cBf58jdr1UV q5c0vNiny/ko2gl4NrG0FDBZAttT8v9AdUXpbZLI+4LdT+BEPoLBx/HsmPKcCglj69dhFr4HuVU0I I4AY0f9mvGB0TMcbzi2KmbETWb+BYFcseEt91TD4v/58U4cxvueY0rwD0XjiI1QUlJp4LGcEKE1TU 157M2K0g==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by casper.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sQkgN-00000006hOY-0lNP; Mon, 08 Jul 2024 09:29:15 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id 0923B300AA5; Mon, 8 Jul 2024 11:29:13 +0200 (CEST) Message-Id: <20240708092415.351458388@infradead.org> User-Agent: quilt/0.65 Date: Mon, 08 Jul 2024 11:12:43 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, oleg@redhat.com, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH 02/10] perf/uprobe: Remove spurious whitespace References: <20240708091241.544262971@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Signed-off-by: Peter Zijlstra (Intel) --- 
kernel/events/uprobes.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -67,7 +67,7 @@ struct uprobe { * The generic code assumes that it has two members of unknown type * owned by the arch-specific code: * - * insn - copy_insn() saves the original instruction here for + * insn - copy_insn() saves the original instruction here for * arch_uprobe_analyze_insn(). * * ixol - potentially modified instruction to execute out of @@ -95,18 +95,18 @@ static LIST_HEAD(delayed_uprobe_list); * allocated. */ struct xol_area { - wait_queue_head_t wq; /* if all slots are busy */ - atomic_t slot_count; /* number of in-use slots */ - unsigned long *bitmap; /* 0 =3D free slot */ + wait_queue_head_t wq; /* if all slots are busy */ + atomic_t slot_count; /* number of in-use slots */ + unsigned long *bitmap; /* 0 =3D free slot */ =20 struct vm_special_mapping xol_mapping; - struct page *pages[2]; + struct page *pages[2]; /* * We keep the vma's vm_start rather than a pointer to the vma * itself. The probed process or a naughty kernel module could make * the vma go away, and we must handle that reasonably gracefully. 
*/ - unsigned long vaddr; /* Page(s) of instruction slots */ + unsigned long vaddr; /* Page(s) of instruction slots */ }; =20 /* From nobody Sat Feb 7 17:56:20 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3470D5466B for ; Mon, 8 Jul 2024 09:29:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430969; cv=none; b=Tk/RfAw/k9vTmlp3Ic+Thn7ueCJIgQiGugZj5DnJXz/PG48fParKx7T6SM7frKcpyEel4b8FJrmXfheuDu/7+XNFrfXsiiTdrLG4hsdbdSQom2bBeoftAXfwoNV7dGa9e/3t7lQ6S4E/OmF/Onm/+ElU2GMBeAQYaT4G6xpaXak= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430969; c=relaxed/simple; bh=YcR0uhyVp1UXF4y0oBeFBxmqAwWj8VkQLxscHUOYniA=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=qEv8j7kPo8LMuUKI3uKqgBhqU1QFRxUhS5FwsHopORi/BnFDeX3YsKvHpqPn7volj3xXahHsmx7ztgz+GMtgqLwhQ27qHVPFTGjwqSe10GrzWC9NP3/zf7rdBsv+fT/II0RO9gnwRwLGBjHNTgtzDn5sqXNg5muTVFw9tjQMV9U= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=F8/djpNQ; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="F8/djpNQ" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Content-Type:MIME-Version:References: 
Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=nJ0jzIltPN4Hiu0OGMRSjwloxqmGPggzTGl+GF3aNWM=; b=F8/djpNQRIwf46Wf7FQPlsaTLk x51ddJlyjJkSwtyaXEHAqtWTpOOftxfXeILQ4QrM791qmdd8n0f3SJn1vdj1D3GwTn+yYrD5PQOuZ dAKbSo+JjMdMULb8OpFYjib56ghPtGoZIaapPzCl5gsUfLzLfO/N3W9dPk3SjM7i+DWHJJUC6Lc3L IGKyEE56A7ZTwIflvDAlxW50hOu8YDg6AzlD4fJ8NZfLCxDctog/3Uv9Ex0qBeX2qNnUzEpVCyumA 0z/6Uoqo7hhirAU84xoJbjJYJsagDLTdBBq7FC/baGWe2M90A1JOGAFLuTI2RkK39ZrX8FMMfohv/ UW1EoFNA==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by desiato.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sQkgO-00000000ZsN-02Um; Mon, 08 Jul 2024 09:29:19 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id 0E40C300CBA; Mon, 8 Jul 2024 11:29:13 +0200 (CEST) Message-Id: <20240708092415.464066230@infradead.org> User-Agent: quilt/0.65 Date: Mon, 08 Jul 2024 11:12:44 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, oleg@redhat.com, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH 03/10] rbtree: Provide rb_find_rcu() / rb_find_add_rcu() References: <20240708091241.544262971@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Much like latch_tree, add two RCU methods for the regular RB-tree, which can be used in conjunction with a seqcount to provide lockless lookups. 
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu (Google) --- include/linux/rbtree.h | 67 ++++++++++++++++++++++++++++++++++++++++++++= +++++ 1 file changed, 67 insertions(+) --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -245,6 +245,42 @@ rb_find_add(struct rb_node *node, struct } =20 /** + * rb_find_add_rcu() - find equivalent @node in @tree, or add @node + * @node: node to look-for / insert + * @tree: tree to search / modify + * @cmp: operator defining the node order + * + * Adds a Store-Release for link_node. + * + * Returns the rb_node matching @node, or NULL when no match is found and = @node + * is inserted. + */ +static __always_inline struct rb_node * +rb_find_add_rcu(struct rb_node *node, struct rb_root *tree, + int (*cmp)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link =3D &tree->rb_node; + struct rb_node *parent =3D NULL; + int c; + + while (*link) { + parent =3D *link; + c =3D cmp(node, parent); + + if (c < 0) + link =3D &parent->rb_left; + else if (c > 0) + link =3D &parent->rb_right; + else + return parent; + } + + rb_link_node_rcu(node, parent, link); + rb_insert_color(node, tree); + return NULL; +} + +/** * rb_find() - find @key in tree @tree * @key: key to match * @tree: tree to search @@ -268,6 +304,37 @@ rb_find(const void *key, const struct rb else return node; } + + return NULL; +} + +/** + * rb_find_rcu() - find @key in tree @tree + * @key: key to match + * @tree: tree to search + * @cmp: operator defining the node order + * + * Notably, tree descent vs concurrent tree rotations is unsound and can r= esult + * in false-negatives. + * + * Returns the rb_node matching @key or NULL. 
+ */ +static __always_inline struct rb_node * +rb_find_rcu(const void *key, const struct rb_root *tree, + int (*cmp)(const void *key, const struct rb_node *)) +{ + struct rb_node *node =3D tree->rb_node; + + while (node) { + int c =3D cmp(key, node); + + if (c < 0) + node =3D rcu_dereference_raw(node->rb_left); + else if (c > 0) + node =3D rcu_dereference_raw(node->rb_right); + else + return node; + } =20 return NULL; } From nobody Sat Feb 7 17:56:20 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 65C075579F for ; Mon, 8 Jul 2024 09:29:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430969; cv=none; b=ZPhjbKA3mQNH9RuOcWtH0KQP4ab5KWNC0HttfXSer/R1vbntuUekoYcb0u5cD3d1/+daAWr3ULhhSfZ4r38S230bLUs/b3HUxUq9ZPIMNKL5bz9rzTS/K1NG3BS29yw6c+RuwXE8bBfN+xlKJEPZBKlqUt3x4Gk6ckG6d+MZmE8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430969; c=relaxed/simple; bh=aTGIb0+D9Sv/jOsQ3tTClvWRoDWhCH/sWnarT4el9/Q=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=OHYMYLb7VqqTRJ5f/LhSALJD+R6Sk1HFEk6XYxPxayAecGTHLfGh5qXhY0zYpxls79mPmQyvt0VOeF1bs7Zh19SCL0D/2oabzAJArqwXT2gQMf1E20+h1kpcsJYo0Y+MkQZqXfWJaNlK7lG4Nz42ZZHBcJ8OpuXZSaiLzpfV5KY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=mmpXG2zb; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org 
Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="mmpXG2zb" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=zdZYKFLVQz7+kKvF6YVvGcJqDTuHqI+KXzq8RKGs9qc=; b=mmpXG2zbMpxwAJZJMpb71M4RpE jjqNfmIKbELEopjL5cr2kPfQrKs6PNFoXYLUv4qPIAOM8sfD7PsDwlZSZeco0s7WVquN1I+Lw8xo+ c8Qv5e8hOUNywoT4bo5c4WAQ8HyRQWnOD/YCsYdr+th+xuptAKTSGtyVFTNnJsVy0g0Ptz1prfho9 LtZM0NI+tvvUVEpbUPWHO2xXxzZdiGJSeZGruMKYcGNBiy6/gERqUjhago0vwViukNIRBRyRBEorC Q6tVhmXb3nrnMV6gd0rxGDnYPte1ImmUa26jHmxt4BBimqZWaLmdB8kabtJIP3NPLeV1EfeWMrEHx K0Ae4C7g==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by desiato.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sQkgO-00000000ZsO-044y; Mon, 08 Jul 2024 09:29:19 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id 133B1301173; Mon, 8 Jul 2024 11:29:13 +0200 (CEST) Message-Id: <20240708092415.579623285@infradead.org> User-Agent: quilt/0.65 Date: Mon, 08 Jul 2024 11:12:45 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, oleg@redhat.com, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH 04/10] perf/uprobe: RCU-ify find_uprobe() References: <20240708091241.544262971@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" With handle_swbp() triggering concurrently on (all) CPUs, uprobes_treelock becomes a bottleneck. Avoid taking uprobes_treelock by doing RCU lookups of the uprobe. 
Signed-off-by: Peter Zijlstra (Intel) --- kernel/events/uprobes.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -40,6 +40,7 @@ static struct rb_root uprobes_tree =3D RB_ #define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree) =20 static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */ +static seqcount_rwlock_t uprobes_seqcount =3D SEQCNT_RWLOCK_ZERO(uprobes_s= eqcount, &uprobes_treelock); =20 #define UPROBES_HASH_SZ 13 /* serialize uprobe->pending_list */ @@ -54,6 +55,7 @@ DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem) struct uprobe { struct rb_node rb_node; /* node in the rb tree */ refcount_t ref; + struct rcu_head rcu; struct rw_semaphore register_rwsem; struct rw_semaphore consumer_rwsem; struct list_head pending_list; @@ -593,6 +595,12 @@ static struct uprobe *get_uprobe(struct return uprobe; } =20 +static void uprobe_free_rcu(struct rcu_head *rcu) +{ + struct uprobe *uprobe =3D container_of(rcu, struct uprobe, rcu); + kfree(uprobe); +} + static void put_uprobe(struct uprobe *uprobe) { if (refcount_dec_and_test(&uprobe->ref)) { @@ -604,7 +612,7 @@ static void put_uprobe(struct uprobe *up mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(uprobe, NULL); mutex_unlock(&delayed_uprobe_lock); - kfree(uprobe); + call_rcu(&uprobe->rcu, uprobe_free_rcu); } } =20 @@ -653,7 +661,7 @@ static struct uprobe *__find_uprobe(stru .inode =3D inode, .offset =3D offset, }; - struct rb_node *node =3D rb_find(&key, &uprobes_tree, __uprobe_cmp_key); + struct rb_node *node =3D rb_find_rcu(&key, &uprobes_tree, __uprobe_cmp_ke= y); =20 if (node) return get_uprobe(__node_2_uprobe(node)); @@ -667,20 +675,32 @@ static struct uprobe *__find_uprobe(stru */ static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) { - struct uprobe *uprobe; + unsigned int seq; =20 - read_lock(&uprobes_treelock); - uprobe =3D __find_uprobe(inode, offset); - 
read_unlock(&uprobes_treelock); + guard(rcu)(); =20 - return uprobe; + do { + seq =3D read_seqcount_begin(&uprobes_seqcount); + struct uprobe *uprobe =3D __find_uprobe(inode, offset); + if (uprobe) { + /* + * Lockless RB-tree lookups are prone to false-negatives. + * If they find something, it's good. If they do not find, + * it needs to be validated. + */ + return uprobe; + } + } while (read_seqcount_retry(&uprobes_seqcount, seq)); + + /* Really didn't find anything. */ + return NULL; } =20 static struct uprobe *__insert_uprobe(struct uprobe *uprobe) { struct rb_node *node; =20 - node =3D rb_find_add(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); + node =3D rb_find_add_rcu(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp); if (node) return get_uprobe(__node_2_uprobe(node)); =20 @@ -702,7 +722,9 @@ static struct uprobe *insert_uprobe(stru struct uprobe *u; =20 write_lock(&uprobes_treelock); + write_seqcount_begin(&uprobes_seqcount); u =3D __insert_uprobe(uprobe); + write_seqcount_end(&uprobes_seqcount); write_unlock(&uprobes_treelock); =20 return u; @@ -936,7 +958,9 @@ static void delete_uprobe(struct uprobe return; =20 write_lock(&uprobes_treelock); + write_seqcount_begin(&uprobes_seqcount); rb_erase(&uprobe->rb_node, &uprobes_tree); + write_seqcount_end(&uprobes_seqcount); write_unlock(&uprobes_treelock); RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */ put_uprobe(uprobe); From nobody Sat Feb 7 17:56:20 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id F39A854F8C for ; Mon, 8 Jul 2024 09:29:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430970; cv=none; 
b=jW7S1jRFVn+BoJmh4AAHdou8FmKs3eGei7jbatFta4JpYppYzdQToDW4I4Gu24Z24uydA0MX7BcaUb/GhHGCLncrn4FY1gIjU9ipw4PGG/+YXHo/yoboNHSGlYNg8sMdrC8eVZzGT+uFFXSX6dNpv6zXELrXQuHCTq14BNOqc1Y= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430970; c=relaxed/simple; bh=Z+tf+zGbvC0gSaMaFf/2zXls1bdcxX+Z2+74bcL4sJ4=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=kpoq2AN3XVlnEbl+wR0vUsyuBhf3dRg1ErKAcs3McDAQ/2W0GYQTnLd4VEIsfGPO1jowEl/mJSG87KqsoXmf4s55b5Gj4F3k0voJnaAoKD+3abPgL/EuCbDxXBjzeUBmpAF1FbHlrtkEq1KymgpFX2STwdDOPCD0aAOrxaCCd4E= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=fLP2gPzh; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="fLP2gPzh" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=FqzzvelQ00c0zN3R4blHkZc53U+LXab6OIi85Tl6cJg=; b=fLP2gPzh1ziLfREnPNinzFXPxf sBEqJ7n7OTCxUyDyP8q/bXvawVZAyjInEmcSc9+0lpU6nc+ksfETXaboPaCDQ9WWygKzm7IuFyF3c mdAtHCaTIB2pjKBQaFTfbbfX+WqW4HmpzkMfvoh3bay70f2GyIRbGmr46tGoMC7SQ3366a4UbzO5c E05PXU/Lb8eBJ+rM9uYp7juW0cnlXrPuBo3PN4QpeyNH1TAFBGx6Ubprnrw9AfyfCzxVno4myTp0i 0nsukPCI+l7AkUVhOjNh3ynw+0lDiDKXAcm+7kGKipX5FCQApa+iyIIjpDEntWqkhB7FTd2yAI73Y av1EunCQ==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by desiato.infradead.org with 
esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sQkgN-00000000ZsP-4BCt; Mon, 08 Jul 2024 09:29:18 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id 184B2302182; Mon, 8 Jul 2024 11:29:13 +0200 (CEST) Message-Id: <20240708092415.695619684@infradead.org> User-Agent: quilt/0.65 Date: Mon, 08 Jul 2024 11:12:46 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, oleg@redhat.com, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH 05/10] perf/uprobe: SRCU-ify uprobe->consumer list References: <20240708091241.544262971@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" With handle_swbp() hitting concurrently on (all) CPUs the uprobe->register_rwsem can get very contended. Replace it with SRCU+mutex. 
Signed-off-by: Peter Zijlstra (Intel) --- kernel/events/uprobes.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -26,6 +26,7 @@ #include #include #include +#include =20 #include =20 @@ -49,6 +50,8 @@ static struct mutex uprobes_mmap_mutex[U =20 DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem); =20 +DEFINE_STATIC_SRCU(uprobes_srcu); + /* Have a copy of original instruction */ #define UPROBE_COPY_INSN 0 =20 @@ -56,7 +59,7 @@ struct uprobe { struct rb_node rb_node; /* node in the rb tree */ refcount_t ref; struct rcu_head rcu; - struct rw_semaphore register_rwsem; + struct mutex register_mutex; struct rw_semaphore consumer_rwsem; struct list_head pending_list; struct uprobe_consumer *consumers; @@ -750,7 +753,7 @@ static struct uprobe *alloc_uprobe(struc uprobe->inode =3D inode; uprobe->offset =3D offset; uprobe->ref_ctr_offset =3D ref_ctr_offset; - init_rwsem(&uprobe->register_rwsem); + mutex_init(&uprobe->register_mutex); init_rwsem(&uprobe->consumer_rwsem); =20 /* add to uprobes_tree, sorted on inode:offset */ @@ -1129,10 +1132,12 @@ void uprobe_unregister(struct inode *ino if (WARN_ON(!uprobe)) return; =20 - down_write(&uprobe->register_rwsem); + mutex_lock(&uprobe->register_mutex); __uprobe_unregister(uprobe, uc); - up_write(&uprobe->register_rwsem); + mutex_unlock(&uprobe->register_mutex); put_uprobe(uprobe); + + synchronize_srcu(&uprobes_srcu); } EXPORT_SYMBOL_GPL(uprobe_unregister); =20 @@ -1192,7 +1197,7 @@ static int __uprobe_register(struct inod * We can race with uprobe_unregister()->delete_uprobe(). * Check uprobe_is_active() and retry if it is false. 
*/ - down_write(&uprobe->register_rwsem); + mutex_lock(&uprobe->register_mutex); ret =3D -EAGAIN; if (likely(uprobe_is_active(uprobe))) { consumer_add(uprobe, uc); @@ -1200,7 +1205,7 @@ static int __uprobe_register(struct inod if (ret) __uprobe_unregister(uprobe, uc); } - up_write(&uprobe->register_rwsem); + mutex_unlock(&uprobe->register_mutex); put_uprobe(uprobe); =20 if (unlikely(ret =3D=3D -EAGAIN)) @@ -1240,12 +1245,12 @@ int uprobe_apply(struct inode *inode, lo if (WARN_ON(!uprobe)) return ret; =20 - down_write(&uprobe->register_rwsem); + mutex_lock(&uprobe->register_mutex); for (con =3D uprobe->consumers; con && con !=3D uc ; con =3D con->next) ; if (con) ret =3D register_for_each_vma(uprobe, add ? uc : NULL); - up_write(&uprobe->register_rwsem); + mutex_unlock(&uprobe->register_mutex); put_uprobe(uprobe); =20 return ret; @@ -2087,14 +2092,19 @@ static struct uprobe *find_active_uprobe return uprobe; } =20 +#define for_each_consumer_rcu(pos, head) \ + for (pos =3D rcu_dereference_raw(head); pos; \ + pos =3D rcu_dereference_raw(pos->next)) + static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) { struct uprobe_consumer *uc; int remove =3D UPROBE_HANDLER_REMOVE; bool need_prep =3D false; /* prepare return uprobe, when needed */ =20 - down_read(&uprobe->register_rwsem); - for (uc =3D uprobe->consumers; uc; uc =3D uc->next) { + guard(srcu)(&uprobes_srcu); + + for_each_consumer_rcu(uc, uprobe->consumers) { int rc =3D 0; =20 if (uc->handler) { @@ -2116,7 +2126,6 @@ static void handler_chain(struct uprobe WARN_ON(!uprobe_is_active(uprobe)); unapply_uprobe(uprobe, current->mm); } - up_read(&uprobe->register_rwsem); } =20 static void @@ -2125,12 +2134,12 @@ handle_uretprobe_chain(struct return_ins struct uprobe *uprobe =3D ri->uprobe; struct uprobe_consumer *uc; =20 - down_read(&uprobe->register_rwsem); - for (uc =3D uprobe->consumers; uc; uc =3D uc->next) { + guard(srcu)(&uprobes_srcu); + + for_each_consumer_rcu(uc, uprobe->consumers) { if 
(uc->ret_handler) uc->ret_handler(uc, ri->func, regs); } - up_read(&uprobe->register_rwsem); } =20 static struct return_instance *find_next_ret_chain(struct return_instance = *ri) From nobody Sat Feb 7 17:56:20 2026 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 84EDB54F8C for ; Mon, 8 Jul 2024 09:29:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430962; cv=none; b=TsvICwTcCi47glcV5yuyfpQcFEYFvksbC/UVjMA+OhtZMeBvCiRWaNHCQ2KmNo+r88JmmXTu5lf0WuBCyRNpJ28TRCbNktTPi49eXu6BPhHWH/13nWxSYR5FacQaz9NQjAHkgAZXTdLWOND0G15ZGXKDpmEPnxqN+QU21BaCKtk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430962; c=relaxed/simple; bh=Jzk70wrwgyLqpwlKGY57fPB8+FPqSQUySn0ZoqTIHvk=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=SLJZqizgTIUwfj68J0gcC9ABQNU4WE+r5c8pCO2hhCDJcJ1eV2NF9O7JZBtzuWgo+UIa6X6zZzAVuX4TXNPybYx8MCp/DHqumfeN52D7IAmgG7ohOI1wnB++17VDQav6J3z4oQ603MVKT4Mh3kw1leqc+ZiFUtkY2FbQom/UxvA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=Og/wOdK6; arc=none smtp.client-ip=90.155.50.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="Og/wOdK6" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=casper.20170209; 
h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=rOfuhX/Ngd/1oTixnYxtLLU1jaqckcc7jQQl+4rPfhA=; b=Og/wOdK6h9PcPwOSR4GCF6P3Qr mHdhMeReDtpf2IlRf+7AnhsUrNg2PkOZysnu6MdE/GHx4UnP0J7ieaIqJRORB8PMWKMBIjcfJ703Y W3XIyPy5i5mUmfUv+oUsLH8YhX7ShORQ0/477eJgzAsQf4PTEN1NGrbiXKCOXLlNlPoMfYoevRDGM VMXiIG5/m4qz0lnjeguJOEeBbzwdUrswAWlQhVYZe/DtQoPnlU4+u/1tFUtTbfD2HD/4b6P+jz2NJ VTWA0JHmfE8DLJ1yYzcLoYcRYFBFsKYzPpZ3UCCk/sRNoGmT9ao7+DdntbXzUEYqoUvZCzEy46X/o XNlNjXEA==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by casper.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sQkgN-00000006hOc-354R; Mon, 08 Jul 2024 09:29:15 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id 1D5A3302440; Mon, 8 Jul 2024 11:29:13 +0200 (CEST) Message-Id: <20240708092415.801757710@infradead.org> User-Agent: quilt/0.65 Date: Mon, 08 Jul 2024 11:12:47 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, oleg@redhat.com, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH 06/10] perf/uprobe: Split uprobe_unregister() References: <20240708091241.544262971@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" With uprobe_unregister() having grown a synchronize_srcu(), it becomes fairly slow to call. Esp. since both users of this API call it in a loop. Peel off the sync_srcu() and do it once, after the loop. 
Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu (Google) --- include/linux/uprobes.h | 8 ++++++-- kernel/events/uprobes.c | 8 ++++++-- kernel/trace/bpf_trace.c | 6 ++++-- kernel/trace/trace_uprobe.c | 6 +++++- 4 files changed, 21 insertions(+), 7 deletions(-) --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -113,7 +113,8 @@ extern int uprobe_write_opcode(struct ar extern int uprobe_register(struct inode *inode, loff_t offset, struct upro= be_consumer *uc); extern int uprobe_register_refctr(struct inode *inode, loff_t offset, loff= _t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_= consumer *uc, bool); -extern void uprobe_unregister(struct inode *inode, loff_t offset, struct u= probe_consumer *uc); +extern void uprobe_unregister_nosync(struct inode *inode, loff_t offset, s= truct uprobe_consumer *uc); +extern void uprobe_unregister_sync(void); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start,= unsigned long end); extern void uprobe_start_dup_mmap(void); @@ -163,7 +164,10 @@ uprobe_apply(struct inode *inode, loff_t return -ENOSYS; } static inline void -uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consum= er *uc) +uprobe_unregister_nosync(struct inode *inode, loff_t offset, struct uprobe= _consumer *uc) +{ +} +static inline void uprobe_unregister_sync(void) { } static inline int uprobe_mmap(struct vm_area_struct *vma) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1126,7 +1126,7 @@ __uprobe_unregister(struct uprobe *uprob * @offset: offset from the start of the file. * @uc: identify which probe if multiple probes are colocated.
*/ -void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_c= onsumer *uc) +void uprobe_unregister_nosync(struct inode *inode, loff_t offset, struct u= probe_consumer *uc) { struct uprobe *uprobe; =20 @@ -1138,10 +1138,14 @@ void uprobe_unregister(struct inode *ino __uprobe_unregister(uprobe, uc); mutex_unlock(&uprobe->register_mutex); put_uprobe(uprobe); +} +EXPORT_SYMBOL_GPL(uprobe_unregister_nosync); =20 +void uprobe_unregister_sync(void) +{ synchronize_srcu(&uprobes_srcu); } -EXPORT_SYMBOL_GPL(uprobe_unregister); +EXPORT_SYMBOL_GPL(uprobe_unregister_sync); =20 /* * __uprobe_register - register a probe --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3181,9 +3181,11 @@ static void bpf_uprobe_unregister(struct u32 i; =20 for (i =3D 0; i < cnt; i++) { - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, - &uprobes[i].consumer); + uprobe_unregister_nosync(d_real_inode(path->dentry), uprobes[i].offset, + &uprobes[i].consumer); } + if (cnt) + uprobe_unregister_sync(); } =20 static void bpf_uprobe_multi_link_release(struct bpf_link *link) --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1104,6 +1104,7 @@ static int trace_uprobe_enable(struct tr static void __probe_event_disable(struct trace_probe *tp) { struct trace_uprobe *tu; + bool sync =3D false; =20 tu =3D container_of(tp, struct trace_uprobe, tp); WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); @@ -1112,9 +1113,12 @@ static void __probe_event_disable(struct if (!tu->inode) continue; =20 - uprobe_unregister(tu->inode, tu->offset, &tu->consumer); + uprobe_unregister_nosync(tu->inode, tu->offset, &tu->consumer); + sync =3D true; tu->inode =3D NULL; } + if (sync) + uprobe_unregister_sync(); } =20 static int probe_event_enable(struct trace_event_call *call, From nobody Sat Feb 7 17:56:20 2026 Received: from casper.infradead.org (casper.infradead.org [90.155.50.34]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No 
client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 84F125579F for ; Mon, 8 Jul 2024 09:29:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.50.34 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430962; cv=none; b=IfatNtWns59Pp5IqRWTuPnD4fwULiG0lSK1zYr70yNpwgxHx9SovzrNT8WjrDle5hy7ds9XI/eokbdi80Xqc6V591LlUrkkdBCvYJRPYzF3e3jvGoz1TEMbY9ibl5ZYkjjYCR+o0plmzZq6PTaXVL9JvKi0tFZQr85yIeqJ8HvM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430962; c=relaxed/simple; bh=XBXEJ+AdSFCenTIAnaHRXYDU1xC6HpLBXNVHMY67qMI=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=b5N/BUuXZ5O/kfKoXVWjYR2AJfmzN/j9ByGNDd+BJbfOGdqspS7fPhXjb+sISHcUkpFIMM+Q6bYUYiUR/XpALp1o9+CNfjCXgHGNbPdGxXekKEj3wooG0y8PxT67C52h3spRr0Xll1arS1rBWGLLJ6L0AlIVQ7bArAtVFajcf1Y= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=YQw2biEJ; arc=none smtp.client-ip=90.155.50.34 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="YQw2biEJ" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=casper.20170209; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=M7BG3on9gtFS2ZVkFa0TmKGBd6u6NcFvOKrWMeHPY+E=; b=YQw2biEJNBqZ+OhVgoggX6sTzn HBide21arRRMLXEx8qv5HGRZIPhFJx6WWkgKq/yjr7M8P1FHHCKIoaMct6F+2a7BkB3SuaUBMe3U7 
DXn4Smw47cY3pkaCCgl7QwWDTF7ax7e79ZNIRZcwjqzBA0kaws27B6jq8nf+UXU7fhvA8MkCu7rUM tATAC5WCh6YZlq/tQCqb1nLU7yffguZ0WvNVrc2dkCgQUfzW3YDAj4FKTJ4IziQpA+zmTozUUcVC8 DrLJytwk9V+lDlVcenCwR0dpp2z9dPr4U2XJ3v/cGtBRuKNKWd4rV3/++OutEALg8VBH8+EwoZugT zFbGl3Zw==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by casper.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sQkgN-00000006hOd-37AX; Mon, 08 Jul 2024 09:29:15 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id 214F8302D54; Mon, 8 Jul 2024 11:29:13 +0200 (CEST) Message-Id: <20240708092415.906217175@infradead.org> User-Agent: quilt/0.65 Date: Mon, 08 Jul 2024 11:12:48 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, oleg@redhat.com, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH 07/10] perf/uprobe: Convert (some) uprobe->refcount to SRCU References: <20240708091241.544262971@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" With handle_swbp() hitting concurrently on (all) CPUs, potentially on the same uprobe, the uprobe->refcount can get *very* hot. Move the struct uprobe lifetime into uprobes_srcu such that it covers both the uprobe and the uprobe->consumers list. With this, handle_swbp() can use a single large SRCU critical section to avoid taking a refcount on the uprobe for its duration. Notably, the single-step and uretprobe paths need a reference that leaves handle_swbp() and will, for now, still use ->refcount.
Signed-off-by: Peter Zijlstra (Intel) --- kernel/events/uprobes.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -615,7 +615,7 @@ static void put_uprobe(struct uprobe *up mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(uprobe, NULL); mutex_unlock(&delayed_uprobe_lock); - call_rcu(&uprobe->rcu, uprobe_free_rcu); + call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu); } } =20 @@ -667,7 +667,7 @@ static struct uprobe *__find_uprobe(stru struct rb_node *node =3D rb_find_rcu(&key, &uprobes_tree, __uprobe_cmp_ke= y); =20 if (node) - return get_uprobe(__node_2_uprobe(node)); + return __node_2_uprobe(node); =20 return NULL; } @@ -680,7 +680,7 @@ static struct uprobe *find_uprobe(struct { unsigned int seq; =20 - guard(rcu)(); + lockdep_assert(srcu_read_lock_held(&uprobes_srcu)); =20 do { seq =3D read_seqcount_begin(&uprobes_seqcount); @@ -1130,6 +1130,8 @@ void uprobe_unregister_nosync(struct ino { struct uprobe *uprobe; =20 + guard(srcu)(&uprobes_srcu); + uprobe =3D find_uprobe(inode, offset); if (WARN_ON(!uprobe)) return; @@ -1137,7 +1139,6 @@ void uprobe_unregister_nosync(struct ino mutex_lock(&uprobe->register_mutex); __uprobe_unregister(uprobe, uc); mutex_unlock(&uprobe->register_mutex); - put_uprobe(uprobe); } EXPORT_SYMBOL_GPL(uprobe_unregister_nosync); =20 @@ -1247,6 +1248,8 @@ int uprobe_apply(struct inode *inode, lo struct uprobe_consumer *con; int ret =3D -ENOENT; =20 + guard(srcu)(&uprobes_srcu); + uprobe =3D find_uprobe(inode, offset); if (WARN_ON(!uprobe)) return ret; @@ -1257,7 +1260,6 @@ int uprobe_apply(struct inode *inode, lo if (con) ret =3D register_for_each_vma(uprobe, add ? 
uc : NULL); mutex_unlock(&uprobe->register_mutex); - put_uprobe(uprobe); =20 return ret; } @@ -1977,7 +1979,7 @@ pre_ssout(struct uprobe *uprobe, struct return err; } =20 - utask->active_uprobe =3D uprobe; + utask->active_uprobe =3D get_uprobe(uprobe); utask->state =3D UTASK_SSTEP; return 0; } @@ -2108,7 +2110,7 @@ static void handler_chain(struct uprobe int remove =3D UPROBE_HANDLER_REMOVE; bool need_prep =3D false; /* prepare return uprobe, when needed */ =20 - guard(srcu)(&uprobes_srcu); + lockdep_assert(srcu_read_lock_held(&uprobes_srcu)); =20 for_each_consumer_rcu(uc, uprobe->consumers) { int rc =3D 0; @@ -2227,6 +2229,8 @@ static void handle_swbp(struct pt_regs * if (bp_vaddr =3D=3D get_trampoline_vaddr()) return handle_trampoline(regs); =20 + guard(srcu)(&uprobes_srcu); + uprobe =3D find_active_uprobe(bp_vaddr, &is_swbp); if (!uprobe) { if (is_swbp > 0) { @@ -2255,7 +2259,7 @@ static void handle_swbp(struct pt_regs * * new and not-yet-analyzed uprobe at the same address, restart. */ if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) - goto out; + return; =20 /* * Pairs with the smp_wmb() in prepare_uprobe(). 
@@ -2268,22 +2272,17 @@ static void handle_swbp(struct pt_regs * =20 /* Tracing handlers use ->utask to communicate with fetch methods */ if (!get_utask()) - goto out; + return; =20 if (arch_uprobe_ignore(&uprobe->arch, regs)) - goto out; + return; =20 handler_chain(uprobe, regs); =20 if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) - goto out; - - if (!pre_ssout(uprobe, regs, bp_vaddr)) return; =20 - /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */ -out: - put_uprobe(uprobe); + pre_ssout(uprobe, regs, bp_vaddr); } =20 /* From nobody Sat Feb 7 17:56:20 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B7ADC55E58 for ; Mon, 8 Jul 2024 09:29:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430970; cv=none; b=MMb1qOUdPXg78e//C6kMtm2EqhNh87sKYCNZIrGM3KVfcvdZi4YiOFYhbKNk+2F3GF+7nK3R2H1WbI+9tGC7CvAB94a+bsW2Wv7eA6/O26qxvI135HoBm+koT7fKiiAlS1TZFS3PBrTExfsBrI3nxNCFmfHf0iXmDR6yTuPC1Ak= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430970; c=relaxed/simple; bh=dTtktbIQzPN0M1WT/SSamB3BTYm0QnUYyOtyVkFgo2s=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=NxkLzt3UG8F426rS1kqgTOSMM9d+ZI3okNUp8E1DtpRgUpDxVcGHLfSeenE8SUKnJv6gnVHKRO88oQjefjpS9Cuk//UvJx27BKgUQ4NZj+cH2oKrtyuJo/j90wHb/bw2gLqaD0PuT1Ub6sr2vIDtRJDKsG55rdTTcQv+fSyyulM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=XlrAmO7w; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none 
dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="XlrAmO7w" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=epm86oBKZwoCXF9Cm3n9QKwfFnScPDeksYSx49nPLjc=; b=XlrAmO7wkpxZa38ZJDKZYvgdCT 0I/265L5x/eqAMAJz3wL9jS+qGVvJ18V6Rjtv5mxbvA2MCcdKwphchej4geFBSgtLKal7kxxACgVk RZYepdzrjpf05+nP8jgTWMJhCQoYeVbPLRmXxP6tSmQaoZs7Rh2N7Fs9InZCdB3mqK+37r43Uw+Q6 BGrV5sv0yde4GstfYlZNR3LyPDqdQYG4ddoF3+l3c5x6r5EdrfK20PCtYXK/WoGzlbzz+WH+h8oj1 o2zxu8lcEPd1QVSWmaZ7ksF+8y5D3nEyJr5o/iJL37vO8owyKXJN1XypJv8l4VfHUZ018e+RN3cj0 pdP5OtWA==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by desiato.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sQkgN-00000000ZsQ-4AzS; Mon, 08 Jul 2024 09:29:18 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id 254BF302DCD; Mon, 8 Jul 2024 11:29:13 +0200 (CEST) Message-Id: <20240708092416.010695534@infradead.org> User-Agent: quilt/0.65 Date: Mon, 08 Jul 2024 11:12:49 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, oleg@redhat.com, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH 08/10] srcu: Add __srcu_clone_read_lock() References: <20240708091241.544262971@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" In order to support carrying an srcu_read_lock() section 
across fork, where both the parent and child process will do: srcu_read_unlock(), it is needed to account for the extra decrement with an extra increment at fork time. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/srcu.h | 1 + include/linux/srcutiny.h | 10 ++++++++++ kernel/rcu/srcutree.c | 5 +++++ 3 files changed, 16 insertions(+) --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -55,6 +55,7 @@ void call_srcu(struct srcu_struct *ssp, void (*func)(struct rcu_head *head)); void cleanup_srcu_struct(struct srcu_struct *ssp); int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_clone_read_lock(struct srcu_struct *ssp, int idx); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); void synchronize_srcu(struct srcu_struct *ssp); unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -71,6 +71,16 @@ static inline int __srcu_read_lock(struc return idx; } =20 +static inline void __srcu_clone_read_lock(struct srcu_struct *ssp, int idx) +{ + int newval; + + preempt_disable(); // Needed for PREEMPT_AUTO + newval =3D READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1; + WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval); + preempt_enable(); +} + static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) { synchronize_srcu(ssp); --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -720,6 +720,11 @@ int __srcu_read_lock(struct srcu_struct } EXPORT_SYMBOL_GPL(__srcu_read_lock); =20 +void __srcu_clone_read_lock(struct srcu_struct *ssp, int idx) +{ + this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); +} + /* * Removes the count for the old reader from the appropriate per-CPU * element of the srcu_struct. 
Note that this may well be a different From nobody Sat Feb 7 17:56:20 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B7A85558B7 for ; Mon, 8 Jul 2024 09:29:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430970; cv=none; b=KerTtnfFOc8AGQuQWRgviUBtYMWtVplfxzDlz4cJLRu16GT67y3EG9VugF2vk+l5ZZ7BipnsXJ1nCDF+qH0MkEqtw54kbb4Z96X938DOXSIHvpOXkwIex30q+P9EMRuUgVO34H8Q47gN0PKTVV71iFx3hCiq89Xh4oR19WlIXus= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430970; c=relaxed/simple; bh=19oikYIhlf6k4p96gB5CwVw/zCLin1gB3y02oDAksuA=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=fXNalCO/rqF+g9vCGeckuwLK407Spa1QLZ6lY3MvILMAm9Jmg4/HVFfrZoCNdO3Z9abj0mx+W6Xqr92WT9NNaMW8r3SAxSyODHyIr1lRv2WfNjtzrHqO5PKJOG6pB/xpT+3OQ0O3JmXKt0v8LpsbsEC+joJ4RQ7jMAYDOMgDUQs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=GGCoToo9; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="GGCoToo9" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: 
Content-ID:Content-Description:In-Reply-To; bh=INjGkbUShkrtBrjMmEIfWNm46FJQN+XQG6PJX5gCW08=; b=GGCoToo9n3ejpYpeNouOt/pl7T 3hJ4OqSPkjNJ7qTMi9gs8/gCZYZ8GVSiSTQzT5s82aKpuHnN/hrFcLbXhqtFU24yaSW7Ht8Zb/nGm xwHMBcnDsKi50clCamGgVflHDohEnFOe7/V4V/mrQh29BnDRkAH5g+06990r0fSgAArab99f7kGkS 40hI4eNn4OiDOA1/rRXIpByLQkWrWlyEm6bKGo4IWwYGa/AoIUh61X7N2RUyuPQt2Ja+o3jLEURrF 2MSbxqiHVp/KnNn3aLY6hbKu7Jp5lVIpCR4oovaHwsSXn+hLREP+7WkyCIeCjNQkpLi7m0wKu4k09 qW8RL6ng==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by desiato.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sQkgO-00000000ZsR-15RL; Mon, 08 Jul 2024 09:29:19 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id 292F9303DD8; Mon, 8 Jul 2024 11:29:13 +0200 (CEST) Message-Id: <20240708092416.127360526@infradead.org> User-Agent: quilt/0.65 Date: Mon, 08 Jul 2024 11:12:50 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, oleg@redhat.com, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH 09/10] perf/uprobe: Convert single-step and uretprobe to SRCU References: <20240708091241.544262971@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Both single-step and uretprobes take a refcount on struct uprobe in handle_swbp() in order to ensure struct uprobe stays extant until a next trap. Since uprobe_unregister() only cares about the uprobe_consumer life-time, and these intra-trap sections can be arbitrarily large, create a second SRCU domain to cover these. Notably, a uretprobe with a registered return_instance that never triggers -- because userspace -- will currently pin the return_instance and related uprobe until the task dies. 
With this conversion to SRCU, this behaviour will inhibit freeing of all uprobes. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/uprobes.h | 2 ++ kernel/events/uprobes.c | 23 ++++++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -78,6 +78,7 @@ struct uprobe_task { =20 struct return_instance *return_instances; unsigned int depth; + unsigned int active_srcu_idx; }; =20 struct return_instance { @@ -86,6 +87,7 @@ struct return_instance { unsigned long stack; /* stack pointer */ unsigned long orig_ret_vaddr; /* original return address */ bool chained; /* true, if instance is nested */ + int srcu_idx; =20 struct return_instance *next; /* keep as stack */ }; --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -51,6 +51,7 @@ static struct mutex uprobes_mmap_mutex[U DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem); =20 DEFINE_STATIC_SRCU(uprobes_srcu); +DEFINE_STATIC_SRCU(uretprobes_srcu); =20 /* Have a copy of original instruction */ #define UPROBE_COPY_INSN 0 @@ -598,12 +599,18 @@ static struct uprobe *get_uprobe(struct return uprobe; } =20 -static void uprobe_free_rcu(struct rcu_head *rcu) +static void uprobe_free_stage2(struct rcu_head *rcu) { struct uprobe *uprobe =3D container_of(rcu, struct uprobe, rcu); kfree(uprobe); } =20 +static void uprobe_free_stage1(struct rcu_head *rcu) +{ + struct uprobe *uprobe =3D container_of(rcu, struct uprobe, rcu); + call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_stage2); +} + static void put_uprobe(struct uprobe *uprobe) { if (refcount_dec_and_test(&uprobe->ref)) { @@ -615,7 +622,7 @@ static void put_uprobe(struct uprobe *up mutex_lock(&delayed_uprobe_lock); delayed_uprobe_remove(uprobe, NULL); mutex_unlock(&delayed_uprobe_lock); - call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu); + call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_stage1); } } =20 @@ -1735,7 +1742,7 @@ unsigned long uprobe_get_trap_addr(struc static struct
return_instance *free_ret_instance(struct return_instance *r= i) { struct return_instance *next =3D ri->next; - put_uprobe(ri->uprobe); + srcu_read_unlock(&uretprobes_srcu, ri->srcu_idx); kfree(ri); return next; } @@ -1753,7 +1760,7 @@ void uprobe_free_utask(struct task_struc return; =20 if (utask->active_uprobe) - put_uprobe(utask->active_uprobe); + srcu_read_unlock(&uretprobes_srcu, utask->active_srcu_idx); =20 ri =3D utask->return_instances; while (ri) @@ -1796,7 +1803,7 @@ static int dup_utask(struct task_struct return -ENOMEM; =20 *n =3D *o; - get_uprobe(n->uprobe); + __srcu_clone_read_lock(&uretprobes_srcu, n->srcu_idx); n->next =3D NULL; =20 *p =3D n; @@ -1939,7 +1946,8 @@ static void prepare_uretprobe(struct upr orig_ret_vaddr =3D utask->return_instances->orig_ret_vaddr; } =20 - ri->uprobe =3D get_uprobe(uprobe); + ri->srcu_idx =3D srcu_read_lock(&uretprobes_srcu); + ri->uprobe =3D uprobe; ri->func =3D instruction_pointer(regs); ri->stack =3D user_stack_pointer(regs); ri->orig_ret_vaddr =3D orig_ret_vaddr; @@ -1979,7 +1987,8 @@ pre_ssout(struct uprobe *uprobe, struct return err; } =20 - utask->active_uprobe =3D get_uprobe(uprobe); + utask->active_srcu_idx =3D srcu_read_lock(&uretprobes_srcu); + utask->active_uprobe =3D uprobe; utask->state =3D UTASK_SSTEP; return 0; } From nobody Sat Feb 7 17:56:20 2026 Received: from desiato.infradead.org (desiato.infradead.org [90.155.92.199]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 251AA56452 for ; Mon, 8 Jul 2024 09:29:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=90.155.92.199 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430970; cv=none; b=mllTubQ2l/lQcPywL8TqJ+rUVlWrCmd7W+AcOcpvG2ojtSiBAvNR9va0Z8UYQ1w42hPib4hZz23jF9VPcQ/IyHoV1R0FUBokjfRjpwlb/+Bob7o6Vg09j+dH9ST34+R+C+I5LUeSYQVnb6sFOvvDsIV7fIWdsrGMU8E/j3u2Lvw= ARC-Message-Signature: 
i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1720430970; c=relaxed/simple; bh=NIznFFC+YqfstGk8XO7oJGHArYFE63Hknj2SoWt6TsY=; h=Message-Id:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=HP60VQaTGm0c/JdRf3Es0XwZuokdneuikbqLWrrZ2Kfk+5Y+iOH98XDn8+3UWVrjAB3CiTrRLFFe2oL0uSBcybeATkyq+0RvW7fadf71rMwGH/UQ6LurunU9M08MK2SMspU8i0fJToSoLNoWJfpgyoHBTMVoZSfN7LkoOsfV1/o= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org; spf=none smtp.mailfrom=infradead.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b=BzQoAiQL; arc=none smtp.client-ip=90.155.92.199 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=infradead.org Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=infradead.org Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=infradead.org header.i=@infradead.org header.b="BzQoAiQL" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=infradead.org; s=desiato.20200630; h=Content-Type:MIME-Version:References: Subject:Cc:To:From:Date:Message-Id:Sender:Reply-To:Content-Transfer-Encoding: Content-ID:Content-Description:In-Reply-To; bh=AYprHqdZvozegjyHQBgBMRoiJh+6953VLAW68QuttTQ=; b=BzQoAiQL9I7o+H+8dxZD+BCApE tp6gHlJGlDKtfvwVz+Jtx/n5jLZ+ZSw+IVZu0vbs56KOSaIHQnAA2E1JtZijVXITclURJ9Q1RTjzW VebEQcJSojL26exBrIpxB2ZwUJBDlHPYCgEcwJ9DWlobpZmpHApk4Hj9u1IoXGwAskJFhTCyT3fG0 JAj9j3vgXWaFG2f/x4pYu7fX1N9mgig+JFS1P5w+zmsxn31puXqeJAG19mOHHDCkxV447BF1v2tq7 RcJfudZU9fqcVQGvfzPJPmyObjSJKmIJd7JWC9vqQcfUDPK/3MXD/ztPhUbgR4aNjQbPEPv8xoupI 0PnWmiXw==; Received: from j130084.upc-j.chello.nl ([24.132.130.84] helo=noisy.programming.kicks-ass.net) by desiato.infradead.org with esmtpsa (Exim 4.97.1 #2 (Red Hat Linux)) id 1sQkgO-00000000ZsS-1FMa; Mon, 08 Jul 2024 09:29:19 +0000 Received: by noisy.programming.kicks-ass.net (Postfix, from userid 0) id 2DCE9304BE6; Mon, 8 Jul 2024 
11:29:13 +0200 (CEST) Message-Id: <20240708092416.235666453@infradead.org> User-Agent: quilt/0.65 Date: Mon, 08 Jul 2024 11:12:51 +0200 From: Peter Zijlstra To: mingo@kernel.org, andrii@kernel.org Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, rostedt@goodmis.org, mhiramat@kernel.org, oleg@redhat.com, jolsa@kernel.org, clm@meta.com, paulmck@kernel.org Subject: [PATCH 10/10] perf/uprobe: Add uretprobe timer References: <20240708091241.544262971@infradead.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" In order to put a bound on the uretprobes_srcu critical section, add a timer to uprobe_task. Upon every return_instance (RI) added or removed, the timer is pushed forward to now + 1s. If the timer were ever to fire, it would convert the SRCU 'reference' to a refcount reference if possible. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/uprobes.h | 8 +++++ kernel/events/uprobes.c | 66 +++++++++++++++++++++++++++++++++++++++++++= +---- 2 files changed, 68 insertions(+), 6 deletions(-) --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -15,6 +15,7 @@ #include #include #include +#include =20 struct vm_area_struct; struct mm_struct; @@ -79,6 +80,10 @@ struct uprobe_task { struct return_instance *return_instances; unsigned int depth; unsigned int active_srcu_idx; + + struct timer_list ri_timer; + struct callback_head ri_task_work; + struct task_struct *task; }; =20 struct return_instance { @@ -86,7 +91,8 @@ struct return_instance { unsigned long func; unsigned long stack; /* stack pointer */ unsigned long orig_ret_vaddr; /* original return address */ - bool chained; /* true, if instance is nested */ + u8 chained; /* true, if instance is nested */ + u8 has_ref; int srcu_idx; =20 struct return_instance *next; /* keep as stack */ --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1742,7
+1742,12 @@ unsigned long uprobe_get_trap_addr(struc static struct return_instance *free_ret_instance(struct return_instance *r= i) { struct return_instance *next =3D ri->next; - srcu_read_unlock(&uretprobes_srcu, ri->srcu_idx); + if (ri->uprobe) { + if (ri->has_ref) + put_uprobe(ri->uprobe); + else + srcu_read_unlock(&uretprobes_srcu, ri->srcu_idx); + } kfree(ri); return next; } @@ -1766,11 +1771,48 @@ void uprobe_free_utask(struct task_struc while (ri) ri =3D free_ret_instance(ri); =20 + timer_delete_sync(&utask->ri_timer); + task_work_cancel(utask->task, &utask->ri_task_work); xol_free_insn_slot(t); kfree(utask); t->utask =3D NULL; } =20 +static void return_instance_task_work(struct callback_head *head) +{ + struct uprobe_task *utask =3D container_of(head, struct uprobe_task, ri_t= ask_work); + struct return_instance *ri; + + for (ri =3D utask->return_instances; ri; ri =3D ri->next) { + if (!ri->uprobe) + continue; + if (ri->has_ref) + continue; + if (refcount_inc_not_zero(&ri->uprobe->ref)) + ri->has_ref =3D true; + else + ri->uprobe =3D NULL; + srcu_read_unlock(&uretprobes_srcu, ri->srcu_idx); + } +} + +static void return_instance_timer(struct timer_list *timer) +{ + struct uprobe_task *utask =3D container_of(timer, struct uprobe_task, ri_= timer); + task_work_add(utask->task, &utask->ri_task_work, TWA_SIGNAL); +} + +static struct uprobe_task *alloc_utask(struct task_struct *task) +{ + struct uprobe_task *utask =3D kzalloc(sizeof(struct uprobe_task), GFP_KER= NEL); + if (!utask) + return NULL; + timer_setup(&utask->ri_timer, return_instance_timer, 0); + init_task_work(&utask->ri_task_work, return_instance_task_work); + utask->task =3D task; + return utask; +} + /* * Allocate a uprobe_task object for the task if necessary. * Called when the thread hits a breakpoint. 
@@ -1782,7 +1824,7 @@ void uprobe_free_utask(struct task_struc static struct uprobe_task *get_utask(void) { if (!current->utask) - current->utask =3D kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); + current->utask =3D alloc_utask(current); return current->utask; } =20 @@ -1791,7 +1833,7 @@ static int dup_utask(struct task_struct struct uprobe_task *n_utask; struct return_instance **p, *o, *n; =20 - n_utask =3D kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); + n_utask =3D alloc_utask(t); if (!n_utask) return -ENOMEM; t->utask =3D n_utask; @@ -1803,13 +1845,20 @@ static int dup_utask(struct task_struct return -ENOMEM; =20 *n =3D *o; - __srcu_clone_read_lock(&uretprobes_srcu, n->srcu_idx); + if (n->uprobe) { + if (n->has_ref) + get_uprobe(n->uprobe); + else + __srcu_clone_read_lock(&uretprobes_srcu, n->srcu_idx); + } n->next =3D NULL; =20 *p =3D n; p =3D &n->next; n_utask->depth++; } + if (n_utask->return_instances) + mod_timer(&n_utask->ri_timer, jiffies + HZ); =20 return 0; } @@ -1957,6 +2006,8 @@ static void prepare_uretprobe(struct upr ri->next =3D utask->return_instances; utask->return_instances =3D ri; =20 + mod_timer(&utask->ri_timer, jiffies + HZ); + return; fail: kfree(ri); @@ -2151,6 +2202,9 @@ handle_uretprobe_chain(struct return_ins struct uprobe *uprobe =3D ri->uprobe; struct uprobe_consumer *uc; =20 + if (!uprobe) + return; + guard(srcu)(&uprobes_srcu); =20 for_each_consumer_rcu(uc, uprobe->consumers) { @@ -2197,8 +2251,10 @@ static void handle_trampoline(struct pt_ =20 instruction_pointer_set(regs, ri->orig_ret_vaddr); do { - if (valid) + if (valid) { handle_uretprobe_chain(ri, regs); + mod_timer(&utask->ri_timer, jiffies + HZ); + } ri =3D free_ret_instance(ri); utask->depth--; } while (ri !=3D next);