From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 748AEC61DA7 for ; Fri, 27 Jan 2023 19:58:11 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233241AbjA0T6J (ORCPT ); Fri, 27 Jan 2023 14:58:09 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37590 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233105AbjA0T5i (ORCPT ); Fri, 27 Jan 2023 14:57:38 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 827C68CE0E for ; Fri, 27 Jan 2023 11:55:37 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-4fee82718afso65564437b3.5 for ; Fri, 27 Jan 2023 11:55:37 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=SRJKiHKrq/SvrK23nJ6WvAwYz6ftOZ3c7xtMrR/Zkmo=; b=q++c4JbFs1cULNmXHJ6C3DczcQ0hXzOKIr6+9Qx2yz39vZWA+rjLc18PKWJkWy6B+C UeBdui8K3sk0l0OIGcxNzLRt494xbxI7WVRAtP+VLTOL1/q+UNWJf0WDLHuQPthduco2 hKKj52t3wKu54Vbflyx5DWFngCga+0YCh2aWf3wXfSgRE76VHyLLWka1QvKlb5EEsVq+ yKsc5SnLc4E9cZlzwXYLALtVVR/qLY2NGEjlF8VIz3gJJ8VVrEVxdVCt2bqpvNVXSzLO bCSv1unz+7C8JA4tTmfPKXKgKgaiBEIUDtx7XKU+dM+xg3yKNZdzkdQzW0TW1MecPnG9 7xHA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=SRJKiHKrq/SvrK23nJ6WvAwYz6ftOZ3c7xtMrR/Zkmo=; b=7lYglgmIJOhTygxrBuL2xHFAQv6Cyeu6N45WB8ACorCeqqC+OmB6jswNMJ+VBsA+2t BfR82ptjgHY3mkm9ZVoALZYaK+N6hI0pO+n+6R+00yoec21jB+nwKD3I25ZMjRSNhsF4 E50lXpwQ6Cky4oh300k5CpVFNcbTIhMLxWKG5jXg+22j3FNNPLCcs8HfhsW81Tolf2BC f1Aa3aFDvuUH+7flBPnpHAp4YDyOJAJesEQ0VF08xiCk8LfhtcspvWR16QSW5lFN/WYj R4sZXWztTqFlFZKoapKa7IXdl2ufzAtBJ7o9RS1/d7DrG8Jzu61loAiPzxVIvWhHFRTN aD0A== X-Gm-Message-State: AFqh2koej8TwI34zFPQB3NG0Mm7M5nm4XGoesfk+4YGnPaZzhplMjB+A S+jEfAFGrOgGNMTPGUQz+kpoFTAWxl8= X-Google-Smtp-Source: AMrXdXvEqVwWOYAYjpqjq4VZxkQLftXilDeGRL6KyIgBgBs2rP6fdUqH1kwtp8PmmItyT8YCkU5DjbIMLFI= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a25:4e54:0:b0:7fe:6d35:1a28 with SMTP id c81-20020a254e54000000b007fe6d351a28mr4351236ybb.534.1674848476456; Fri, 27 Jan 2023 11:41:16 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:38 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-2-surenb@google.com> Subject: [PATCH v2 01/33] maple_tree: Be more cautious about dead nodes From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com, Liam Howlett Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Liam Howlett ma_pivots() and ma_data_end() may be called with a dead node. Ensure to that the node isn't dead before using the returned values. This is necessary for RCU mode of the maple tree. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 52 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 5e9703189259..e5eeecd14eee 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -537,6 +537,7 @@ static inline bool ma_dead_node(const struct maple_node= *node) =20 return (parent =3D=3D node); } + /* * mte_dead_node() - check if the @enode is dead. * @enode: The encoded maple node @@ -618,6 +619,8 @@ static inline unsigned int mas_alloc_req(const struct m= a_state *mas) * @node - the maple node * @type - the node type * + * In the event of a dead node, this array may be %NULL + * * Return: A pointer to the maple node pivots */ static inline unsigned long *ma_pivots(struct maple_node *node, @@ -1089,8 +1092,11 @@ static int mas_ascend(struct ma_state *mas) a_type =3D mas_parent_enum(mas, p_enode); a_node =3D mte_parent(p_enode); a_slot =3D mte_parent_slot(p_enode); - pivots =3D ma_pivots(a_node, a_type); a_enode =3D mt_mk_node(a_node, a_type); + pivots =3D ma_pivots(a_node, a_type); + + if (unlikely(ma_dead_node(a_node))) + return 1; =20 if (!set_min && a_slot) { set_min =3D true; @@ -1394,6 +1400,9 @@ static inline unsigned char ma_data_end(struct maple_= node *node, { unsigned char offset; =20 + if (!pivots) + return 0; + if (type =3D=3D maple_arange_64) return ma_meta_end(node, type); =20 @@ -1429,6 +1438,9 @@ static inline unsigned char mas_data_end(struct ma_st= ate *mas) return ma_meta_end(node, type); =20 pivots =3D ma_pivots(node, type); + if (unlikely(ma_dead_node(node))) + return 0; + offset =3D mt_pivots[type] - 1; if (likely(!pivots[offset])) return ma_meta_end(node, type); @@ -4498,6 +4510,9 @@ static inline int mas_prev_node(struct ma_state *mas,= unsigned long min) node =3D mas_mn(mas); slots =3D ma_slots(node, mt); pivots =3D ma_pivots(node, mt); + if (unlikely(ma_dead_node(node))) + return 1; + mas->max =3D pivots[offset]; if (offset) mas->min =3D pivots[offset - 1] + 1; @@ -4519,6 +4534,9 @@ static inline int mas_prev_node(struct ma_state *mas,= unsigned long min) slots =3D ma_slots(node, mt); pivots =3D ma_pivots(node, mt); offset =3D ma_data_end(node, mt, pivots, mas->max); + if (unlikely(ma_dead_node(node))) + return 1; + if (offset) mas->min =3D pivots[offset - 1] + 1; =20 @@ -4567,6 +4585,7 @@ static inline int mas_next_node(struct ma_state *mas,= struct maple_node *node, struct maple_enode *enode; int level =3D 0; unsigned char offset; + unsigned char node_end; enum maple_type mt; void __rcu **slots; =20 @@ -4590,7 +4609,11 @@ static inline int mas_next_node(struct ma_state *mas= , struct maple_node *node, node =3D mas_mn(mas); mt =3D mte_node_type(mas->node); pivots =3D ma_pivots(node, mt); - } while (unlikely(offset =3D=3D ma_data_end(node, mt, pivots, mas->max))); + node_end =3D ma_data_end(node, mt, pivots, mas->max); + if (unlikely(ma_dead_node(node))) + return 1; + + } while (unlikely(offset =3D=3D node_end)); =20 slots =3D ma_slots(node, mt); pivot =3D mas_safe_pivot(mas, pivots, ++offset, mt); @@ -4606,6 +4629,9 @@ static inline int mas_next_node(struct ma_state *mas,= struct maple_node *node, mt =3D mte_node_type(mas->node); slots =3D ma_slots(node, mt); pivots =3D ma_pivots(node, mt); + if (unlikely(ma_dead_node(node))) + return 1; + offset =3D 0; pivot =3D pivots[0]; } @@ -4652,11 +4678,14 @@ static inline void *mas_next_nentry(struct ma_state= *mas, return NULL; } =20 - pivots =3D ma_pivots(node, type); slots =3D ma_slots(node, type); - mas->index =3D mas_safe_min(mas, pivots, mas->offset); + pivots =3D ma_pivots(node, type); count =3D ma_data_end(node, type, pivots, mas->max); - if (ma_dead_node(node)) + if (unlikely(ma_dead_node(node))) + return NULL; + + mas->index =3D mas_safe_min(mas, pivots, mas->offset); + if (unlikely(ma_dead_node(node))) return NULL; =20 if (mas->index > max) @@ -4810,6 +4839,11 @@ static inline void *mas_prev_nentry(struct ma_state = *mas, unsigned long limit, =20 slots =3D ma_slots(mn, mt); pivots =3D ma_pivots(mn, mt); + if (unlikely(ma_dead_node(mn))) { + mas_rewalk(mas, index); + goto retry; + } + if (offset =3D=3D mt_pivots[mt]) pivot =3D mas->max; else @@ -6624,11 +6658,11 @@ static inline void *mas_first_entry(struct ma_state= *mas, struct maple_node *mn, while (likely(!ma_is_leaf(mt))) { MT_BUG_ON(mas->tree, mte_dead_node(mas->node)); slots =3D ma_slots(mn, mt); - pivots =3D ma_pivots(mn, mt); - max =3D pivots[0]; entry =3D mas_slot(mas, slots, 0); + pivots =3D ma_pivots(mn, mt); if (unlikely(ma_dead_node(mn))) return NULL; + max =3D pivots[0]; mas->node =3D entry; mn =3D mas_mn(mas); mt =3D mte_node_type(mas->node); @@ -6648,13 +6682,13 @@ static inline void *mas_first_entry(struct ma_state= *mas, struct maple_node *mn, if (likely(entry)) return entry; =20 - pivots =3D ma_pivots(mn, mt); - mas->index =3D pivots[0] + 1; mas->offset =3D 1; entry =3D mas_slot(mas, slots, 1); + pivots =3D ma_pivots(mn, mt); if (unlikely(ma_dead_node(mn))) return NULL; =20 + mas->index =3D pivots[0] + 1; if (mas->index > limit) goto none; =20 --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A4C67C61DA4 for ; Fri, 27 Jan 2023 19:42:59 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231881AbjA0Tm6 (ORCPT ); Fri, 27 Jan 2023 14:42:58 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50624 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231681AbjA0Tmx (ORCPT ); Fri, 27 Jan 2023 14:42:53 -0500 Received: from mail-pj1-x104a.google.com (mail-pj1-x104a.google.com [IPv6:2607:f8b0:4864:20::104a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A602B7AE44 for ; Fri, 27 Jan 2023 11:42:21 -0800 (PST) Received: by mail-pj1-x104a.google.com with SMTP id om10-20020a17090b3a8a00b002299e350deaso4781924pjb.1 for ; Fri, 27 Jan 2023 11:42:21 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=6D7AVbhEFxAo5EKdbJ2HQfTq9cKDp9C6gMDUsIDn2rU=; b=ij3G6594G9QMVIcaC7y7IotZlrRGSd461+VUEA/BMKS/DqdJRnleVk5IzJ5gil69bZ nXwMrZpHBfopZhY+dBOM8TntT+xgyMzx0lhn0c4XjFlDdq8hUc51rQLAJayIjAlxU0Vs tSDUCQts0+fWuoHVHrLcPeZOnjtBdS6uOzMiFPrkU4AVT++YMj0+eDXitszbX73gjVet Uj7on6Z68KT42I30IFAHZ8MnQyKVo+KH97KXkIJWbVMmYdISzeeP/HtVHdR/3Pzjnpiz r0YfwFvkG3mRoMMu6hvaQ0O0QqVpO+Mcl7G8itnKrK/kLoHBd5Ey6CcVhBTBp6nBJTd/ 24fw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=6D7AVbhEFxAo5EKdbJ2HQfTq9cKDp9C6gMDUsIDn2rU=; b=5nPrbRm9BDFfVpwkFKr5cFjhiDQgUq9n3D4YOhCJTh3mLLYu5Q8UFU4n9C/hQa0Fcv R99XWI96OyBysoEVRtRug/NvqAtSS2YT17qcKIu6Zax+mgQkNIzRXnufNyA7SZ6+EkKj 6CUb0Rfnnsr8mLX1yuw0vJ2evI5Bs/W/uP7qy2WPKIUS2HnQ1DbVkg8dFh+tB+vSOAkA E6b5/o162LD/ITIgYxTjp3UUKUh9hhSM0keF1mx265aRQk21g+O3x8FXTUKE/mfylNHL a8b25BLSXC2cWULM3x6Ztdr+m0KjG+9arJ4HUnoLCuTNbSWL4TC6pdNywR9jp6y9a9Az AX4A== X-Gm-Message-State: AO0yUKXgDJyHl8MsqKy2+pJFUILsmFRX0zIjGI6aGZkx7OvXYcokLyWz siwMAHxBRtY0wzm/ZkPAyJLSU5wopFI= X-Google-Smtp-Source: AK7set+8nPPCBQ/ToCHxQhHQnspD+CZKcqljSV2Aj5HMLvXZA8Wu8rpp2+AujJr6dncfulY9izqabQa2p3o= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a05:6a00:1990:b0:593:909f:ed45 with SMTP id d16-20020a056a00199000b00593909fed45mr72889pfl.0.1674848478930; Fri, 27 Jan 2023 11:41:18 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:39 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-3-surenb@google.com> Subject: [PATCH v2 02/33] maple_tree: Detect dead nodes in mas_start() From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com, Liam Howlett Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Liam Howlett When initially starting a search, the root node may already be in the process of being replaced in RCU mode. Detect and restart the walk if this is the case. This is necessary for RCU mode of the maple tree. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index e5eeecd14eee..482e17a460cb 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1353,12 +1353,16 @@ static inline struct maple_enode *mas_start(struct = ma_state *mas) mas->max =3D ULONG_MAX; mas->depth =3D 0; =20 +retry: root =3D mas_root(mas); /* Tree with nodes */ if (likely(xa_is_node(root))) { mas->depth =3D 1; mas->node =3D mte_safe_root(root); mas->offset =3D 0; + if (mte_dead_node(mas->node)) + goto retry; + return NULL; } =20 --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 9675DC54EAA for ; Fri, 27 Jan 2023 19:52:57 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232990AbjA0Tw4 (ORCPT ); Fri, 27 Jan 2023 14:52:56 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59326 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230120AbjA0Twk (ORCPT ); Fri, 27 Jan 2023 14:52:40 -0500 Received: from mail-oo1-xc4a.google.com (mail-oo1-xc4a.google.com [IPv6:2607:f8b0:4864:20::c4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A4D0B8CE1C for ; Fri, 27 Jan 2023 11:49:45 -0800 (PST) Received: by mail-oo1-xc4a.google.com with SMTP id z14-20020a4ad58e000000b005137826d392so1384149oos.21 for ; Fri, 27 Jan 2023 11:49:45 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=FUzzHzCQcxOurh9YuvudjXpNdZx02NIZtoLw3Jde/PE=; b=S2HR++ybc/ns4vYecXBlgBA5lh7rKCR5RBxCn2d6nLoproYmP4Y/TuK8A2ykL1TZCy PQi2lsVKc//h5m3TPGFUZRCScYCxf/iBRsPO4Le2L6kyLee01De7/gBctPL5x+AFYdbM +GjlXUavIluJZluW6t/CA9goOPJfDGVnM+Mgt+aMp9WxpRPbD/Tewkr6wZubL9JQbZ/Q D22ZQZJELsrUxamrbo349CKHJL642zBXbi0BO/uzmOjlGEGSeG1j/mwEO9F4yWD/yh+R Z4EUx0fbMYpLh0Co5d8CYSkewP6ymMJoH7lGt/nPXvwtHvtN4R6vwQVewCoOaVtJA9nQ 6Lhw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=FUzzHzCQcxOurh9YuvudjXpNdZx02NIZtoLw3Jde/PE=; b=WMLZrewXyt6zBgf/Eo/qML7TW/0vjQFunJ/BWDDLIktSu8dndD4e9yofSBvNEB2R53 EDX1L75md4yvoK7k2gwPStVu4HCgkC8neOiJyox+K2wAtnsaIHqI3JewMX+VCjh0VyQj geTj2ar6N8b3V+1umQq4Puj8A4rv7T9r+8H7Knmfm9pWOH1DySyn/JBbIN9d78HFXSa1 I+wx2a3ONZ/jDNodiI5ktsUfFZO3QS99Jbz09wxudyUVHHEcKcvELWYz4BMqmC6MQQkO h5JUliIg9NhcYYRwuxatHA2jnzuV6XYFRdNxXKgsxsX97O89v6hJr9E15bkC3NpfiBgJ HGJg== X-Gm-Message-State: AFqh2kouhnDE8/Xee9100wqDuiX5j1pVEqy83dxdIciKbxi7Oy+qNLOA 4NNXVw+S71oezQW3askId9tX2L75ZVA= X-Google-Smtp-Source: AK7set8Zoaf7xb4TZLOBvvU/fOAKWYAOQ/jwdJrqulf4473/2u2hKvPvmXN7ahB+v/eEMKhFLKA99DnsrP0= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:7e4b:0:b0:506:4f19:740c with SMTP id p11-20020a817e4b000000b005064f19740cmr1518489ywn.383.1674848481189; Fri, 27 Jan 2023 11:41:21 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:40 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-4-surenb@google.com> Subject: [PATCH v2 03/33] maple_tree: Fix freeing of nodes in rcu mode From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com, Liam Howlett Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Liam Howlett The walk to destroy the nodes was not always setting the node type and would result in a destroy method potentially using the values as nodes. Avoid this by setting the correct node types. This is necessary for the RCU mode of the maple tree. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 73 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 11 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 482e17a460cb..73917dd2c608 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -895,6 +895,44 @@ static inline void ma_set_meta(struct maple_node *mn, = enum maple_type mt, meta->end =3D end; } =20 +/* + * mas_clear_meta() - clear the metadata information of a node, if it exis= ts + * @mas: The maple state + * @mn: The maple node + * @mt: The maple node type + * @offset: The offset of the highest sub-gap in this node. + * @end: The end of the data in this node. + */ +static inline void mas_clear_meta(struct ma_state *mas, struct maple_node = *mn, + enum maple_type mt) +{ + struct maple_metadata *meta; + unsigned long *pivots; + void __rcu **slots; + void *next; + + switch (mt) { + case maple_range_64: + pivots =3D mn->mr64.pivot; + if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { + slots =3D mn->mr64.slot; + next =3D mas_slot_locked(mas, slots, + MAPLE_RANGE64_SLOTS - 1); + if (unlikely((mte_to_node(next) && mte_node_type(next)))) + return; /* The last slot is a node, no metadata */ + } + fallthrough; + case maple_arange_64: + meta =3D ma_meta(mn, mt); + break; + default: + return; + } + + meta->gap =3D 0; + meta->end =3D 0; +} + /* * ma_meta_end() - Get the data end of a node from the metadata * @mn: The maple node @@ -5448,20 +5486,22 @@ static inline int mas_rev_alloc(struct ma_state *ma= s, unsigned long min, * mas_dead_leaves() - Mark all leaves of a node as dead. * @mas: The maple state * @slots: Pointer to the slot array + * @type: The maple node type * * Must hold the write lock. * * Return: The number of leaves marked as dead. */ static inline -unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots) +unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots, + enum maple_type mt) { struct maple_node *node; enum maple_type type; void *entry; int offset; =20 - for (offset =3D 0; offset < mt_slot_count(mas->node); offset++) { + for (offset =3D 0; offset < mt_slots[mt]; offset++) { entry =3D mas_slot_locked(mas, slots, offset); type =3D mte_node_type(entry); node =3D mte_to_node(entry); @@ -5480,14 +5520,13 @@ unsigned char mas_dead_leaves(struct ma_state *mas,= void __rcu **slots) =20 static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offs= et) { - struct maple_node *node, *next; + struct maple_node *next; void __rcu **slots =3D NULL; =20 next =3D mas_mn(mas); do { - mas->node =3D ma_enode_ptr(next); - node =3D mas_mn(mas); - slots =3D ma_slots(node, node->type); + mas->node =3D mt_mk_node(next, next->type); + slots =3D ma_slots(next, next->type); next =3D mas_slot_locked(mas, slots, offset); offset =3D 0; } while (!ma_is_leaf(next->type)); @@ -5551,11 +5590,14 @@ static inline void __rcu **mas_destroy_descend(stru= ct ma_state *mas, node =3D mas_mn(mas); slots =3D ma_slots(node, mte_node_type(mas->node)); next =3D mas_slot_locked(mas, slots, 0); - if ((mte_dead_node(next))) + if ((mte_dead_node(next))) { + mte_to_node(next)->type =3D mte_node_type(next); next =3D mas_slot_locked(mas, slots, 1); + } =20 mte_set_node_dead(mas->node); node->type =3D mte_node_type(mas->node); + mas_clear_meta(mas, node, node->type); node->piv_parent =3D prev; node->parent_slot =3D offset; offset =3D 0; @@ -5575,13 +5617,18 @@ static void mt_destroy_walk(struct maple_enode *eno= de, unsigned char ma_flags, =20 MA_STATE(mas, &mt, 0, 0); =20 - if (mte_is_leaf(enode)) + mas.node =3D enode; + if (mte_is_leaf(enode)) { + node->type =3D mte_node_type(enode); goto free_leaf; + } =20 + ma_flags &=3D ~MT_FLAGS_LOCK_MASK; mt_init_flags(&mt, ma_flags); mas_lock(&mas); =20 - mas.node =3D start =3D enode; + mte_to_node(enode)->ma_flags =3D ma_flags; + start =3D enode; slots =3D mas_destroy_descend(&mas, start, 0); node =3D mas_mn(&mas); do { @@ -5589,7 +5636,8 @@ static void mt_destroy_walk(struct maple_enode *enode= , unsigned char ma_flags, unsigned char offset; struct maple_enode *parent, *tmp; =20 - node->slot_len =3D mas_dead_leaves(&mas, slots); + node->type =3D mte_node_type(mas.node); + node->slot_len =3D mas_dead_leaves(&mas, slots, node->type); if (free) mt_free_bulk(node->slot_len, slots); offset =3D node->parent_slot + 1; @@ -5613,7 +5661,8 @@ static void mt_destroy_walk(struct maple_enode *enode= , unsigned char ma_flags, } while (start !=3D mas.node); =20 node =3D mas_mn(&mas); - node->slot_len =3D mas_dead_leaves(&mas, slots); + node->type =3D mte_node_type(mas.node); + node->slot_len =3D mas_dead_leaves(&mas, slots, node->type); if (free) mt_free_bulk(node->slot_len, slots); =20 @@ -5623,6 +5672,8 @@ static void mt_destroy_walk(struct maple_enode *enode= , unsigned char ma_flags, free_leaf: if (free) mt_free_rcu(&node->rcu); + else + mas_clear_meta(&mas, node, node->type); } =20 /* --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id BC893C54EAA for ; Fri, 27 Jan 2023 19:43:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232000AbjA0TnL (ORCPT ); Fri, 27 Jan 2023 14:43:11 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50662 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231848AbjA0Tm5 (ORCPT ); Fri, 27 Jan 2023 14:42:57 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C966E82421 for ; Fri, 27 Jan 2023 11:42:29 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id p19-20020a25d813000000b0080b78270db5so6347404ybg.15 for ; Fri, 27 Jan 2023 11:42:29 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=DF9zqRO1prEDm4IKfYXh+Z69pFj8RdYSRvmvJpFQZKM=; b=rnpUdmPcBnY7plbcdmS2pwvwnctQIRIqHumfjs2guCuPsY+Dw6YQ2tfutal8HnrgXz edy5Yrgey9QhoRSbyw/SmRVhoDEfIcnSP0t0LCmLUom2gJSxBSb6RoRoQ6R6E1A4+8T0 XcTb3Fhtcz5YcundAJZb+WYoiQCLWWkiPl+/L7XZyu1tWh8b/ENfJXXDc0COCB4H0HVg yuAJnUBiF1z/T9ta5KOn1AHf3cQZUH/oUuoUg6oOo5mcl+MifhrO/XwhHeojlZjv/4Mm lccC2wtCBVxTksQLP5lorO6u35MCkxakWEmMvAgnG98JEeGI6fsdBcznmI1LtXlDE24U yV4Q== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=DF9zqRO1prEDm4IKfYXh+Z69pFj8RdYSRvmvJpFQZKM=; b=DvFbCFhTP7LJ35OJ5A8wGJSN2PMiyM1gwykU48S9kgEdyQwwVHaQEs5AR6KspwLBF4 AGd5KYEGutxqqY1yB3aUVyL8acEgDBv9IXVjpgek4KI5vSXhpY5GSJUD9hTUGeWN32n9 mXtNH1qMPBye6iitsb56bnWmYAjEmuTLV1xznPoQvlZTmiTvqUrocKtc4Jd8htNlb/2A L//3LCrnwKyb7t+dY66QrN7Zxvfi3AvwAQTGXQsN1HG+bgd5WhLfNF5kzgzHnLCormvy wA8e8qml+MAjDJBltYxP1SFiRSvlxnQl8rlwNXa9diEYM4uZQ7Y7uX27oUAjF71ik+IN 8ZcQ== X-Gm-Message-State: AFqh2koyKgevasGTPFVBILuIewlaB/USuAmoldq80VvJoAVgxFg0yPoD DYLhxVdWMuBFMpIwK+xa3t+uOuj+SEo= X-Google-Smtp-Source: AMrXdXs3xZaVcR3p65iqnfy8qe0w+PmLMIhbeI9n6IEvkAzMLp6jSQOzi5T/oBy2kRvdJXFWY9xcFs4GkSg= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a0d:c8c5:0:b0:4f3:10b1:1351 with SMTP id k188-20020a0dc8c5000000b004f310b11351mr5009568ywd.516.1674848483749; Fri, 27 Jan 2023 11:41:23 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:41 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-5-surenb@google.com> Subject: [PATCH v2 04/33] maple_tree: remove extra smp_wmb() from mas_dead_leaves() From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com, Liam Howlett Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Liam Howlett The call to mte_set_dead_node() before the smp_wmb() already calls smp_wmb() so this is not needed. This is an optimization for the RCU mode of the maple tree. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 73917dd2c608..75cce2d4d5da 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5510,7 +5510,6 @@ unsigned char mas_dead_leaves(struct ma_state *mas, v= oid __rcu **slots, break; =20 mte_set_node_dead(entry); - smp_wmb(); /* Needed for RCU */ node->type =3D type; rcu_assign_pointer(slots[offset], node); } --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1378BC54EAA for ; Fri, 27 Jan 2023 19:43:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231980AbjA0TnI (ORCPT ); Fri, 27 Jan 2023 14:43:08 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50624 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231859AbjA0Tm5 (ORCPT ); Fri, 27 Jan 2023 14:42:57 -0500 Received: from mail-pf1-x44a.google.com (mail-pf1-x44a.google.com [IPv6:2607:f8b0:4864:20::44a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 497B68242F for ; Fri, 27 Jan 2023 11:42:30 -0800 (PST) Received: by mail-pf1-x44a.google.com with SMTP id cr14-20020a056a000f0e00b0058da951c487so2786838pfb.0 for ; Fri, 27 Jan 2023 11:42:30 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=fluAUEldqgo6Mr/4Su/gpwJVTd/f+QKHo7F0iCGeqOg=; b=B0QFA0zTTAgJFhNgrdDgKRfLtInL5VmAHE1KFwNYXYIlcWvaJJq1PKzYA8ckdy7Kyn xAwTf5YPOblK6+TJj1tGvdojweaIBaHFiTxRDHhkDnK6qDnyvgtYbq5iiXWxBxOCwUuh WaET5h8CLSuaMj6adU6fJWH6EpE5SqyIKnzFJ/pir1JZwDIco5M5jQ2a/9VWhgwD3iWL QgWDwglZ0wa8AC1rJh0bk86XOm5Sb2wt2duuTU/yqZcdsgUl6ZFLCYWIeCgAmHx4ObRd hHIbD6pxz2ke/VukXpDhoYuOo0GEGrcZQn8DqZwlpC0FiE7wMl7MPAjekSdb7LenTMW0 255w== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=fluAUEldqgo6Mr/4Su/gpwJVTd/f+QKHo7F0iCGeqOg=; b=zS84MyA4LiNIUaKu8tV8iiqiew1AwtBjNR252+XwRk/medbzfToEY5qFEQ+zBpSsJj VAi2TtzuhCBPjIui0ouzulTOHIDGtJeeX3ah+kpczFIBLuYLvbq+bcajcBiKW8+Qn4fw Bdl8dok5pWKoRaJICgbTF8uwKj3wELoV0gnx7zoM3K7B9lzTqsK2mTlUumSL31iMxYWj ckOtsW2wpqzHU6ee0rEGprHhJpjc3suqfz22jNb+7F14s7jBeLJ1sW4zxIMFotfHluPR KZhibpltSKLFSBynLXMYlQ7Jsun9rQfZwEuDrP1NIFUc+/SH7hxcuhEOBSHmQrAv8U5v l5aw== X-Gm-Message-State: AO0yUKXtDRsuF/qLUIR2cHZLt3zCNlQALXhEus6lHnEsvexDxod/az77 CYR2Pkg1ef53Db3jlwySejXsmbQ3S9Q= X-Google-Smtp-Source: AK7set+gvzIFHouo+ELhD7kNJY3eRZqYYIzPpjJr9+paWQDRkxxsh7BJRXlaLIfiaGLvKexw3FDEYkSiOQU= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a17:90b:a16:b0:225:eaa2:3f5d with SMTP id gg22-20020a17090b0a1600b00225eaa23f5dmr12984pjb.2.1674848485998; Fri, 27 Jan 2023 11:41:25 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:42 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-6-surenb@google.com> Subject: [PATCH v2 05/33] maple_tree: Fix write memory barrier of nodes once dead for RCU mode From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com, "Liam R. Howlett" Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Liam R. Howlett" During the development of the maple tree, the strategy of freeing multiple nodes changed and, in the process, the pivots were reused to store pointers to dead nodes. To ensure the readers see accurate pivots, the writers need to mark the nodes as dead and call smp_wmb() to ensure any readers can identify the node as dead before using the pivot values. There were two places where the old method of marking the node as dead without smp_wmb() were being used, which resulted in RCU readers seeing the wrong pivot value before seeing the node was dead. Fix this race condition by using mte_set_node_dead() which has the smp_wmb() call to ensure the race is closed. Add a WARN_ON() to the ma_free_rcu() call to ensure all nodes being freed are marked as dead to ensure there are no other call paths besides the two updated paths. This is necessary for the RCU mode of the maple tree. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam R. Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 7 +++++-- tools/testing/radix-tree/maple.c | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 75cce2d4d5da..49e399e8afaa 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -178,7 +178,7 @@ static void mt_free_rcu(struct rcu_head *head) */ static void ma_free_rcu(struct maple_node *node) { - node->parent =3D ma_parent_ptr(node); + WARN_ON(node->parent !=3D ma_parent_ptr(node)); call_rcu(&node->rcu, mt_free_rcu); } =20 @@ -1771,8 +1771,10 @@ static inline void mas_replace(struct ma_state *mas,= bool advanced) rcu_assign_pointer(slots[offset], mas->node); } =20 - if (!advanced) + if (!advanced) { + mte_set_node_dead(old_enode); mas_free(mas, old_enode); + } } =20 /* @@ -4211,6 +4213,7 @@ static inline bool mas_wr_node_store(struct ma_wr_sta= te *wr_mas) done: mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end); if (in_rcu) { + mte_set_node_dead(mas->node); mas->node =3D mt_mk_node(newnode, wr_mas->type); mas_replace(mas, false); } else { diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/ma= ple.c index 958ee9bdb316..4c89ff333f6f 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -108,6 +108,7 @@ static noinline void check_new_node(struct maple_tree *= mt) MT_BUG_ON(mt, mn->slot[1] !=3D NULL); MT_BUG_ON(mt, mas_allocated(&mas) !=3D 0); =20 + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); mas.node =3D MAS_START; mas_nomem(&mas, GFP_KERNEL); @@ -160,6 +161,7 @@ static noinline void check_new_node(struct maple_tree *= mt) MT_BUG_ON(mt, mas_allocated(&mas) !=3D i); MT_BUG_ON(mt, !mn); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } =20 @@ -192,6 +194,7 @@ static noinline void check_new_node(struct maple_tree *= mt) MT_BUG_ON(mt, not_empty(mn)); MT_BUG_ON(mt, mas_allocated(&mas) !=3D i - 1); MT_BUG_ON(mt, !mn); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } =20 @@ -210,6 +213,7 @@ static noinline void check_new_node(struct maple_tree *= mt) mn =3D mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); MT_BUG_ON(mt, mas_allocated(&mas) !=3D j - 1); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas) !=3D 0); @@ -233,6 +237,7 @@ static noinline void check_new_node(struct maple_tree *= mt) MT_BUG_ON(mt, mas_allocated(&mas) !=3D i - j); mn =3D mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); MT_BUG_ON(mt, mas_allocated(&mas) !=3D i - j - 1); } @@ -269,6 +274,7 @@ static noinline void check_new_node(struct maple_tree *= mt) mn =3D mas_pop_node(&mas); /* get the next node. */ MT_BUG_ON(mt, mn =3D=3D NULL); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas) !=3D 0); @@ -294,6 +300,7 @@ static noinline void check_new_node(struct maple_tree *= mt) mn =3D mas_pop_node(&mas2); /* get the next node. */ MT_BUG_ON(mt, mn =3D=3D NULL); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas2) !=3D 0); @@ -334,10 +341,12 @@ static noinline void check_new_node(struct maple_tree= *mt) MT_BUG_ON(mt, mas_allocated(&mas) !=3D MAPLE_ALLOC_SLOTS + 2); mn =3D mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); for (i =3D 1; i <=3D MAPLE_ALLOC_SLOTS + 1; i++) { mn =3D mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas) !=3D 0); @@ -375,6 +384,7 @@ static noinline void check_new_node(struct maple_tree *= mt) mas_node_count(&mas, i); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ mn =3D mas_pop_node(&mas); /* get the next node. */ + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); mas_destroy(&mas); =20 @@ -382,10 +392,13 @@ static noinline void check_new_node(struct maple_tree= *mt) mas_node_count(&mas, i); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ mn =3D mas_pop_node(&mas); /* get the next node. */ + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); mn =3D mas_pop_node(&mas); /* get the next node. */ + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); mn =3D mas_pop_node(&mas); /* get the next node. */ + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); mas_destroy(&mas); } @@ -35369,6 +35382,7 @@ static noinline void check_prealloc(struct maple_tr= ee *mt) MT_BUG_ON(mt, allocated !=3D 1 + height * 3); mn =3D mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) !=3D allocated - 1); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) !=3D 0); mas_destroy(&mas); @@ -35386,6 +35400,7 @@ static noinline void check_prealloc(struct maple_tr= ee *mt) mas_destroy(&mas); allocated =3D mas_allocated(&mas); MT_BUG_ON(mt, allocated !=3D 0); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); =20 MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) !=3D 0); @@ -35756,6 +35771,7 @@ void farmer_tests(void) tree.ma_root =3D mt_mk_node(node, maple_leaf_64); mt_dump(&tree); =20 + node->parent =3D ma_parent_ptr(node); ma_free_rcu(node); =20 /* Check things that will make lockdep angry */ --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8A21DC38142 for ; Fri, 27 Jan 2023 19:43:22 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231824AbjA0TnU (ORCPT ); Fri, 27 Jan 2023 14:43:20 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50644 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231783AbjA0Tm7 (ORCPT ); Fri, 27 Jan 2023 14:42:59 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0A3E584B67 for ; Fri, 27 Jan 2023 11:42:35 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-507aac99fdfso65529237b3.11 for ; Fri, 27 Jan 2023 11:42:35 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=A97N+ZL4sr7Moa7VA3MaIfeszXjKUidgl5sbUl4VE3M=; b=h4z95aWqlTijLuw2+pa/O521iwctvYJ/JE9Gk94Om8JNdm1J4qRBJ1EfNZDOaurfUb h0xAr5ZA79m4jhsd5KmptV8L5YTVXkJEqIIo06fTcAgDWyf2Iw1unXEy8R6f3F+h7n0/ eKAno27phPvhqEsCndY+cAQm1TD4BuL7gONpQNAXYFn8Kf2q+ZhvCxmCxJsT4hK4blKF lDmG8J5RutFJ9mwfq4DqWgfTIN8+TeptwgRlAF+qB5k0K+pf2NR8dkIUNhEjxCEKvwc/ GAm3buayP+cfA/HX9e16HQfg7j+WzzAIFSLNgZD3Zm/44+VjSwN/Bplr8AdUjUNKklkq c5Vw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=A97N+ZL4sr7Moa7VA3MaIfeszXjKUidgl5sbUl4VE3M=; b=nMiyFHMJqR8b2ffuEaRwQtF07DKs2Hwn3ZXPEfWZicJGrNC4annzsol19JRM0dBeVW PWAaYNKJCBshwA/YVevYrhwU1mTZBK+Wa/L8ULBJL/I7rXR/XsAWhJdoMi1GdmaBlhsp KutnA6+FtBtxLIXzYs74wUUud94H7A732GOsYTWWRa8Dg/8/IWP2JUxhHqw9SHVfB+gE hVOX0K0aKdNA3lPmgZVqjMSBeQnADpk3FN+rRtMNhfN600qGD/Uwf9TEKMwve909GBW5 C8Qs1mDRRNfTydx5BMh/6Vsu31TooEGJa4+oDunenpSclGnFDl47NUfCd3THcv0icJ9I 1oUw== X-Gm-Message-State: AO0yUKWbdOKiCcfIAT5+/rB0xRlsSBQAXJ83uy5xYMz8sRjqSRkrOxpy +qXaoeCVmYvz57CnHzquaIIfijgV3uQ= X-Google-Smtp-Source: AK7set8r1BYS7Wv/oAXgdJ6+dIiQhgZ/QiambSorQvdFTePlCpS2jC0PmTUUP3dVFKS1ORR7IfLO9gpuOYg= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:9bd6:0:b0:509:5557:c194 with SMTP id s205-20020a819bd6000000b005095557c194mr892323ywg.449.1674848488954; Fri, 27 Jan 2023 11:41:28 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:43 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-7-surenb@google.com> Subject: [PATCH v2 06/33] maple_tree: Add smp_rmb() to dead node detection From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com, "Liam R. Howlett" Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Liam R. Howlett" Add an smp_rmb() before reading the parent pointer to ensure that anything read from the node prior to the parent pointer hasn't been reordered ahead of this check. The is necessary for RCU mode. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam R. Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 49e399e8afaa..859303d2da90 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -532,9 +532,11 @@ static inline struct maple_node *mte_parent(const stru= ct maple_enode *enode) */ static inline bool ma_dead_node(const struct maple_node *node) { - struct maple_node *parent =3D (void *)((unsigned long) - node->parent & ~MAPLE_NODE_MASK); + struct maple_node *parent; =20 + /* Do not reorder reads from the node prior to the parent check */ + smp_rmb(); + parent =3D (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); return (parent =3D=3D node); } =20 @@ -549,6 +551,8 @@ static inline bool mte_dead_node(const struct maple_eno= de *enode) struct maple_node *parent, *node; =20 node =3D mte_to_node(enode); + /* Do not reorder reads from the node prior to the parent check */ + smp_rmb(); parent =3D mte_parent(enode); return (parent =3D=3D node); } --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8F5FDC54EAA for ; Fri, 27 Jan 2023 19:43:27 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232182AbjA0Tn0 (ORCPT ); Fri, 27 Jan 2023 14:43:26 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50752 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231809AbjA0Tm7 (ORCPT ); Fri, 27 Jan 2023 14:42:59 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 095CB83244 for ; Fri, 27 Jan 2023 11:42:35 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-50660dd3263so65567817b3.19 for ; Fri, 27 Jan 2023 11:42:35 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=mDt1g22Fa9gYu2Kb7/F2UNWz7p2znEfhohgEMazkZAE=; b=n77zu0oRsJL7OvjwFNjS7T1FAOtu4hdvxpK7kq3XthnUipBv5szcKq0gW6Eh2qWZ+N VqA54npBtuI1r6BsRB8yCAVHYO39SuVCqNfhqCm/Ya4OMopwGqTAxzjzqXFTaLEmWefn G7d9A2dlsE9NEs9J39+IVmn9jXa2H7iuxd7eUv1r4SdiVN/BsV9E4rX7Fx/7SJnqIKhp 5pljM+LwjnmUAESpDExSAXXjwcvSQjwdpBA+epV9jtEBYonrLt6ilkcmvKbIgLrFIqhL dWwHvu2PRt1rquBCjgEA2IpAdLCPilrM/O5s952uzrtLUUUIX7VUyMYpZs8xK3jyFgDu 2ztw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=mDt1g22Fa9gYu2Kb7/F2UNWz7p2znEfhohgEMazkZAE=; b=3u7GOPP1AKy2zKIq6G7zqCcugp/6Hn3KcNWtsLFQoVHTnfbjqokSGNchF4AuTEJmyE VwbvUkxFuKw8hz57F2R3zoCjRefE+oB240mj7JfAztahEOOL3I3Di51VkTGtgb8OGS+2 58zODxxVpscJ8vj2UbCh93ts+8MjxVcuMRkgE8bsw7AGY1v912HYt44B+vLZcfguvZ69 bytaGPTb58DfVRkx5vIJiCjLs33cL743GT7nukr6Qlj4Lq30RGZDUUO4IfLczLEa+OHR ArWby+d7mFEJ0T1tzm8DKLTNLkpzvYHnD/DhBStRHO8acg2asDn/gBQQSaN7rMAdfMUv Zaxw== X-Gm-Message-State: AO0yUKVZtrPJ0JjIIwuhszcyTk8HzwA3woOP5bu1lJBcRtQ4Tx8H0td0 mtdVE6oCz1pdhL/y4fp7DyKqtX6tnYw= X-Google-Smtp-Source: AK7set9vKZUOyTcPurWe/bFPJyhfVHuV/nLiUj+ueaBvjKNu+svIA1blUVC8f6GWjxXLSfMUdNgMQcXmD6M= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a25:354:0:b0:80b:543d:2106 with SMTP id 81-20020a250354000000b0080b543d2106mr1234365ybd.10.1674848491227; Fri, 27 Jan 2023 11:41:31 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:44 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-8-surenb@google.com> Subject: [PATCH v2 07/33] mm: Enable maple tree RCU mode by default. From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com, "Liam R. Howlett" Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Liam R. Howlett" Use the maple tree in RCU mode for VMA tracking. This is necessary for the use of per-VMA locking. RCU mode is enabled by default but disabled when exiting an mm and for the new tree during a fork. Also enable RCU for the tree used in munmap operations to ensure the nodes remain valid for readers. Signed-off-by: Liam R. Howlett Signed-off-by: Suren Baghdasaryan --- include/linux/mm_types.h | 3 ++- kernel/fork.c | 3 +++ mm/mmap.c | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index da983aedb741..8410c3052148 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -768,7 +768,8 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; =20 -#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) +#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ + MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; =20 /* Pointer magic because the dynamic array size confuses some compilers. */ diff --git a/kernel/fork.c b/kernel/fork.c index 5e3029ea8e1e..5f23d5e03362 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -617,6 +617,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *= mm, if (retval) goto out; =20 + mt_clear_in_rcu(vmi.mas.tree); for_each_vma(old_vmi, mpnt) { struct file *file; =20 @@ -700,6 +701,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *= mm, retval =3D arch_dup_mmap(oldmm, mm); loop_out: vma_iter_free(&vmi); + if (!retval) + mt_set_in_rcu(vmi.mas.tree); out: mmap_write_unlock(mm); flush_tlb_mm(oldmm); diff --git a/mm/mmap.c b/mm/mmap.c index 9d0dadb55e9a..9efe13d36df7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2277,7 +2277,8 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct = vm_area_struct *vma, int count =3D 0; int error =3D -ENOMEM; MA_STATE(mas_detach, &mt_detach, 0, 0); - mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN); + mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & + (MT_FLAGS_LOCK_MASK | MT_FLAGS_USE_RCU)); mt_set_external_lock(&mt_detach, &mm->mmap_lock); =20 /* @@ -3042,6 +3043,7 @@ void exit_mmap(struct mm_struct *mm) */ set_bit(MMF_OOM_SKIP, &mm->flags); mmap_write_lock(mm); + mt_clear_in_rcu(&mm->mm_mt); free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb); --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 71650C38142 for ; Fri, 27 Jan 2023 19:44:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232364AbjA0ToN (ORCPT ); Fri, 27 Jan 2023 14:44:13 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50932 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232110AbjA0TnT (ORCPT ); Fri, 27 Jan 2023 14:43:19 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id CE321BBA8 for ; Fri, 27 Jan 2023 11:42:53 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id b201-20020a2534d2000000b0080b87d910a2so6434989yba.21 for ; Fri, 27 Jan 2023 11:42:53 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=pFZiiTJ1lM5Y9cE9VJMMN6c4zys62lyfuQWFB/SvSfY=; b=m4Wtor8vj3J0ycRC6QGu22LySA+n2MGlbf/sH76T8N8Pnp/d8SuPXb/kR+Hyj37+ad 48NkH1zhtbcV3yGPaiQ5KPXnB06jsyj37BRmFZaU4H+LJ2YSwMP7BoYjL006YrtxIZWR VsSyGQhSW7cL19AuEXNwVKmULWycfkr7kU4POPAgtbT6hDzlZniUKpxVu1Hu98jAvRgz bcxq0t9OKK2J7/3O0lRl4dNTg073Sp+ojK3rnWcxuyGW0RvPh9anMnsNc+W5/rLzSMsJ Ho/St/2m0p0zsIWZF5QsNOVQeuQXkfsBP43+XpjCQzZI9euWanUB/WzPnytDjlkSAgXd IXOQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=pFZiiTJ1lM5Y9cE9VJMMN6c4zys62lyfuQWFB/SvSfY=; b=1U2RGa6dSBim6mLUcCleFv/won4LB+ibPH52TR5dhn53u5bm6Hxi2ZySeG0f1XCGB5 L7AP+dFpW9rWziz2keU1hW23Km7kjmugVGs7j73S9n/pA6FBIrFR3C57iHq/RVc2PUVk plhjFtuVTx0rdWtmF+L4bo0vDngFGDnTTZxL287+RmXN/VxPKITk3yaHELOVGQ9Fsdav ouvp5Bqn1X6wQFxWEXnSmTNPWgsgfniPncduyY18kORPeh2Si0l8n70rWahQ0VEf+0vw zlQ3vt6JMqkNiuaajuKA7AS3if5jBarWzjQ+51tOU43bvh2LnHWCIToB/f5mVUtFXL9l EXow== X-Gm-Message-State: AFqh2krsdCWztM2Yn/OqeUYvXHhJkH/kzCWIgoQ5SOGkb8rgvsZ2T4nQ JBZr+VS/F9+npZuRwE9Tz2qJfB+BYNg= X-Google-Smtp-Source: AMrXdXt7ky1YKmCHrEvdSmppryHuxdZmy/ExGSeixqws3R/QJJ6NNyUiC3cubRusLU6Amdx/dq1XzZif1cI= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:1d47:0:b0:3f4:6c62:18d1 with SMTP id d68-20020a811d47000000b003f46c6218d1mr3383963ywd.491.1674848493581; Fri, 27 Jan 2023 11:41:33 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:45 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-9-surenb@google.com> Subject: [PATCH v2 08/33] mm: introduce CONFIG_PER_VMA_LOCK From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" This configuration variable will be used to build the support for VMA locking during page fault handling. This is enabled on supported architectures with SMP and MMU set. The architecture support is needed since the page fault handler is called from the architecture's page faulting code which needs modifications to handle faults under VMA lock. Signed-off-by: Suren Baghdasaryan --- mm/Kconfig | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/Kconfig b/mm/Kconfig index ca98b2072df5..2e4a7e61768a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1211,6 +1211,18 @@ config LRU_GEN_STATS This option has a per-memcg and per-node memory overhead. # } =20 +config ARCH_SUPPORTS_PER_VMA_LOCK + def_bool n + +config PER_VMA_LOCK + def_bool y + depends on ARCH_SUPPORTS_PER_VMA_LOCK && MMU && SMP + help + Allow per-vma locking during page fault handling. + + This feature allows locking each virtual memory area separately when + handling page faults instead of taking mmap_lock. + source "mm/damon/Kconfig" =20 endmenu --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 37548C54EAA for ; Fri, 27 Jan 2023 19:43:33 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232199AbjA0Tn2 (ORCPT ); Fri, 27 Jan 2023 14:43:28 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50774 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231846AbjA0TnA (ORCPT ); Fri, 27 Jan 2023 14:43:00 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C453F82432 for ; Fri, 27 Jan 2023 11:42:36 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-4ff7c6679f2so65753727b3.12 for ; Fri, 27 Jan 2023 11:42:36 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=Nu9/mdu38G6WtBjXb6Do7xpUyt6CdrDbmAUDhoyp47k=; b=oFyOjlhEa6jPD1d+pKzu8xqMu9sShcBJM0zkcdXAO9bWZHfs5hDegJ8sEKWFr4AQNl iGPCIH7iC8aR5RiIjRoH9W2KUncZPWl0b5vsJXQEB2UE60mYiP5ldYuNlEW3dM96CgJp +2KdvoCyVN7G6APzAMKWAfXHTUhAA+JaJZhJgfnGwlr68Zr0GLZrYj2YnOQkKo96sWGj Ng0DOWnRANnP+XQns5edhVeWyYl+9EdfDgkHbYKOkVF++Ku/0IaWanDTI7HqCQuL/w5R edw8ZGqmSQJCCmVgs5lV30HhGrOZqx2K3qMJgNbHCqjkT1CdH2dJNnKk0QEKLZZ9jCPS y7/w== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=Nu9/mdu38G6WtBjXb6Do7xpUyt6CdrDbmAUDhoyp47k=; b=h3TEow3zRjG0lfCe5mvSQw5za2RRH4bDNJ2uFc8FE1Vk4O+/vca4y+1MhzhifpHZjF E8iCbuMu3k8njZgssRUSPH0Nd28ho8ZoHuDe/DrMao42v5gYamGAagDmFjMhhxzkzSY/ UGAvLWHmSvZ2kK07e9pNJDPSV5292hZDRv7kt29SfJpizclfJMrE8MiPfp73a38gqvcr 080uEza2rx7zqJmrUmIfF78T30hK+psCMEuX2cREmAgIXE1DQKox+FzA2kkBRXYBECRL BwOusTDNLykSbKbiuSxxKy9DNyBUFR0jTkCS2UlXO7udGjoxuP6PcHGuE+YDWaAhj8Q2 ZRHw== X-Gm-Message-State: AFqh2krEdkqLw+u/U5/61Hh8v+Wvx4W99aaUMVbiwxIw9pXjUSRW9QFK ek0PN/SUSzJ9s7kXfSjfZQEDIW8k5cQ= X-Google-Smtp-Source: AMrXdXtMMT5r56SGjlxKGvDbQirEmfUIu1ilIQcv1/3wi/4TSk/ascWeVtYTcuwkoFwuriGWfFUnQoKWHlg= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a0d:cdc7:0:b0:4df:f712:5d57 with SMTP id p190-20020a0dcdc7000000b004dff7125d57mr3818080ywd.303.1674848495850; Fri, 27 Jan 2023 11:41:35 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:46 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-10-surenb@google.com> Subject: [PATCH v2 09/33] mm: rcu safe VMA freeing From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Michel Lespinasse This prepares for page faults handling under VMA lock, looking up VMAs under protection of an rcu read lock, instead of the usual mmap read lock. Signed-off-by: Michel Lespinasse Signed-off-by: Suren Baghdasaryan --- include/linux/mm_types.h | 13 ++++++++++--- kernel/fork.c | 20 +++++++++++++++++++- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8410c3052148..62e413f84011 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -480,9 +480,16 @@ struct anon_vma_name { struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ =20 - unsigned long vm_start; /* Our start address within vm_mm. */ - unsigned long vm_end; /* The first byte after our end address - within vm_mm. */ + union { + struct { + /* VMA covers [vm_start; vm_end) addresses within mm */ + unsigned long vm_start; + unsigned long vm_end; + }; +#ifdef CONFIG_PER_VMA_LOCK + struct rcu_head vm_rcu; /* Used for deferred freeing. */ +#endif + }; =20 struct mm_struct *vm_mm; /* The address space we belong to. */ =20 diff --git a/kernel/fork.c b/kernel/fork.c index 5f23d5e03362..314d51eb91da 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -479,12 +479,30 @@ struct vm_area_struct *vm_area_dup(struct vm_area_str= uct *orig) return new; } =20 -void vm_area_free(struct vm_area_struct *vma) +static void __vm_area_free(struct vm_area_struct *vma) { free_anon_vma_name(vma); kmem_cache_free(vm_area_cachep, vma); } =20 +#ifdef CONFIG_PER_VMA_LOCK +static void vm_area_free_rcu_cb(struct rcu_head *head) +{ + struct vm_area_struct *vma =3D container_of(head, struct vm_area_struct, + vm_rcu); + __vm_area_free(vma); +} +#endif + +void vm_area_free(struct vm_area_struct *vma) +{ +#ifdef CONFIG_PER_VMA_LOCK + call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb); +#else + __vm_area_free(vma); +#endif +} + static void account_kernel_stack(struct task_struct *tsk, int account) { if (IS_ENABLED(CONFIG_VMAP_STACK)) { --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6EF02C61DA4 for ; Fri, 27 Jan 2023 19:43:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232008AbjA0Tnh (ORCPT ); Fri, 27 Jan 2023 14:43:37 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51056 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231931AbjA0TnF (ORCPT ); Fri, 27 Jan 2023 14:43:05 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 9BD478A547 for ; Fri, 27 Jan 2023 11:42:40 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id b201-20020a2534d2000000b0080b87d910a2so6435179yba.21 for ; Fri, 27 Jan 2023 11:42:40 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=OfsRQat1IE0fDeYSs5Psw7qjLLZE8mkiXmSdSDqQs6E=; b=Q7jceUjhtfhNXIUFz0/MzwmS5ZgtWEAK+cBK0blt69F8+KIFs/AwOKnAqPB+IEDKT/ FyLbx1gLVqD34Rlv7gRilV5FNygR87+qF/41EC627UqwDLTg6NJEFnAj/1s6LMME1gBy h9D9urWoN0e32uyucIiiCDtt59fZ91UJGpShxrAxprHccHpeDkPGS432b78Iuw1rAi+Y PGxS0P/Csnmusy4TpNpOjbr38NhV2uRvkkx/I7vAEJVPhb/fpP3E3BB7l9N5Jq8Ddy4K G0deOaLN7bYZJlZOXJL3xM94KhFiD/71EdmjsD4THVYmj7eB4g9ZYLPfdvfcp6yLyVR8 Oqkg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=OfsRQat1IE0fDeYSs5Psw7qjLLZE8mkiXmSdSDqQs6E=; b=Bfr+txlCYis8bHpMFGgq5vlFNPvsd8cpOWeZybDcNlMvo1og1+9BceuR3DW5a4SDL1 0q8r3eG3It+MGgyhtmEa/knqmILVulY/92sQsuoPIz4sSWu73bR5rKjl35AiuhhF8v06 sFfMp8PSLI3ldBvOHQBdT1BTUBpWnRRzhqE4PDxbTq36/SnmYRZ2X+vCIbc3H8urgoex k2lv2x9aeWH1rnqWQaIm1DH0biu6uVW0xT3KozbF1cUmdaFqWjw/JJEg+9ABpCLPmb7N McZlNrNT2YSfIrrtVnaGIZjNhBTkxBBJ36tf5Xic2kW1jn6Hlpu2dnyOCyGbCS+kveLv Du0g== X-Gm-Message-State: AO0yUKU2P/uugDWxMvSyycdgLaCCnWctXTC2zmyAq4ra6epEZTegdGw2 erOvZQYa/DOKK4sFywa94qR21YjlVqA= X-Google-Smtp-Source: AK7set9LIYXWd5UhR4gu2cf1OYz0XpBzDlrTVxbQWEhDgYvTk1OPOqMByF3iTmnXKTsrl+3RDXXXUtjCRT8= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a0d:f2c3:0:b0:50a:468d:48d8 with SMTP id b186-20020a0df2c3000000b0050a468d48d8mr731943ywf.104.1674848498315; Fri, 27 Jan 2023 11:41:38 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:47 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-11-surenb@google.com> Subject: [PATCH v2 10/33] mm: move mmap_lock assert function definitions From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Move mmap_lock assert function definitions up so that they can be used by other mmap_lock routines. Signed-off-by: Suren Baghdasaryan --- include/linux/mmap_lock.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 96e113e23d04..e49ba91bb1f0 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -60,6 +60,18 @@ static inline void __mmap_lock_trace_released(struct mm_= struct *mm, bool write) =20 #endif /* CONFIG_TRACING */ =20 +static inline void mmap_assert_locked(struct mm_struct *mm) +{ + lockdep_assert_held(&mm->mmap_lock); + VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); +} + +static inline void mmap_assert_write_locked(struct mm_struct *mm) +{ + lockdep_assert_held_write(&mm->mmap_lock); + VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); +} + static inline void mmap_init_lock(struct mm_struct *mm) { init_rwsem(&mm->mmap_lock); @@ -150,18 +162,6 @@ static inline void mmap_read_unlock_non_owner(struct m= m_struct *mm) up_read_non_owner(&mm->mmap_lock); } =20 -static inline void mmap_assert_locked(struct mm_struct *mm) -{ - lockdep_assert_held(&mm->mmap_lock); - VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); -} - -static inline void mmap_assert_write_locked(struct mm_struct *mm) -{ - lockdep_assert_held_write(&mm->mmap_lock); - VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); -} - static inline int mmap_lock_is_contended(struct mm_struct *mm) { return rwsem_is_contended(&mm->mmap_lock); --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E3D50C38142 for ; Fri, 27 Jan 2023 19:43:44 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232279AbjA0Tnm (ORCPT ); Fri, 27 Jan 2023 14:43:42 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51168 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231961AbjA0TnG (ORCPT ); Fri, 27 Jan 2023 14:43:06 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 9BCA28A546 for ; Fri, 27 Jan 2023 11:42:40 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-5028c723d28so65352297b3.15 for ; Fri, 27 Jan 2023 11:42:40 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=XXZYVjn5Y9alAXdawo6LSRZijnPMVBvcI/Ib+FHEz28=; b=dBDLlmFtxDGaSIxnsKi7TuOlfLmJDoa6COq6/jSvBgh64Qerq1YmsLWwhnAAxCyVf/ g582eX0Eo4U8zmrxOFGYnkJmijlE67J9D9xBXGrS1NtU3RqxGXmBh2f947WBs0NX/Zll emlcCuJwSkFMTmaE18K/oCpx+uIqpIqH/tqes3ZD00h73pebtBtpRPZksR0g19LqKl4v xmDEdLHNhpj3CDXqX8SiQjapJ6l/dJ1IUEAjdkc/2LeqJcIWhkIf+A2Qxlo3WsGI6Yzg srHRwUq16G0bEIrEimCIRBcTpWLvsmh9iUkFM5cCzPZIfcbsD+d957wvwGuV97WpJ3D1 vEJw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=XXZYVjn5Y9alAXdawo6LSRZijnPMVBvcI/Ib+FHEz28=; b=7PpcU/ReNrz6WTtym2Iw1X+0pHQ+uRBRf+Nd6Ux1HflBbRMN57dOQQV12HewNqvXoN /0u9bFKLdTzClf925X8Pt2VUtAYYmAvXHJzgv8qhYjF+xnYCcf6url8VIMYY10WVCDg7 0xXhzMTf4Oe6yQaekydKGxjuOzbCooXY8+hVJ1MkW27T0WMQEhhghgv18ewGQznGPqVg xR1aR7rXMdQLcsBkGeyiu6E+YZqbQm4OXZ/SfehObJdZRZDZRWW6y9dYPyCP09D7Uj1i QSuDlzw8UPtiXtEx9H9lcF08ZCkjSA0+H5ZxLU345OALWAgemRuTgzZ1raLczzCjPaWg LjCA== X-Gm-Message-State: AFqh2ko0jj7YuEELWcBExjwKBZ9J37jhyD9GGiOP8QqQ8VBiVRZlYATy Tf+ENdTMbEDRSuEeAH9u8/lMn/FkgzY= X-Google-Smtp-Source: AMrXdXuJ9Lmn9omVCHOpi5PBakoG2f5tpLAKUWWlsexTwKMDDrEOz6rknp2w/KZIAkkkuCmWH0rkraaKink= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a05:6902:10cb:b0:7b8:16c:e66d with SMTP id w11-20020a05690210cb00b007b8016ce66dmr5496306ybu.85.1674848500886; Fri, 27 Jan 2023 11:41:40 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:48 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-12-surenb@google.com> Subject: [PATCH v2 11/33] mm: add per-VMA lock and helper functions to control it From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Introduce per-VMA locking. The lock implementation relies on a per-vma and per-mm sequence counters to note exclusive locking: - read lock - (implemented by vma_start_read) requires the vma (vm_lock_seq) and mm (mm_lock_seq) sequence counters to differ. If they match then there must be a vma exclusive lock held somewhere. - read unlock - (implemented by vma_end_read) is a trivial vma->lock unlock. - write lock - (vma_start_write) requires the mmap_lock to be held exclusively and the current mm counter is assigned to the vma counter. This will allow multiple vmas to be locked under a single mmap_lock write lock (e.g. during vma merging). The vma counter is modified under exclusive vma lock. - write unlock - (vma_end_write_all) is a batch release of all vma locks held. It doesn't pair with a specific vma_start_write! It is done before exclusive mmap_lock is released by incrementing mm sequence counter (mm_lock_seq). - write downgrade - if the mmap_lock is downgraded to the read lock, all vma write locks are released as well (effectivelly same as write unlock). Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 82 +++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 8 ++++ include/linux/mmap_lock.h | 13 +++++++ kernel/fork.c | 4 ++ mm/init-mm.c | 3 ++ 5 files changed, 110 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index dd295c020e85..fee08e8fdce7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -617,6 +617,87 @@ struct vm_operations_struct { unsigned long addr); }; =20 +#ifdef CONFIG_PER_VMA_LOCK +static inline void vma_init_lock(struct vm_area_struct *vma) +{ + init_rwsem(&vma->lock); + vma->vm_lock_seq =3D -1; +} + +/* + * Try to read-lock a vma. The function is allowed to occasionally yield f= alse + * locked result to avoid performance overhead, in which case we fall back= to + * using mmap_lock. The function should never yield false unlocked result. + */ +static inline bool vma_start_read(struct vm_area_struct *vma) +{ + /* Check before locking. A race might cause false locked result. */ + if (vma->vm_lock_seq =3D=3D READ_ONCE(vma->vm_mm->mm_lock_seq)) + return false; + + if (unlikely(down_read_trylock(&vma->lock) =3D=3D 0)) + return false; + + /* + * Overflow might produce false locked result. + * False unlocked result is impossible because we modify and check + * vma->vm_lock_seq under vma->lock protection and mm->mm_lock_seq + * modification invalidates all existing locks. + */ + if (unlikely(vma->vm_lock_seq =3D=3D READ_ONCE(vma->vm_mm->mm_lock_seq)))= { + up_read(&vma->lock); + return false; + } + return true; +} + +static inline void vma_end_read(struct vm_area_struct *vma) +{ + rcu_read_lock(); /* keeps vma alive till the end of up_read */ + up_read(&vma->lock); + rcu_read_unlock(); +} + +static inline void vma_start_write(struct vm_area_struct *vma) +{ + int mm_lock_seq; + + mmap_assert_write_locked(vma->vm_mm); + + /* + * current task is holding mmap_write_lock, both vma->vm_lock_seq and + * mm->mm_lock_seq can't be concurrently modified. + */ + mm_lock_seq =3D READ_ONCE(vma->vm_mm->mm_lock_seq); + if (vma->vm_lock_seq =3D=3D mm_lock_seq) + return; + + down_write(&vma->lock); + vma->vm_lock_seq =3D mm_lock_seq; + up_write(&vma->lock); +} + +static inline void vma_assert_write_locked(struct vm_area_struct *vma) +{ + mmap_assert_write_locked(vma->vm_mm); + /* + * current task is holding mmap_write_lock, both vma->vm_lock_seq and + * mm->mm_lock_seq can't be concurrently modified. + */ + VM_BUG_ON_VMA(vma->vm_lock_seq !=3D READ_ONCE(vma->vm_mm->mm_lock_seq), v= ma); +} + +#else /* CONFIG_PER_VMA_LOCK */ + +static inline void vma_init_lock(struct vm_area_struct *vma) {} +static inline bool vma_start_read(struct vm_area_struct *vma) + { return false; } +static inline void vma_end_read(struct vm_area_struct *vma) {} +static inline void vma_start_write(struct vm_area_struct *vma) {} +static inline void vma_assert_write_locked(struct vm_area_struct *vma) {} + +#endif /* CONFIG_PER_VMA_LOCK */ + static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *= mm) { static const struct vm_operations_struct dummy_vm_ops =3D {}; @@ -625,6 +706,7 @@ static inline void vma_init(struct vm_area_struct *vma,= struct mm_struct *mm) vma->vm_mm =3D mm; vma->vm_ops =3D &dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); + vma_init_lock(vma); } =20 /* Use when VMA is not part of the VMA tree and needs no locking */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 62e413f84011..88619c6a29a3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -508,6 +508,11 @@ struct vm_area_struct { vm_flags_t __private __vm_flags; }; =20 +#ifdef CONFIG_PER_VMA_LOCK + int vm_lock_seq; + struct rw_semaphore lock; +#endif + /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. @@ -633,6 +638,9 @@ struct mm_struct { * init_mm.mmlist, and are protected * by mmlist_lock */ +#ifdef CONFIG_PER_VMA_LOCK + int mm_lock_seq; +#endif =20 =20 unsigned long hiwater_rss; /* High-watermark of RSS usage */ diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index e49ba91bb1f0..aab8f1b28d26 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -72,6 +72,17 @@ static inline void mmap_assert_write_locked(struct mm_st= ruct *mm) VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); } =20 +#ifdef CONFIG_PER_VMA_LOCK +static inline void vma_end_write_all(struct mm_struct *mm) +{ + mmap_assert_write_locked(mm); + /* No races during update due to exclusive mmap_lock being held */ + WRITE_ONCE(mm->mm_lock_seq, mm->mm_lock_seq + 1); +} +#else +static inline void vma_end_write_all(struct mm_struct *mm) {} +#endif + static inline void mmap_init_lock(struct mm_struct *mm) { init_rwsem(&mm->mmap_lock); @@ -114,12 +125,14 @@ static inline bool mmap_write_trylock(struct mm_struc= t *mm) static inline void mmap_write_unlock(struct mm_struct *mm) { __mmap_lock_trace_released(mm, true); + vma_end_write_all(mm); up_write(&mm->mmap_lock); } =20 static inline void mmap_write_downgrade(struct mm_struct *mm) { __mmap_lock_trace_acquire_returned(mm, false, true); + vma_end_write_all(mm); downgrade_write(&mm->mmap_lock); } =20 diff --git a/kernel/fork.c b/kernel/fork.c index 314d51eb91da..9141427a98b2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -474,6 +474,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struc= t *orig) */ data_race(memcpy(new, orig, sizeof(*new))); INIT_LIST_HEAD(&new->anon_vma_chain); + vma_init_lock(new); dup_anon_vma_name(orig, new); } return new; @@ -1147,6 +1148,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm= , struct task_struct *p, seqcount_init(&mm->write_protect_seq); mmap_init_lock(mm); INIT_LIST_HEAD(&mm->mmlist); +#ifdef CONFIG_PER_VMA_LOCK + mm->mm_lock_seq =3D 0; +#endif mm_pgtables_bytes_init(mm); mm->map_count =3D 0; mm->locked_vm =3D 0; diff --git a/mm/init-mm.c b/mm/init-mm.c index c9327abb771c..33269314e060 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -37,6 +37,9 @@ struct mm_struct init_mm =3D { .page_table_lock =3D __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .arg_lock =3D __SPIN_LOCK_UNLOCKED(init_mm.arg_lock), .mmlist =3D LIST_HEAD_INIT(init_mm.mmlist), +#ifdef CONFIG_PER_VMA_LOCK + .mm_lock_seq =3D 0, +#endif .user_ns =3D &init_user_ns, .cpu_bitmap =3D CPU_BITS_NONE, #ifdef CONFIG_IOMMU_SVA --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2A2CEC38142 for ; Fri, 27 Jan 2023 19:43:59 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232307AbjA0Tn4 (ORCPT ); Fri, 27 Jan 2023 14:43:56 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50752 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231740AbjA0TnR (ORCPT ); Fri, 27 Jan 2023 14:43:17 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 92F67173C for ; Fri, 27 Jan 2023 11:42:47 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id o1-20020a252801000000b0080b8600bdc9so6361211ybo.3 for ; Fri, 27 Jan 2023 11:42:47 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=Hq20e7KS4f4C48PKmYYknVSNimtVVHYzyV8VwmVTJ0Y=; b=SD5yhaj9WNDyBmH2zVXxIQSAPvkdOOdT/eZZRKlGP77z2J9nVnn6Obfbxv5sauUEV/ LCtaz+vX4qip57892cEocWjVTNmQS4/TdyxbxT8xLbIWgkt6lFG5ZxopwUuxBJEnTkjr vQID9RMKTIQGW2sRQmcGmPfXjoMh0D1eDPyuL/b6rqwKxYdkfEYgEgHjQL0Tw8ZlMbjX ZH2/uoF+k+VRzud7oIym//RQdtO+lmKAHdfjocpDVWhkn9piFYOBRbFkKQo8sM+osjEQ FxFQ0vmeBJ9JfrQeSA2WMn51BintgFmdPrgL1EZjLw25vLNDWAlkic6I+BYJ/3xqK9CW +GQQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=Hq20e7KS4f4C48PKmYYknVSNimtVVHYzyV8VwmVTJ0Y=; b=xbMAK4Bn4uhVjDHVf3YtwlIXPk0dxw7s513AIEBLb/aFdW7jNTCRS10jOkysBvTZ/v zyIvnaPSx51ikrXwyY16sr6Liz1jwjLJnZRgpXNeWA4QrVBEn3dO0i3Ej3MC9Khp7Q7X JXpuZx87/lVqa0CzO70z3eFtzmoGGl3e503liZJGHU6CfvibO5yussLexm5ekpS/iHlB OLH3sMjTvW7MOQIKOJF1IotNa9YRiURGaXezzFvJBr9tbqpotzJ2v0m+H8BllzcDYbrM KSO/M+5aLYPmp0Oxhz5a7QnoU12smiE2p+yKc6oxiEQqEPO3lDsIs6tR0goNULUH3wCN UgJQ== X-Gm-Message-State: AO0yUKUI+KrJtqbbROnssNiZErS0gVFaBf5G47r63UyqAzpdzpGZOUtP pcfFXwUkPXNXvMtaWOlGIDOT5nxB8Hc= X-Google-Smtp-Source: AK7set9CI7/3kZjfJSWqixfLkcUok8XHsx1YNSreUFU581ZlI+jbpFxEiZ86BfATPj+NjPN8bnYvtim468k= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a25:b283:0:b0:80b:c7cb:6d1f with SMTP id k3-20020a25b283000000b0080bc7cb6d1fmr1156521ybj.298.1674848502930; Fri, 27 Jan 2023 11:41:42 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:49 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-13-surenb@google.com> Subject: [PATCH v2 12/33] mm: mark VMA as being written when changing vm_flags From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Updates to vm_flags have to be done with VMA marked as being written for preventing concurrent page faults or other modifications. Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index fee08e8fdce7..66dca140695e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -720,21 +720,21 @@ static inline void vm_flags_init(struct vm_area_struc= t *vma, static inline void vm_flags_reset(struct vm_area_struct *vma, vm_flags_t flags) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); vm_flags_init(vma, flags); } =20 static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); ACCESS_PRIVATE(vma, __vm_flags) |=3D flags; } =20 static inline void vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); ACCESS_PRIVATE(vma, __vm_flags) &=3D ~flags; } =20 @@ -755,7 +755,7 @@ static inline void __vm_flags_mod(struct vm_area_struct= *vma, static inline void vm_flags_mod(struct vm_area_struct *vma, vm_flags_t set, vm_flags_t clear) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); __vm_flags_mod(vma, set, clear); } =20 --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 02F63C54EAA for ; Fri, 27 Jan 2023 19:44:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232338AbjA0ToH (ORCPT ); Fri, 27 Jan 2023 14:44:07 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50624 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231774AbjA0TnT (ORCPT ); Fri, 27 Jan 2023 14:43:19 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4C5521707 for ; Fri, 27 Jan 2023 11:42:50 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-4b34cf67fb6so65244707b3.6 for ; Fri, 27 Jan 2023 11:42:50 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=icGEQDhVNg8AQkGt0Dau1cb43VWZWeb/kUxMjHSAwGA=; b=nhjpt9VbHDPsHYg33OlJfuq5Iz35VmBZ3kiMxPcXRXZZYdOboFlR0spKtxfGWFFcLn eBkIzKgB8QHaANcEzDHfkdLN1fRlxg2tqUBsMTv/7fNevBLbu/43ueowkRgVsLx1vLRo uHdRFKLtGeWmcojM55WkW4Vbk+IWqqj37nvYWjKvQOLDRhcdlXynMiveqJDQteuGpWZz ETc6Sup2efRPDAlMWE74qe3dlQIJeV8JWIsag7a7AlDm28r/breAdd9LIL/XkRDxD2X3 yud8bBPMeadGbrZ7ftbCoegvJj4wUFszXo3BfVZSLdIr/XgKY9tYqvkkj2bzILo8Fvtn ItZw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=icGEQDhVNg8AQkGt0Dau1cb43VWZWeb/kUxMjHSAwGA=; b=y1HZAyEVXuN9pXeCS2t5qJSou9ObpYXCbbNtNA2JwnwOoFIEUN/bECVKKhd07WP/0r 6TNZKusGdxCRcH4XFI7UEr8qFwnbUwhu/M5ebxDPlMuSuY9ysmIIdNTFJn6m3WpoicD6 mQCLFb4BUY79qHln00XP1YbuW3Ljl3If9APCkKwJj9o7kzw9R3EiIcNYQ18QMhS/rmBc 0r9up9QuxckVync0u3RMgFvqJwZa+H1GxodCHr0JnfLIHag4cAKojD8qihR4HQF40zUI 9QP5T18zb8TkCEDcJbM/vzM7eaRaTFszoi31gF25tcWg72GZ4UN6xhA00/r4AsdMI7eN JVig== X-Gm-Message-State: AO0yUKXcFnMYtY8XvKy6qfMYqRNV601DG3sjngfFmok7Mny19cesngMc MbJpJXtj1ereY5NAWMIlKHT/bMURsZk= X-Google-Smtp-Source: AK7set+R/81FxpFmvdAm43AdDVGV+lMO8EmoQcyFeqmQ4bL2GAdYwzTWfE7122Mwrx91JEcHzv5APn9iCmY= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:48cb:0:b0:506:4c8b:231e with SMTP id v194-20020a8148cb000000b005064c8b231emr1848392ywa.80.1674848505193; Fri, 27 Jan 2023 11:41:45 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:50 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-14-surenb@google.com> Subject: [PATCH v2 13/33] mm/mmap: move VMA locking before vma_adjust_trans_huge call From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" vma_adjust_trans_huge() modifies the VMA and such modifications should be done after VMA is marked as being written. Therefore move VMA flag modifications before vma_adjust_trans_huge() so that VMA is marked before all these modifications. Signed-off-by: Suren Baghdasaryan --- mm/mmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/mmap.c b/mm/mmap.c index 9efe13d36df7..7467d691e357 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2910,11 +2910,12 @@ static int do_brk_flags(struct vma_iterator *vmi, s= truct vm_area_struct *vma, if (vma_iter_prealloc(vmi)) goto unacct_fail; =20 + /* Set flags first to implicitly lock the VMA before updates */ + vm_flags_set(vma, VM_SOFTDIRTY); vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0); init_vma_prep(&vp, vma); vma_prepare(&vp); vma->vm_end =3D addr + len; - vm_flags_set(vma, VM_SOFTDIRTY); vma_iter_store(vmi, vma); =20 vma_complete(&vp, vmi, mm); --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 28D98C61DA4 for ; Fri, 27 Jan 2023 19:44:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232134AbjA0ToK (ORCPT ); Fri, 27 Jan 2023 14:44:10 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51616 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231838AbjA0TnT (ORCPT ); Fri, 27 Jan 2023 14:43:19 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 606A14C08 for ; Fri, 27 Jan 2023 11:42:52 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-5066df312d7so65757957b3.0 for ; Fri, 27 Jan 2023 11:42:52 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=7Tx+qt9ZwCfzeziH+AxLsfIEHPwRgvIaxjFEfdwiAVs=; b=MwDUDcWNdtFYfmhAuC43tEYj+bx0/7zv78dJ0hpiWDqIAfrnPySCh31pwGtES+llPo g9aJ1BaJcPmj6QIFKuJtNB4+OKnKX4w74YBlWwFtrSbaxVsErI4QpqtlUvrQKvku3x0Y fv8bSpOOjUv/1+/JuPfz5M6ciXa9EK4OZHTaoyyN5QHozfr1J0btP+Cb6s+q8s6DTvgx Eb/YBgAB71iKrit1/2Z+f0K+0VaK6nNNkqEz24OPAMCTiIMwYOn1N+e1hh+02mLm2qhR X0SB7IZ09+W7stQU0DgYwMtwYdWCgN82KCjbLOZQQ4WgzpAt4BTc4SEqK+AY1F70NhR7 prsQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=7Tx+qt9ZwCfzeziH+AxLsfIEHPwRgvIaxjFEfdwiAVs=; b=V0BaELmMQZ69LYaSXhJDxSjpLq08iRF43H36xf+HTJhHeKFU94MN+Yypdg/U1ec0vy MqoNopfj2scoG2kitq6Lo/Qtnp2SrAGoCWIX1fMpftOlfYhnPnQgoH/+g03vUeYZI5fS Yd2pcWbR+LdMssq8OfmMlzC9feazNCfyRzjUFEKo1mzbCyDkmMOBmiawpsYls4Wi6dUo hyPo4QoPkuICVMcymrxgUXhdHzi14ddFAxFXtxTgle3KKQWOua7vsTFYogcLDhzV7rN1 w3qyFenVl8AozhAo0vUd4/XOZT8u/aKSjJuWKdOg9T1vsyfEsUF9Zuy8aYyvTLKchS2a CL2Q== X-Gm-Message-State: AO0yUKVZKKh0JMCEWiABt0r/SCEcmWcKNEA4V8AAEJdiQbVcddRjAcFo 5QW8M/0oTahlvpg13KbNeT/q2PnYEKs= X-Google-Smtp-Source: AK7set+eTaGDRvBxQnOso6u8cp6LCvB6g6dz+di36ExTAmlNkOKvBeltyp9kLCTqVY9tlqnocHmCi479DpA= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:6008:0:b0:506:833a:c32e with SMTP id u8-20020a816008000000b00506833ac32emr1175163ywb.50.1674848507730; Fri, 27 Jan 2023 11:41:47 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:51 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-15-surenb@google.com> Subject: [PATCH v2 14/33] mm/khugepaged: write-lock VMA while collapsing a huge page From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Protect VMA from concurrent page fault handler while collapsing a huge page. Page fault handler needs a stable PMD to use PTL and relies on per-VMA lock to prevent concurrent PMD changes. pmdp_collapse_flush(), set_huge_pmd() and collapse_and_free_pmd() can modify a PMD, which will not be detected by a page fault handler without proper locking. Before this patch, page tables can be walked under any one of the mmap_lock, the mapping lock, and the anon_vma lock; so when khugepaged unlinks and frees page tables, it must ensure that all of those either are locked or don't exist. This patch adds a fourth lock under which page tables can be traversed, and so khugepaged must also lock out that one. Signed-off-by: Suren Baghdasaryan --- mm/khugepaged.c | 5 +++++ mm/rmap.c | 31 ++++++++++++++++--------------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index eb38bd1b1b2f..b938b286cdc3 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1134,6 +1134,7 @@ static int collapse_huge_page(struct mm_struct *mm, u= nsigned long address, if (result !=3D SCAN_SUCCEED) goto out_up_write; =20 + vma_start_write(vma); anon_vma_lock_write(vma->anon_vma); =20 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, address, @@ -1601,6 +1602,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, uns= igned long addr, goto drop_hpage; } =20 + /* Lock the vma before taking i_mmap and page table locks */ + vma_start_write(vma); + /* * We need to lock the mapping so that from here on, only GUP-fast and * hardware page walks can access the parts of the page tables that @@ -1806,6 +1810,7 @@ static int retract_page_tables(struct address_space *= mapping, pgoff_t pgoff, result =3D SCAN_PTE_UFFD_WP; goto unlock_next; } + vma_start_write(vma); collapse_and_free_pmd(mm, vma, addr, pmd); if (!cc->is_khugepaged && is_target) result =3D set_huge_pmd(vma, addr, pmd, hpage); diff --git a/mm/rmap.c b/mm/rmap.c index 15ae24585fc4..8e1a2ad9ca53 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -25,21 +25,22 @@ * mapping->invalidate_lock (in filemap_fault) * page->flags PG_locked (lock_page) * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs be= low) - * mapping->i_mmap_rwsem - * anon_vma->rwsem - * mm->page_table_lock or pte_lock - * swap_lock (in swap_duplicate, swap_info_get) - * mmlist_lock (in mmput, drain_mmlist and others) - * mapping->private_lock (in block_dirty_folio) - * folio_lock_memcg move_lock (in block_dirty_folio) - * i_pages lock (widely used) - * lruvec->lru_lock (in folio_lruvec_lock_irq) - * inode->i_lock (in set_page_dirty's __mark_inode_dirty) - * bdi.wb->list_lock (in set_page_dirty's __mark_inode_d= irty) - * sb_lock (within inode_lock in fs/fs-writeback.c) - * i_pages lock (widely used, in set_page_dirty, - * in arch-dependent flush_dcache_mmap_lock, - * within bdi.wb->list_lock in __sync_single= _inode) + * vma_start_write + * mapping->i_mmap_rwsem + * anon_vma->rwsem + * mm->page_table_lock or pte_lock + * swap_lock (in swap_duplicate, swap_info_get) + * mmlist_lock (in mmput, drain_mmlist and others) + * mapping->private_lock (in block_dirty_folio) + * folio_lock_memcg move_lock (in block_dirty_folio) + * i_pages lock (widely used) + * lruvec->lru_lock (in folio_lruvec_lock_irq) + * inode->i_lock (in set_page_dirty's __mark_inode_dir= ty) + * bdi.wb->list_lock (in set_page_dirty's __mark_inode= _dirty) + * sb_lock (within inode_lock in fs/fs-writeback.c) + * i_pages lock (widely used, in set_page_dirty, + * in arch-dependent flush_dcache_mmap_loc= k, + * within bdi.wb->list_lock in __sync_sing= le_inode) * * anon_vma->rwsem,mapping->i_mmap_rwsem (memory_failure, collect_procs_= anon) * ->tasklist_lock --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 54ADCC54EAA for ; Fri, 27 Jan 2023 19:44:32 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232442AbjA0Tob (ORCPT ); Fri, 27 Jan 2023 14:44:31 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51666 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232144AbjA0TnU (ORCPT ); Fri, 27 Jan 2023 14:43:20 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4015312851 for ; Fri, 27 Jan 2023 11:42:55 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-5063c0b909eso64912297b3.7 for ; Fri, 27 Jan 2023 11:42:55 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=xM1gV9tZESxmDxiysmAmLZq38ZaKZXSlDI1GeX9HKic=; b=lE9qRR84U5+rYrRmzpPAw7Ud6uFSOEezLerrdXzskk/7N7IWEzIS+P8h33l5/GHLwm pzf0b8p0OfVWGzGLb/dMUWWv/4+TG3PN/VW/HgmJDf4GTYQxz4qhZvzGmSe6iwYXQbEv pmRWin5z3kTr/dQHGmOmQouL9X7gNxEvHMfX/Mw4+j3xs8bwN3ke4zv6/7Jpk/cXGw8a hnXfsAcVwtJnLNS//9cNJnJKu2XJduYEuopAyOxXqZTZsrcp23rvXM55Jk6t7ntayedf QFH2QchxeUajMjFYvRPLkdcrU5vfcJYPLkQ5Wb/mKP36Ds4BBv5V6MGo7zAGbp1SePYq xw1w== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=xM1gV9tZESxmDxiysmAmLZq38ZaKZXSlDI1GeX9HKic=; b=73GImSy7IXwMLb7/l1uQMMn730QaZUY2hwN3cv/BXAsFGU+VFcep1WFHEVFprIE8ma AM+QxCr6025GiRkn/dFaFC49Fgu6jdd8dz8jRJBlkld8WDOGeWa8dwPsRjbuiBwx/3pd C2RINAYmlDXlrHhn+x+lSDByJnheZGPe3BM9e/4whIdeKrXKPVQHuz2azjTJkqwk4e23 kZWf+itTB97msJR8x3/Hgug04Ew7OoFlfEsth30QIe9DZEsYEV3hN0KoCedAFuXFMMB3 TCV2kWFvKKKpcKw9R78wJvtGkblMaNeyfh9Em+u7UJbquxI5YLdnpcLmYLFSny/mKGap tvrw== X-Gm-Message-State: AFqh2koSJTXsnCcicGEuDI+73eFPxZXGEG5rH3qONUUPHNF78i8OpfSG nGfml7JoA78HlNwP9i6WsAI0BXFP/ZI= X-Google-Smtp-Source: AMrXdXsibCiIco7YlC7GUbshkOFJQAk8PpTkup371cElxWyegDeUsxNSE/XyOxE1JUpmHxGWPZOyG9ioimM= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a25:b94d:0:b0:801:e503:dd0c with SMTP id s13-20020a25b94d000000b00801e503dd0cmr2559229ybm.384.1674848510225; Fri, 27 Jan 2023 11:41:50 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:52 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-16-surenb@google.com> Subject: [PATCH v2 15/33] mm/mmap: write-lock VMAs before merging, splitting or expanding them From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Decisions about whether VMAs can be merged, split or expanded must be made while VMAs are protected from the changes which can affect that decision. For example, merge_vma uses vma->anon_vma in its decision whether the VMA can be merged. Meanwhile, page fault handler changes vma->anon_vma during COW operation. Write-lock all VMAs which might be affected by a merge or split operation before making decision how such operations should be performed. Signed-off-by: Suren Baghdasaryan --- mm/mmap.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 7467d691e357..6fff76334177 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -269,8 +269,11 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) */ vma_iter_init(&vmi, mm, oldbrk); next =3D vma_find(&vmi, newbrk + PAGE_SIZE + stack_guard_gap); - if (next && newbrk + PAGE_SIZE > vm_start_gap(next)) - goto out; + if (next) { + vma_start_write(next); + if (newbrk + PAGE_SIZE > vm_start_gap(next)) + goto out; + } =20 brkvma =3D vma_prev_limit(&vmi, mm->start_brk); /* Ok, looks good - let it rip. */ @@ -912,10 +915,17 @@ struct vm_area_struct *vma_merge(struct vma_iterator = *vmi, struct mm_struct *mm, if (vm_flags & VM_SPECIAL) return NULL; =20 + if (prev) + vma_start_write(prev); next =3D find_vma(mm, prev ? prev->vm_end : 0); + if (next) + vma_start_write(next); mid =3D next; - if (next && next->vm_end =3D=3D end) /* cases 6, 7, 8 */ + if (next && next->vm_end =3D=3D end) { /* cases 6, 7, 8 */ next =3D find_vma(mm, next->vm_end); + if (next) + vma_start_write(next); + } =20 /* verify some invariant that must be enforced by the caller */ VM_WARN_ON(prev && addr <=3D prev->vm_start); @@ -2163,6 +2173,7 @@ int __split_vma(struct vma_iterator *vmi, struct vm_a= rea_struct *vma, WARN_ON(vma->vm_start >=3D addr); WARN_ON(vma->vm_end <=3D addr); =20 + vma_start_write(vma); if (vma->vm_ops && vma->vm_ops->may_split) { err =3D vma->vm_ops->may_split(vma, addr); if (err) @@ -2518,6 +2529,8 @@ unsigned long mmap_region(struct file *file, unsigned= long addr, =20 /* Attempt to expand an old mapping */ /* Check next */ + if (next) + vma_start_write(next); if (next && next->vm_start =3D=3D end && !vma_policy(next) && can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen, NULL_VM_UFFD_CTX, NULL)) { @@ -2527,6 +2540,8 @@ unsigned long mmap_region(struct file *file, unsigned= long addr, } =20 /* Check prev */ + if (prev) + vma_start_write(prev); if (prev && prev->vm_end =3D=3D addr && !vma_policy(prev) && (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file, pgoff, vma->vm_userfaultfd_ctx, NULL) : @@ -2900,6 +2915,8 @@ static int do_brk_flags(struct vma_iterator *vmi, str= uct vm_area_struct *vma, if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT)) return -ENOMEM; =20 + if (vma) + vma_start_write(vma); /* * Expand the existing vma if possible; Note that singular lists do not * occur after forking, so the expand will only happen on new VMAs. --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C5175C61DA7 for ; Fri, 27 Jan 2023 19:45:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232346AbjA0Tpw (ORCPT ); Fri, 27 Jan 2023 14:45:52 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51106 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232371AbjA0ToU (ORCPT ); Fri, 27 Jan 2023 14:44:20 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id DBED32ED53 for ; Fri, 27 Jan 2023 11:43:11 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-4bdeb1bbeafso65144387b3.4 for ; Fri, 27 Jan 2023 11:43:11 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=z2/pTj5C3cgEFiOJp2CeCn46OI8ibDfme0ccGGe2ykQ=; b=Cb3p2QcNsUnk+wL6/K6P4ED35dG3ZUcB734zcAsddvwr/PVSkrRYzepHFsLMU82wqr DCByjFxSWTXuXxKpPzrEyhOes5tD65SfDW58DVPGzZ4bl/GM62EXvV72BYyh+FkQftRL 9ik0iporDuduJD370bXV4KniJvgp40Hs/4xlWBjuzB4w/OEbzQnMEDOcEM26lHODokML 801aFmGgsonG3Ea3SKObTIkdYEG/WUj/siwNQ/Elnoh6/xVR3ypDahdEx2v1JUXFD+l6 cbwz8ftG3Ry5zhC62QLkQy0TB2bZZjts/vu8EdbF1X6krxdoifCb7qgLuiaEVqlGXvIi PdsQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=z2/pTj5C3cgEFiOJp2CeCn46OI8ibDfme0ccGGe2ykQ=; b=bdCGFf6cMuLUD8Uq0vdVuagR1jKFPZaK89oZgrnVf+cdT806KX/aFulRurodZ05Wic ZklDK0jmmfqJUDy9eWGIauQ2QqqY4an1oDWRrwiHrqzpaf8jchuTh+2xojazDIv58kPB rK1UhTxJ7H/Kbogudyn2nD7Uqm6iPLCmieD/NLIO1qbrwRtbJV3Pav2EvSvs45Vf8cFl USWIEUwK+q2RmgRgzAgyRPK+4vkuEkIZBQkkiTus+V+9scNtStZZubxAySnUplfaSve9 2RMbFH7y2R4LI72cNrQzIPs3MMRT5PR0kInlbrdjhwjlzcle0Dsl6LTSVib0utmL/FT0 Hasg== X-Gm-Message-State: AFqh2krdyO6GovkVmIQPCfNaF2t0HY+847oOKszqr56nYc+9andaG4jn TnviBoMycSLuODwXZInvKArrBiffd8c= X-Google-Smtp-Source: AMrXdXvc0ioDltykhYGthi7quhcG5gFdvKFRYGkIf9fgUkYhxxt/rX6VwG8N7AhqKPLty1w08hNt6tf3mAM= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a0d:f6c2:0:b0:4dc:1241:cc0a with SMTP id g185-20020a0df6c2000000b004dc1241cc0amr6165876ywf.499.1674848512603; Fri, 27 Jan 2023 11:41:52 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:53 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-17-surenb@google.com> Subject: [PATCH v2 16/33] mm/mmap: write-lock VMA before shrinking or expanding it From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" vma_expand and vma_shrink change VMA boundaries. Expansion might also result in freeing of an adjacent VMA. Write-lock affected VMAs to prevent concurrent page faults. Signed-off-by: Suren Baghdasaryan --- mm/mmap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 6fff76334177..60038c24d836 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -674,6 +674,9 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area= _struct *vma, ret =3D dup_anon_vma(vma, next); if (ret) return ret; + + /* Lock the VMA before removing it */ + vma_start_write(next); } =20 init_multi_vma_prep(&vp, vma, NULL, remove_next ? next : NULL, NULL); @@ -686,6 +689,7 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area= _struct *vma, if (vma_iter_prealloc(vmi)) goto nomem; =20 + vma_start_write(vma); vma_adjust_trans_huge(vma, start, end, 0); /* VMA iterator points to previous, so set to start if necessary */ if (vma_iter_addr(vmi) !=3D start) @@ -725,6 +729,7 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area= _struct *vma, if (vma_iter_prealloc(vmi)) return -ENOMEM; =20 + vma_start_write(vma); init_vma_prep(&vp, vma); vma_adjust_trans_huge(vma, start, end, 0); vma_prepare(&vp); --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0CD8FC38142 for ; Fri, 27 Jan 2023 19:45:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232235AbjA0Tok (ORCPT ); Fri, 27 Jan 2023 14:44:40 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51548 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232176AbjA0TnW (ORCPT ); Fri, 27 Jan 2023 14:43:22 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6C190227A3 for ; Fri, 27 Jan 2023 11:43:03 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-434eb7c6fa5so65876357b3.14 for ; Fri, 27 Jan 2023 11:43:03 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=L4ezA/DdKPqYrQ5cS+Ke2gtcDFDEWQ5+mTqJ9joAzRk=; b=GKKENfMfMG4N0pZCcl9OmPXOIcATJjfZsJmDxgzcwEK7KGqwCsad+AWtcOXvaoC0br qvO01w86MwzjSygGqi8PA6cojX9dXaSV7GQ7IPy4D+7hoAEyyoJoE4vESGbmz//KocjS XInPKVcNcxUU/7v9tYz8Eq3N+worhIjUrPeLT3iGbzfEm71MChhnFLrlat60IF4TyfFg cT/sx9cN16Dq7Csnqgd8AQwbf9+2MwGgrR42iD3xwMMpBpotxh+QKt5ySOsx98KoL0iq FNYiUMi+Hv+PIq3dZA5NAAwxArkgkUL/euE7T+pJZglO6ldDOiv8hb2KxyDrVWXCBL/Q MOaA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=L4ezA/DdKPqYrQ5cS+Ke2gtcDFDEWQ5+mTqJ9joAzRk=; b=aJNtgNfBVDFVITTGk1sfpBqXPwGFJfeWRjHIr0C+EpkZJP/uUnMKjIrtlyoRQ8xbyE k/ejOf/PWYbitVr1vn1ucQyc7ah/yk8ISnqJQmvBy3jbwEyxyq52HgOYn1+1/ZaEhCyb WmAin9d4K0UFhPnzhY5VNinPxM1VV2FtLwIeu62nAopm574UaBAhZjn5RiAH8VEbli5r VU+TlTLLAfZNZdyh2dHrJY5ar4zluW4NNklkrhmiJlYLc1EVW0h+wLJpkOR2GqWtQWgA SR7SzoUKE3eE14v5dvf0janWtMkl8pSh9dzFA47bYaQ9Qy8FWQ6gOugclVOAG+qLIi4B 8+UA== X-Gm-Message-State: AFqh2kp3RUyDPUctKMITWf5kkPCIZ6Cq8ner6woMPoFnPKD7J0RnyEuG LwmHOzqea4N6+5jzPRL/PKwRNXHHi0w= X-Google-Smtp-Source: AMrXdXtaMEzriXRGDDaPyWp6cI2qFr9C55J9V7x8prQAmgawr911NF2wOHqTeINFz8M7FkUN1SViGFe49Jc= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a25:3c9:0:b0:803:3696:c473 with SMTP id 192-20020a2503c9000000b008033696c473mr2168756ybd.224.1674848514947; Fri, 27 Jan 2023 11:41:54 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:54 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-18-surenb@google.com> Subject: [PATCH v2 17/33] mm/mremap: write-lock VMA while remapping it to a new address range From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com, Laurent Dufour Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Write-lock VMA as locked before copying it and when copy_vma produces a new VMA. Signed-off-by: Suren Baghdasaryan Reviewed-by: Laurent Dufour --- mm/mmap.c | 1 + mm/mremap.c | 1 + 2 files changed, 2 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 60038c24d836..b3c247073aa0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3202,6 +3202,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct= **vmap, get_file(new_vma->vm_file); if (new_vma->vm_ops && new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); + vma_start_write(new_vma); if (vma_link(mm, new_vma)) goto out_vma_link; *need_rmap_locks =3D false; diff --git a/mm/mremap.c b/mm/mremap.c index 411a85682b58..dd541e59edda 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -623,6 +623,7 @@ static unsigned long move_vma(struct vm_area_struct *vm= a, return -ENOMEM; } =20 + vma_start_write(vma); new_pgoff =3D vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); new_vma =3D copy_vma(&vma, new_addr, new_len, new_pgoff, &need_rmap_locks); --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id D07A6C54EAA for ; Fri, 27 Jan 2023 19:58:03 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231892AbjA0T6B (ORCPT ); Fri, 27 Jan 2023 14:58:01 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37534 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229776AbjA0T5g (ORCPT ); Fri, 27 Jan 2023 14:57:36 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 25D824A20F for ; Fri, 27 Jan 2023 11:55:34 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id x188-20020a2531c5000000b00716de19d76bso6391663ybx.19 for ; Fri, 27 Jan 2023 11:55:34 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=vIuOo+WD7hhMM8dbfJD4mGHBD08/Cqf6l0kIi97gWpI=; b=iI2a7MepfOBNDqB7wu+/83gKbYOOOUW8grtwa3CiYxFM9JfwE6yM70Ne3Bb3nWcBVw Yu75uY4jTmGekedOn8ngeIwpB83dCuwgdGAmo3PP8/5H9WQZ6zUPSiR4iOLBbFN7215j t5fY8PD6baQLdLtkjh6u6ca2lGsT7fHu5oxzuRWhyEjLoCc6wJIYtxHPj4OPz1R/r154 +v9JTIHiwr1S1nQG1EzjVKOQzPqMVCb1Q8Qi6vYQuYU/8I9rd1kQnHGC5kdzCbnlOWk7 FPBIbkrQ5+p0qEBCYmhl8CWtOia+KC/VA/z2TgOLyA8KXzLYreAkUsa906GcFZG1qYyr SCmA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=vIuOo+WD7hhMM8dbfJD4mGHBD08/Cqf6l0kIi97gWpI=; b=Q+bNhDyyt0TgmdfFWixxRubvRkV0JVtQuIRZEjoVFtEwPCokeMtGgJpAB6vGNCK/Iv auIwJVr7QM34ouwh9SgkIz8KlF/QxrZli5tXnMlmB3mt9amDsLQn7+28tYW9k240saMC Y6UGEWUIJkV5kUWltzkcXzVSFlEBIcTs1W/srBOc0AhMcM/U/PVYF3tk+XVwBnA/ySys azK1kA3l5vWVQJHi5/xPCnUwRbUuUo49L+EQESG4S7itbI9L/Fzr4OX7X+bJj1Ge90rF 2or7hcf5cC57gww/4lhBnUf2QGbC7tqK5tL/RZhDyJB8A1SM6/1B+DchGQtRL/WLERsN w2mw== X-Gm-Message-State: AO0yUKXdcn+owEL6f3C+DlxvoIA7Hw27I29aEq3pQbWRWffDX+HWuU+2 CjRZA5tm1PebCb6hrH0CZiUR8bgNZA4= X-Google-Smtp-Source: AK7set91XCOXqw0Dg5UJpzZX/mqq3ueYFNiPcfvNNp2V4Tl7t8riuyYgZdoB/8yFEdiYxfiOzkRBBcWpyqU= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:3e15:0:b0:506:653c:db97 with SMTP id l21-20020a813e15000000b00506653cdb97mr1675918ywa.201.1674848517274; Fri, 27 Jan 2023 11:41:57 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:55 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-19-surenb@google.com> Subject: [PATCH v2 18/33] mm: write-lock VMAs before removing them from VMA tree From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Write-locking VMAs before isolating them ensures that page fault handlers don't operate on isolated VMAs. Signed-off-by: Suren Baghdasaryan --- mm/mmap.c | 1 + mm/nommu.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index b3c247073aa0..5bdfd087b632 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2261,6 +2261,7 @@ int split_vma(struct vma_iterator *vmi, struct vm_are= a_struct *vma, static inline int munmap_sidetree(struct vm_area_struct *vma, struct ma_state *mas_detach) { + vma_start_write(vma); mas_set_range(mas_detach, vma->vm_start, vma->vm_end - 1); if (mas_store_gfp(mas_detach, vma, GFP_KERNEL)) return -ENOMEM; diff --git a/mm/nommu.c b/mm/nommu.c index 57ba243c6a37..2ab162d773e2 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -588,6 +588,7 @@ static int delete_vma_from_mm(struct vm_area_struct *vm= a) current->pid); return -ENOMEM; } + vma_start_write(vma); cleanup_vma_from_mm(vma); =20 /* remove from the MM's tree and list */ @@ -1519,6 +1520,10 @@ void exit_mmap(struct mm_struct *mm) */ mmap_write_lock(mm); for_each_vma(vmi, vma) { + /* + * No need to lock VMA because this is the only mm user and no + * page fault handled can race with it. + */ cleanup_vma_from_mm(vma); delete_vma(mm, vma); cond_resched(); --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 59F8EC61DA4 for ; Fri, 27 Jan 2023 19:45:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232675AbjA0Tor (ORCPT ); Fri, 27 Jan 2023 14:44:47 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51764 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229908AbjA0TnY (ORCPT ); Fri, 27 Jan 2023 14:43:24 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 58F98241DB for ; Fri, 27 Jan 2023 11:43:04 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-5066df312d7so65762327b3.0 for ; Fri, 27 Jan 2023 11:43:04 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=PbZvlTacNRMtTYgLZEFyk5N8KO4rHMcD13osXtjRKQc=; b=dIXUHHJyt3zldvcJ25OA8M/N2ekAkSadxrM4Vc86G1vAxHUN7FQNbSzlWzbH3EF3Ay Np/mC7lgd8yNrbdx+fJZdOiPs5Ax7Y+isaOSeA0W3slHJVunqRE0R5gvKOYJUJLOV6vy eusS7uxTBSUHthbBE/M+AcF029iuITRiXxkeDAE1bHTcC4qSxwHlMi32ImCbpO9h9EMV +wvjzOskJitBDhg128jcOLxlukATYCWhVPwB6m3ynKWMcyTvnD5SZLfuK2oGD5H+Rl4/ hAABUVAQozzvBCIG2lhb5atfzqUlxx9w7Rjl6rs3/T9zVK0hATJevSe9VLcJ9uJd0aXA HxXQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=PbZvlTacNRMtTYgLZEFyk5N8KO4rHMcD13osXtjRKQc=; b=OKM2kmpli9KDeUFRuIAXDrdxpZOJYp8dv2/63eL69ttgejgCS+EsZUlJA4VrOOM3UW BL6G0wRFMHsNk1RSDZpx7WDYC7LrH0rVKJWGcmsY8VHIWtwQpqJ4aLIfBI6cAG2v9bMy E0oR0cNENUDepWaos+sR3+hfXD5sKgvGkPXgluCn2DWtZKBJPnEXTqqYsHXgiDKUVeav UNRt56YcZS+ZLX0qqyBsqkdp+2x+u5ZHGvxD5XU2TLpy2GwtXE8SpSCX/FyQEOU/XfCn R3mmYNA/ge/p7O/+OeviQeNO2G+7vVqgektrIj5L4DEu38jg3hpfTEeflrqe1/RlAnBG kqKQ== X-Gm-Message-State: AFqh2ko0Ble8j1LpeqmY2aFD5MF7qmLw1UYTHcYIgDrHvynN3MT5BQrb jujQt567/JM1MpY/Fv0KdTbje95qbmo= X-Google-Smtp-Source: AMrXdXtxOfNby/WJjyD0PPf8WyFmfovtGs47EqfediusuKgjOEAtqZl1SXyt3bJWMQsRbQEhpFz5LQpAf70= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:cd4:0:b0:4dc:e929:40fa with SMTP id 203-20020a810cd4000000b004dce92940famr5633708ywm.331.1674848519684; Fri, 27 Jan 2023 11:41:59 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:56 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-20-surenb@google.com> Subject: [PATCH v2 19/33] mm: conditionally write-lock VMA in free_pgtables From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Normally free_pgtables needs to lock affected VMAs except for the case when VMAs were isolated under VMA write-lock. munmap() does just that, isolating while holding appropriate locks and then downgrading mmap_lock and dropping per-VMA locks before freeing page tables. Add a parameter to free_pgtables for such scenario. Signed-off-by: Suren Baghdasaryan --- mm/internal.h | 2 +- mm/memory.c | 6 +++++- mm/mmap.c | 5 +++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 90bb2078444c..52d7e9c2e58f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -105,7 +105,7 @@ void folio_activate(struct folio *folio); =20 void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long floor, - unsigned long ceiling); + unsigned long ceiling, bool mm_wr_locked); void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte); =20 struct zap_details; diff --git a/mm/memory.c b/mm/memory.c index 7a04a1130ec1..d48c76e9fa57 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -348,7 +348,7 @@ void free_pgd_range(struct mmu_gather *tlb, =20 void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *vma, unsigned long floor, - unsigned long ceiling) + unsigned long ceiling, bool mm_wr_locked) { MA_STATE(mas, mt, vma->vm_end, vma->vm_end); =20 @@ -366,6 +366,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple= _tree *mt, * Hide vma from rmap and truncate_pagecache before freeing * pgtables */ + if (mm_wr_locked) + vma_start_write(vma); unlink_anon_vmas(vma); unlink_file_vma(vma); =20 @@ -380,6 +382,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple= _tree *mt, && !is_vm_hugetlb_page(next)) { vma =3D next; next =3D mas_find(&mas, ceiling - 1); + if (mm_wr_locked) + vma_start_write(vma); unlink_anon_vmas(vma); unlink_file_vma(vma); } diff --git a/mm/mmap.c b/mm/mmap.c index 5bdfd087b632..57cb3a2ac9b1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2157,7 +2157,8 @@ static void unmap_region(struct mm_struct *mm, struct= maple_tree *mt, update_hiwater_rss(mm); unmap_vmas(&tlb, mt, vma, start, end, mm_wr_locked); free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, - next ? next->vm_start : USER_PGTABLES_CEILING); + next ? next->vm_start : USER_PGTABLES_CEILING, + mm_wr_locked); tlb_finish_mmu(&tlb); } =20 @@ -3069,7 +3070,7 @@ void exit_mmap(struct mm_struct *mm) mmap_write_lock(mm); mt_clear_in_rcu(&mm->mm_mt); free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, - USER_PGTABLES_CEILING); + USER_PGTABLES_CEILING, true); tlb_finish_mmu(&tlb); =20 /* --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6C55EC54EAA for ; Fri, 27 Jan 2023 19:45:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232735AbjA0Tow (ORCPT ); Fri, 27 Jan 2023 14:44:52 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52018 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232218AbjA0Tng (ORCPT ); Fri, 27 Jan 2023 14:43:36 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AC901241F3 for ; Fri, 27 Jan 2023 11:43:04 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id r8-20020a252b08000000b007b989d5e105so6377360ybr.11 for ; Fri, 27 Jan 2023 11:43:04 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=1VGgj1TmnjNrGwceq1Y9mwtgHdomU/ZJ3zMBBp7twvM=; b=kcxorgjDvieL4H0gsZ4o6ev4StdVYsVDA7GZVrIORQdjZKoFiGI1KoZQdzzvMshMrE snI0AzaYSvDLC9zEn2IBQumHO6CKLkrL/i3wJ9ueET6bm8oRn7QEPBX+BM7UODPO3hzZ U4MGRqXmViJMsS4HDNU6vnAmwplZNvnqe2D+qu3fGFBMwOJOM+QBc0aicDuQnfczJKX+ vPW/+j1Kiez+csRZc1jn4fVsV2fgBwTNZpRX4guJZVX8gsy3dFik2ggDGPcziixMOIIZ ZlSkjqSVG2I0FEWcD7z9JqxBzo/zAlN1m8/Qkt+nlr6qzU3SYltCmGn1Rp59psmpvaAD Fzjw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=1VGgj1TmnjNrGwceq1Y9mwtgHdomU/ZJ3zMBBp7twvM=; b=3iK6K7ENcx0k/Repn0MSoWvvLTAkiaupoqzbJ2J0EsnnXLCAuzJqYfQQrRP6M7tcWS 1+duRey7zR0EK6d4M6MIZf4HwWIlZNJUmHOrefqK4n3Zz+AsNDIoUJKbehzVNPUnic+f 3GOCMoKT4NWQdH+wrdW5qF5ibSVguw3mP6gbZRUAr5loUpU2Tzq7B0zMguGvtOiEePq8 OtpZKFfu2GGuy0MJloCPFF123Xb80fcWttjRiXSEJPQSPFYoDO7TJj2NwsjhbZdlm5je OPmP9dPsnFRqpYPaVKenngglK5pMLJ+RalIVjr89XF0ICpTBB2d0tw2mjemdF3QrtSYH LITw== X-Gm-Message-State: AO0yUKXLeTr7jBoLd3GntH8blmAjJh47+3B5S/nKd1LE/QKrg0aZutaT NggF7seh9q3XZlgAWmAGbWxu5pS+uiY= X-Google-Smtp-Source: AK7set/jPsOwzlm+kUPz+CABbySieLCP3cL/8jypOFocWiEQOE8fcbH4Z91B+4ltIKP12TkxYenF+FhrAT0= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a0d:cb8b:0:b0:50b:1b97:f19a with SMTP id n133-20020a0dcb8b000000b0050b1b97f19amr434148ywd.164.1674848522027; Fri, 27 Jan 2023 11:42:02 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:57 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-21-surenb@google.com> Subject: [PATCH v2 20/33] mm/mmap: write-lock adjacent VMAs if they can grow into unmapped area From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" While unmapping VMAs, adjacent VMAs might be able to grow into the area being unmapped. In such cases write-lock adjacent VMAs to prevent this growth. Signed-off-by: Suren Baghdasaryan --- mm/mmap.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 57cb3a2ac9b1..3baf218836bb 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2399,11 +2399,13 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struc= t vm_area_struct *vma, * down_read(mmap_lock) and collide with the VMA we are about to unmap. */ if (downgrade) { - if (next && (next->vm_flags & VM_GROWSDOWN)) + if (next && (next->vm_flags & VM_GROWSDOWN)) { + vma_start_write(next); downgrade =3D false; - else if (prev && (prev->vm_flags & VM_GROWSUP)) + } else if (prev && (prev->vm_flags & VM_GROWSUP)) { + vma_start_write(prev); downgrade =3D false; - else + } else mmap_write_downgrade(mm); } =20 --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7D554C38142 for ; Fri, 27 Jan 2023 19:45:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232244AbjA0TpU (ORCPT ); Fri, 27 Jan 2023 14:45:20 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51566 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232287AbjA0Tns (ORCPT ); Fri, 27 Jan 2023 14:43:48 -0500 Received: from mail-pl1-x649.google.com (mail-pl1-x649.google.com [IPv6:2607:f8b0:4864:20::649]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 8254D2FCC4 for ; Fri, 27 Jan 2023 11:43:05 -0800 (PST) Received: by mail-pl1-x649.google.com with SMTP id z10-20020a170902ccca00b001898329db72so3335005ple.21 for ; Fri, 27 Jan 2023 11:43:05 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=Cy3b4YdYe1JheypyM1qEqlntx+LNPOiVtlvqoTpRTGk=; b=oI0LJkxWJWb4FwrADuA4kk3Z1VuG/PqmB+wfudzVRf1FutZSssy7j1ZDpJRsJeC2x6 AK+2fBcgvA6Z1yebvss3n+ODruOq7UlamXlS7DfsbRapPdytBdlVYqH5VQCwBS0bW5Cs 14NyRgqd5c0dS96F3WtaSc6UYIsY2Q866sZsgDc9kakHc8RR5OiJX50DOvF/naV3sW35 K5NQwDrUGeBVt7kutcZKVO5xLo4MSKoGaFWDyWGZ3caTDR6ZBpyZR8N/O+LnlyPsEjZa UEkxm7VSV5agQqzuF3YgYMg0fZBeMdeXvS4WgRk9vqB/9OKyGK8SEZXO7u7cVN0V9JOR wUhw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=Cy3b4YdYe1JheypyM1qEqlntx+LNPOiVtlvqoTpRTGk=; b=nH0a9Tz1hFZgirjd/YDxR0lQAY6YbZoxaOIZcQiFuxjz9z6rP/0B6fLXepvpCVKxFS lMualyArBM3DCZ/JaMTB+CYUHEDYNxjCPyQXsp4O1ZvJddq9nb9AhjdNsyk4ZvB0OaY0 KikJn5wc9cV+nqkK5wZ37Gv5mOXeZJA3amaqLoIJFlczkIE3kg/5D36YXMG6zzAr7MYw OIISF59YAnZUX01QKZMdvfnp2HJGuwQbooaC/L/MI6fJYdcmhOdVUgeMEYeKOBTLyPhw 6Ked58SwVaMC+thfsr+k2kXN059RdymwtjfJbVRbDMGUsam1IxI/G9Fe+omduRw6veTh x0NA== X-Gm-Message-State: AO0yUKUu6SI5c84gN2C4n7bggG9LLXw6mskn9v40zGwF3RRmIqTQ6UDz yghLAPPUryuIOKvnNjmGsQDJveHOjI0= X-Google-Smtp-Source: AK7set/yd1hcYRvbMqSOfXS0bVRoPLBLaG1qWLcI1OwZ+QZcbDYXkFqGvC1aaqenThgkEbN36zZqWHoF630= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a63:ba1a:0:b0:4dc:3b99:6f67 with SMTP id k26-20020a63ba1a000000b004dc3b996f67mr1091417pgf.47.1674848524282; Fri, 27 Jan 2023 11:42:04 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:58 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-22-surenb@google.com> Subject: [PATCH v2 21/33] kernel/fork: assert no VMA readers during its destruction From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Assert there are no holders of VMA lock for reading when it is about to be destroyed. Signed-off-by: Suren Baghdasaryan --- kernel/fork.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/fork.c b/kernel/fork.c index 9141427a98b2..a08cc0e2bfde 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -491,6 +491,9 @@ static void vm_area_free_rcu_cb(struct rcu_head *head) { struct vm_area_struct *vma =3D container_of(head, struct vm_area_struct, vm_rcu); + + /* The vma should not be locked while being destroyed. */ + VM_BUG_ON_VMA(rwsem_is_locked(&vma->lock), vma); __vm_area_free(vma); } #endif --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8CE29C61DB3 for ; Fri, 27 Jan 2023 19:45:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232800AbjA0To6 (ORCPT ); Fri, 27 Jan 2023 14:44:58 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51564 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232244AbjA0Tng (ORCPT ); Fri, 27 Jan 2023 14:43:36 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6DECC2DE4B for ; Fri, 27 Jan 2023 11:43:05 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-4c8e781bc0aso66308987b3.22 for ; Fri, 27 Jan 2023 11:43:05 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=mMW5WtXlwQRVw5jLv0uMPkgP2YoyRT4XQf8L1G2g0sM=; b=mdQhCODmER/WcuXnOwjGOPGsluXP611R3K8yAAqGfZO+eojPR0iLkDWsNYoGq20Pxc pLGndC0gNc0k/+qBk7J/uFwYPbCjsW7XmlHDcZCSFndQyDIXmYjY6yxKamOAoeP6UD5R 10GuF5qfONyUFHlhJRc27yht8CtL7+xZMg96kBN6doFn/8DSebIrGsIvYM1XfrVRdrIT Vzd4VicNtC2Hx0ebeUSEC751adwD7ZH4dP9Tg/rHSx32WXBUDV2TcmMYgTc4y3CEq8j+ 2IuM945ICEGJ6/3bquYB/9nBx6UvS9o2Tl6iob7NJEFRIs2YxCbcwKtUsh6O7/MdtiEH lFnw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=mMW5WtXlwQRVw5jLv0uMPkgP2YoyRT4XQf8L1G2g0sM=; b=YIac+PxJGZptkH5hsFdPViXAs5WSWvfp2/AQyEb+fMGNv9+3L3/+6EgX+YxG1xIF/H 0zm5S457OYuR9+PXprRr0Gf75fVyCu/0QQRdXkWI0StOn7TEB8Qou/ncjlyt8JTwVERB uDfE7Xzv2HpeCfUyxQ90K5UDTGZpLXEueCNtrpgr8kcwnBFOdcdmunv9smmna4EbfaqL 1zPpCGX6pGnJzRSYkFbIbHIHqFtSobDA9dxhZM4byM1nGOAJlMXxymLKEnGFf+U+5zi0 MGidSYdSrIhMRE/WIHKrnc/Us1tgO+iGGI0Id7h96AHkTnFrrDliOUHVNW5JufhiI8PI N8Fw== X-Gm-Message-State: AFqh2kpOoiFZo+llAgVkjhK3CHgBN1G5zbNvyIFbn37Xyog9Z4akpYvn St41weWyf/OgjepfEUBWZUNw9W+CmH0= X-Google-Smtp-Source: AMrXdXvm/6CR5hD1SSLODW5O6UqAFkTGTyEvJF4A+5Su+1PwVS4k4WIa/asYlNCGXFd3Q9zHSTloGdPuiKg= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a25:1fd4:0:b0:7d4:712e:2852 with SMTP id f203-20020a251fd4000000b007d4712e2852mr4289060ybf.628.1674848526673; Fri, 27 Jan 2023 11:42:06 -0800 (PST) Date: Fri, 27 Jan 2023 11:40:59 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-23-surenb@google.com> Subject: [PATCH v2 22/33] mm/mmap: prevent pagefault handler from racing with mmu_notifier registration From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Page fault handlers might need to fire MMU notifications while a new notifier is being registered. Modify mm_take_all_locks to write-lock all VMAs and prevent this race with page fault handlers that would hold VMA locks. VMAs are locked before i_mmap_rwsem and anon_vma to keep the same locking order as in page fault handlers. Signed-off-by: Suren Baghdasaryan --- mm/mmap.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 3baf218836bb..3d0cfbc92745 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3501,6 +3501,7 @@ static void vm_lock_mapping(struct mm_struct *mm, str= uct address_space *mapping) * of mm/rmap.c: * - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for * hugetlb mapping); + * - all vmas marked locked * - all i_mmap_rwsem locks; * - all anon_vma->rwseml * @@ -3523,6 +3524,13 @@ int mm_take_all_locks(struct mm_struct *mm) =20 mutex_lock(&mm_all_locks_mutex); =20 + mas_for_each(&mas, vma, ULONG_MAX) { + if (signal_pending(current)) + goto out_unlock; + vma_start_write(vma); + } + + mas_set(&mas, 0); mas_for_each(&mas, vma, ULONG_MAX) { if (signal_pending(current)) goto out_unlock; @@ -3612,6 +3620,7 @@ void mm_drop_all_locks(struct mm_struct *mm) if (vma->vm_file && vma->vm_file->f_mapping) vm_unlock_mapping(vma->vm_file->f_mapping); } + vma_end_write_all(mm); =20 mutex_unlock(&mm_all_locks_mutex); } --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C4E21C38142 for ; Fri, 27 Jan 2023 19:45:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231879AbjA0Tp1 (ORCPT ); Fri, 27 Jan 2023 14:45:27 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50640 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229828AbjA0ToC (ORCPT ); Fri, 27 Jan 2023 14:44:02 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C2CDB1E1FF for ; Fri, 27 Jan 2023 11:43:06 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-4b34cf67fb6so65252337b3.6 for ; Fri, 27 Jan 2023 11:43:06 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=5PvsAEkfbXSvdKN+lTJ9M9+vqfLQVBlihpxSKUw/t/s=; b=kp92vFUO2N4rCYUenom2PgB/dU7qqaylHAcH6HVVMYT5QMpL/oqQ/DdOm4bVNlYLNw 9rNSkse+dfgOJKwj09yKdpzV2qc7upA8mjHmEpxt3RKsaa3pDRks1QGh3Nb2hG4bp/jU 3eSArvNsdqtxNb6zpVi6gl2vf0iZr8y+PtaslVXMeV9Pu9xS/EmrXvCiajKJa9iCZe4S LpK/JO6vbmew4JxbSzbZQyffZL4sOt9sBlPWbT9WmPY7+pp+Ff0i+9BT7l4WdUaIrrgA btwhF2XkX8IUP7VpoyJ9OAYm06ddrZftAFhV0gBzofVRmYYEJ8dT0O9gMwk7XMh0b2Ct qw3A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=5PvsAEkfbXSvdKN+lTJ9M9+vqfLQVBlihpxSKUw/t/s=; b=XqblgBp0ZkG/sgPug/J34bMEfsbtHxL6yOVv+f7o/BSbKodPd6q7cCfA8u461LY5Pv dlK2okGts7uCHAyqMC96o+jPafmSkVuCMIsP65hwWTj08ZfNYWALbmOqYiy1bBRMkQFk W1WRaPTar3im9hcu5qFc4bRz9/S+068dUnpH18sk5TuaRQdc9aWhZwADGBSbGcbUj5ii UV4NGTciJVwmi9KsnDHDS8AxetTQ1AsJhntQfOcQpG12y1r+3am15vYq9gaPJzVouu9O ijiY8n7styqBu+E/wXo8t9aD6owrLAttN8XZ6sQ3xeSZD/vWfnOr0ogsBJ2nkhve/QG+ D2Uw== X-Gm-Message-State: AO0yUKWye3Is3CNTLv9r7Ip5yfYI4YB4Ybd6JRFHC6sCBuZVWc10Su/T JS1vnUejk8RzDriL5IK+Q+EjBwebZN4= X-Google-Smtp-Source: AK7set8zsvT5wkP/7moSYA7dIHSCpHQad2RzXlV5c0Wh0zEZrq5yaOQZsQ7iykW9DnqHdXZz8PLHMfkZLc8= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:6d2:0:b0:50e:e91d:f47 with SMTP id 201-20020a8106d2000000b0050ee91d0f47mr27189ywg.61.1674848529047; Fri, 27 Jan 2023 11:42:09 -0800 (PST) Date: Fri, 27 Jan 2023 11:41:00 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-24-surenb@google.com> Subject: [PATCH v2 23/33] mm: introduce lock_vma_under_rcu to be used from arch-specific code From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Introduce lock_vma_under_rcu function to lookup and lock a VMA during page fault handling. When VMA is not found, can't be locked or changes after being locked, the function returns NULL. The lookup is performed under RCU protection to prevent the found VMA from being destroyed before the VMA lock is acquired. VMA lock statistics are updated according to the results. For now only anonymous VMAs can be searched this way. In other cases the function returns NULL. Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 3 +++ mm/memory.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 66dca140695e..fa2b9d6e665e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -687,6 +687,9 @@ static inline void vma_assert_write_locked(struct vm_ar= ea_struct *vma) VM_BUG_ON_VMA(vma->vm_lock_seq !=3D READ_ONCE(vma->vm_mm->mm_lock_seq), v= ma); } =20 +struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + unsigned long address); + #else /* CONFIG_PER_VMA_LOCK */ =20 static inline void vma_init_lock(struct vm_area_struct *vma) {} diff --git a/mm/memory.c b/mm/memory.c index d48c76e9fa57..5568fcb0a46b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5222,6 +5222,57 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vm= a, unsigned long address, } EXPORT_SYMBOL_GPL(handle_mm_fault); =20 +#ifdef CONFIG_PER_VMA_LOCK +/* + * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed = to be + * stable and not isolated. If the VMA is not found or is being modified t= he + * function returns NULL. + */ +struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + unsigned long address) +{ + MA_STATE(mas, &mm->mm_mt, address, address); + struct vm_area_struct *vma, *validate; + + rcu_read_lock(); + vma =3D mas_walk(&mas); +retry: + if (!vma) + goto inval; + + /* Only anonymous vmas are supported for now */ + if (!vma_is_anonymous(vma)) + goto inval; + + if (!vma_start_read(vma)) + goto inval; + + /* Check since vm_start/vm_end might change before we lock the VMA */ + if (unlikely(address < vma->vm_start || address >=3D vma->vm_end)) { + vma_end_read(vma); + goto inval; + } + + /* Check if the VMA got isolated after we found it */ + mas.index =3D address; + validate =3D mas_walk(&mas); + if (validate !=3D vma) { + vma_end_read(vma); + count_vm_vma_lock_event(VMA_LOCK_MISS); + /* The area was replaced with another one. */ + vma =3D validate; + goto retry; + } + + rcu_read_unlock(); + return vma; +inval: + rcu_read_unlock(); + count_vm_vma_lock_event(VMA_LOCK_ABORT); + return NULL; +} +#endif /* CONFIG_PER_VMA_LOCK */ + #ifndef __PAGETABLE_P4D_FOLDED /* * Allocate p4d page table. --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1B6BDC61DA4 for ; Fri, 27 Jan 2023 19:45:50 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232239AbjA0Tpr (ORCPT ); Fri, 27 Jan 2023 14:45:47 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51668 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232372AbjA0ToU (ORCPT ); Fri, 27 Jan 2023 14:44:20 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5E5BF53B0C for ; Fri, 27 Jan 2023 11:43:13 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id d21-20020a25add5000000b007ff8112e8b8so6430225ybe.17 for ; Fri, 27 Jan 2023 11:43:13 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=gSaEMik01Wf1+Bdt2RmdUMjgYY7lDp9hLxJUYqXXyyM=; b=rcBVm8w95my/f0ebkBHOBSdk/g3dXIBnrvyKhE+0lfp8SBBL9UDohndGfm9prz69Nb 1VsGdk5hVjbZdd99cfs1kRzqndkhM72YtTrleRtcoKwDWYV7T956W4hi9P14DNVnfcTE 4dzd8voX/MwzX+niWcYh6zC2gtPII1RsQu2W+o1SzbH/VUzyZkZ+K0mbi6bCuvIO2r/9 q8JUw2FMeYJjbLjibdrizaV+CI77RGRlLfF03fxNpzuIRE1ymkIpYCAH7a7Vh5p4LTWM djOGRW9GCrBrxw2Z+lpBSKPiwjL/b62fESDSi0yRTpPNMfCaBkk0gvqmlpNjM1674+ZN MkCw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=gSaEMik01Wf1+Bdt2RmdUMjgYY7lDp9hLxJUYqXXyyM=; b=r15GmN1poUZ4dQgu9keNOfHmA9pK8DE5sJA1vaiMC5qTL247YoHvDo7oijSFx/35bd JZp4584jIVWImblKL97t9Yurl7aJFXrxohImUMRERGgABrzmPeGakcBKkd3BfR8AoF/Q 1WTcHq7Kv9pRfr1hAErsHoMhrGQrTQHoT75UaPzIjwrCEwcjTM+5K8uNVyeD1iI/6wIc w7yPO1JSxH7hXSiEwz4pSRzTpNLH0Vmr8kohycaCya4GS7jTDWoURVmIy842vrj7AIG0 GRv4sO29D9bM2ThxDQf9xC+DH4EtAqFtXoonRV3XHrEQ7pKOwheMbmCwr7kSUVXoz1Pn ydmg== X-Gm-Message-State: AO0yUKX0Pkwt6LNpxFwq8GsuuFWMUovf3ja40//j+NJeT2eRrZTWL95I 4KXGpjQvMO3HClo9LM4f0VhRaI/EMY0= X-Google-Smtp-Source: AK7set9ZuZbDGpVLmgenU71+ffHh46VvnF49YmmPOU0QZTkBBursiC5h0isQQWJJC0fjjZLxRphOeOdStg0= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a0d:d8d5:0:b0:508:1348:3e54 with SMTP id a204-20020a0dd8d5000000b0050813483e54mr1003857ywe.440.1674848531551; Fri, 27 Jan 2023 11:42:11 -0800 (PST) Date: Fri, 27 Jan 2023 11:41:01 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-25-surenb@google.com> Subject: [PATCH v2 24/33] mm: fall back to mmap_lock if vma->anon_vma is not yet set From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When vma->anon_vma is not set, page fault handler will set it by either reusing anon_vma of an adjacent VMA if VMAs are compatible or by allocating a new one. find_mergeable_anon_vma() walks VMA tree to find a compatible adjacent VMA and that requires not only the faulting VMA to be stable but also the tree structure and other VMAs inside that tree. Therefore locking just the faulting VMA is not enough for this search. Fall back to taking mmap_lock when vma->anon_vma is not set. This situation happens only on the first page fault and should not affect overall performance. Signed-off-by: Suren Baghdasaryan --- mm/memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 5568fcb0a46b..593548f24007 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5244,6 +5244,10 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_= struct *mm, if (!vma_is_anonymous(vma)) goto inval; =20 + /* find_mergeable_anon_vma uses adjacent vmas which are not locked */ + if (!vma->anon_vma) + goto inval; + if (!vma_start_read(vma)) goto inval; =20 --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 225A0C38142 for ; Fri, 27 Jan 2023 19:45:42 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232093AbjA0Tpk (ORCPT ); Fri, 27 Jan 2023 14:45:40 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51634 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232096AbjA0ToH (ORCPT ); Fri, 27 Jan 2023 14:44:07 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E399459249 for ; Fri, 27 Jan 2023 11:43:09 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id x8-20020a25e008000000b007cb4f1e3e57so1753144ybg.8 for ; Fri, 27 Jan 2023 11:43:09 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=2v/Evza9dkU4Ne98Lm6HAWxL8/YazIwD41pmx7joRkE=; b=mLwUPJ0nc0ulmmwFoyR9amwtEYn4GV75pogWN7ijAK31MqrZCBgfhijsONjhepzopP 2wYY2SkQatHOd+vKpDSxYxHFT1IvhzZTwDWuTcrFWygojUbcZe7xPlEiO9BQnq0w9fKe z4cHjJJqSwoele+PetnaYF8lewRv+be+h1Bgt+SNNsmNqcyEHhiyYbNRieovHRIQnY7Q KgzOHSrbk4QlZX3GRmHUucd8wrsetfG2ey7u2AbUsjqg+O7jZuWXERmenSwIYYomTRZT 0AcxNZ8Sif0L8fvqzJHXb63zST/Gg9+vMF9oOa1KQZBohhw7ZSFfQ9eLw7FWbw/nE4Qf wodg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=2v/Evza9dkU4Ne98Lm6HAWxL8/YazIwD41pmx7joRkE=; b=QBkDh/fgibMqfPYkzOeT4m1VgVC0juOZbAcCoOZ57g5udLbkyN890b1oCsdQAxOM3f gTddq7va9/XEZLpWB0rvdUzsS1Ez7x6STtZbhOs6vp5q1Z5Tw2k4gEaJ4Y5rs2Lvd04V Gqzf5TK47TTQ0C7fXMm/2lyz87dWNrgkF8Q4mSNgF895XWd+mrilrNgU77Mgvkelnrx+ Gcam0vhfYHHtRkPHRP8TUUxeD5zme/V42WJAtm7R8CvetJ1l9zWBc9g80X1RuPo1udEq Z+RSyOSXRn4Q48tvMDOb73cjBfdHRveDTo3vFwDGFwR8uaNC6YqRuvBDXkXse3Yf09LQ Izsw== X-Gm-Message-State: AO0yUKWk9EJ52y4Xbqc+d5p7fidh16VmtoDUmMa4x23EsKdkEbGIW74d 2SSj60s/svTQmHfShakIYk8cjkmdSS0= X-Google-Smtp-Source: AK7set9kKqDiJe5U7EOGuYIrwgRntRqTHzt+zr2t5qOdc5+YeQS/lL3tZ/xnNoww1dZdAcqE9B1Pln9KCVk= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:b2c6:0:b0:509:5b20:ff22 with SMTP id q189-20020a81b2c6000000b005095b20ff22mr917640ywh.328.1674848534136; Fri, 27 Jan 2023 11:42:14 -0800 (PST) Date: Fri, 27 Jan 2023 11:41:02 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-26-surenb@google.com> Subject: [PATCH v2 25/33] mm: add FAULT_FLAG_VMA_LOCK flag From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com, Laurent Dufour Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a new flag to distinguish page faults handled under protection of per-vma lock. Signed-off-by: Suren Baghdasaryan Reviewed-by: Laurent Dufour --- include/linux/mm.h | 3 ++- include/linux/mm_types.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index fa2b9d6e665e..a6de58bb40c7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -472,7 +472,8 @@ static inline bool fault_flag_allow_retry_first(enum fa= ult_flag flags) { FAULT_FLAG_USER, "USER" }, \ { FAULT_FLAG_REMOTE, "REMOTE" }, \ { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \ - { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } + { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }, \ + { FAULT_FLAG_VMA_LOCK, "VMA_LOCK" } =20 /* * vm_fault is filled by the pagefault handler and passed to the vma's diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 88619c6a29a3..c4c43f10344a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1053,6 +1053,7 @@ enum fault_flag { FAULT_FLAG_INTERRUPTIBLE =3D 1 << 9, FAULT_FLAG_UNSHARE =3D 1 << 10, FAULT_FLAG_ORIG_PTE_VALID =3D 1 << 11, + FAULT_FLAG_VMA_LOCK =3D 1 << 12, }; =20 typedef unsigned int __bitwise zap_flags_t; --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2DCECC54EAA for ; Fri, 27 Jan 2023 19:46:59 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232625AbjA0Tq5 (ORCPT ); Fri, 27 Jan 2023 14:46:57 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51614 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232574AbjA0TqY (ORCPT ); Fri, 27 Jan 2023 14:46:24 -0500 Received: from mail-yb1-f202.google.com (mail-yb1-f202.google.com [209.85.219.202]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 8841A91FA5 for ; Fri, 27 Jan 2023 11:44:06 -0800 (PST) Received: by mail-yb1-f202.google.com with SMTP id g138-20020a25db90000000b0080c27bde887so2635506ybf.18 for ; Fri, 27 Jan 2023 11:44:06 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=sBLmwmiYmTj+OGGVUcLxhx0PU/jBUoUEJpANG8soAfU=; b=KWtfJ/7xIlPNyMbJgMly5Sn2+77Pi3kT3bIKcXhSEYdAbccLPUNTncstNEX89dvAhW NM9ImIflxrHOdJ1Mj5fWozRK69MXYGw+crQDfh9Oftap/iRU1kAVPiBZhr/+UuvLj4+U 4LLcR3mdcWgS75oxY+Q2vGwsrXQ6jEpwNWpRZ0m4MKHrCT8iGAtEZyFhrSxdE7CjPr88 djL5aWWS3cMBdV/JlcmaRg0LM4Y33l8LwLfoSQoWCllE6I7RGCcpBFSNwKknGwVlLQii i5R/yajGZAULuwac7zP2AeGWc0Ux4Tm1rMqU81PAQU3nWhlQVK4py4JD6fzjGBwa6RFE gFWQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=sBLmwmiYmTj+OGGVUcLxhx0PU/jBUoUEJpANG8soAfU=; b=EScMkfmTrvjSIQhckS3et5hFdVMdAQ7t4NJZWmp79+IeUbRbSrqb2B0WABMWgBOpdG J2E0Wa+I4RowSzWJnaJ7rQHtpz8WzjPBdjAHHM4YdJ59cu1e3m3qFO4i/MKt4QNkXivd ZXWAYvICxo8UububZK1L9JiHP4r+5TFbesLHIamHVSzqdf3ULzXGdQUqdFa6PsEjEwZP 2TDd8gZq+nLNft2mMMeCsQzwBED3wQhhZFDLbhws9wAWP+UBFmCBe5su9Vi2mVi8vBcm G1Roq8+MxAuKZ9aLnpUtbk1cstW3YlsFStCeusgKsDqef4uYOIH657uh54SUEin1xs8j H8fw== X-Gm-Message-State: AFqh2krZtpaFVvgH8g42FNH/X+FR9Z6+3Eks08phyrv5mHqSg77s5lbJ rXT7yfH8enFdw3wPudX4v2Y5Kdz34p8= X-Google-Smtp-Source: AMrXdXtj44VmRN6kQLB49pXglCfjpmg9iXCqYgU9gl6+Dl4v+hAma4/0PMDtIg4t8IMM3q4b9OalQWjETEg= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:cd7:0:b0:4dd:9ed1:fba0 with SMTP id 206-20020a810cd7000000b004dd9ed1fba0mr5011323ywm.265.1674848536283; Fri, 27 Jan 2023 11:42:16 -0800 (PST) Date: Fri, 27 Jan 2023 11:41:03 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-27-surenb@google.com> Subject: [PATCH v2 26/33] mm: prevent do_swap_page from handling page faults under VMA lock From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com, Laurent Dufour Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Due to the possibility of do_swap_page dropping mmap_lock, abort fault handling under VMA lock and retry holding mmap_lock. This can be handled more gracefully in the future. Signed-off-by: Suren Baghdasaryan Reviewed-by: Laurent Dufour --- mm/memory.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 593548f24007..33ecc850d3cb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3690,6 +3690,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!pte_unmap_same(vmf)) goto out; =20 + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { + ret =3D VM_FAULT_RETRY; + goto out; + } + entry =3D pte_to_swp_entry(vmf->orig_pte); if (unlikely(non_swap_entry(entry))) { if (is_migration_entry(entry)) { --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2B69FC38142 for ; Fri, 27 Jan 2023 19:45:35 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232021AbjA0Tpe (ORCPT ); Fri, 27 Jan 2023 14:45:34 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50906 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232062AbjA0ToH (ORCPT ); Fri, 27 Jan 2023 14:44:07 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 32A505CD1A for ; Fri, 27 Jan 2023 11:43:09 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id x8-20020a25e008000000b007cb4f1e3e57so1753366ybg.8 for ; Fri, 27 Jan 2023 11:43:09 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=dtbb/rgJDVPG5hb8ydKzjEL35WMZYcFrpyL4j03oF8s=; b=GSbfUJd90yi2xMLpxJwkNIQoO7AYaRuo1p0W3egBrsgFnTlql/QObCOWtQyWcyCJri L9IkPEewPotUja2mT2dWXCpgA9vVmlCVPbr2KFEVViPcTyej+SHFX8+fhAEaOP6xtrM/ x5eCKOKsLW06Glc5jxntDrg/hIgANLyaiKO4UzofC0FOHh/fDzJT0Gg9GuJf/nEgmEZV hix6GjSSYXSWndo4QypjFazRowiaazjUscTvm2uHMbtPJDABgUnEX51b4PI7vURt3LgP X4xLvpH0mH+5Sv/7D+0hQE/rAXv1nGKdGusu8zdXbWRFOMqJp4iPgA6p21w37iQT/eTP bctw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=dtbb/rgJDVPG5hb8ydKzjEL35WMZYcFrpyL4j03oF8s=; b=qOUIpBPL+rpft3hZySD7KKTVnepWMqlG6hLPEcKmV4cggdRv1DCq+7V1plUbCs951X lxMH/szkt80Br8JxeG/hL0bKBtnz+4Box8316nGao3F1fjd8lEUK6ofTduGEO1YctdHS ejvqbRHp/TIV1v5Q+Y2zmBbFzAG4QwyhrGqUObzpVIDH8EeMjkhYzAA6rZGeS/yA+JBc ggcv7ztumPD6pY+APTxeGzXS2QpTmAJjOqmhtZuLAZnWLN7He9ZsbLfSZO0lfcYZeC1a NKMWQh7o2pLJ9oL98hwmBqsiH5GimfUl5sNpxN1iOqz//I99LkjJt4JRNxQV2DecaMl4 oMSw== X-Gm-Message-State: AFqh2kqfZfJYOvJouHXBZLzvGYCzq190xmj8L7HZdkMWzzivaLCejg0S KEZOYTGRFj2I+TAVWWInPflYWhwxsJM= X-Google-Smtp-Source: AMrXdXszwUWrk3+ezT3vpCagEl0mvWZa9sEHax26SYGf7QapJl4fBzpxwL0/6TcVCB7ktvgIaczCkSx6r1s= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a0d:fc86:0:b0:4db:3ac0:78a3 with SMTP id m128-20020a0dfc86000000b004db3ac078a3mr5702708ywf.266.1674848538548; Fri, 27 Jan 2023 11:42:18 -0800 (PST) Date: Fri, 27 Jan 2023 11:41:04 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-28-surenb@google.com> Subject: [PATCH v2 27/33] mm: prevent userfaults to be handled under per-vma lock From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Due to the possibility of handle_userfault dropping mmap_lock, avoid fault handling under VMA lock and retry holding mmap_lock. This can be handled more gracefully in the future. Signed-off-by: Suren Baghdasaryan Suggested-by: Peter Xu --- mm/memory.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 33ecc850d3cb..55582c6fa2fd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5256,6 +5256,15 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_= struct *mm, if (!vma_start_read(vma)) goto inval; =20 + /* + * Due to the possibility of userfault handler dropping mmap_lock, avoid + * it for now and fall back to page fault handling under mmap_lock. + */ + if (userfaultfd_armed(vma)) { + vma_end_read(vma); + goto inval; + } + /* Check since vm_start/vm_end might change before we lock the VMA */ if (unlikely(address < vma->vm_start || address >=3D vma->vm_end)) { vma_end_read(vma); --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 507F6C38142 for ; Fri, 27 Jan 2023 19:45:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232048AbjA0Tph (ORCPT ); Fri, 27 Jan 2023 14:45:37 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50934 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231892AbjA0ToH (ORCPT ); Fri, 27 Jan 2023 14:44:07 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4103E2E811 for ; Fri, 27 Jan 2023 11:43:10 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-50660dd3263so65585197b3.19 for ; Fri, 27 Jan 2023 11:43:09 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=QTC+ocJD/ZK0d8AWah1kQLEh3QM8J68abwl+vLpX/MY=; b=mU8gu/pWf6zIsQFo9qZJmzO+C9Rz2zywIzJAt0hiLsBb0sfMScn0ga1WT0lJP4+cuU jnlE49/hYrnAMYh/RLEYzocL5Jy7mfCziMb33lJyjegHt186LkbV/WCwUSBxzV0FGK1I xrqhc33jx9MJLtdss02+APSdeRf2XUnFG09IUKfXCIr+hkTGBQo7qLvcD8/EZZarD0c6 9RiTDx0vtERha59paJtaKyAUCno0Vzl4l7lvqpJzE2nTjhpBkjOXcTvShnV68JU8Wm5n IoRxYa+gg9A7UVOaBxUJpIYlD0HzhsBk7iegMRGVNF5IQVYNK1FSZGz91UjWgsue8k5N kYLA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=QTC+ocJD/ZK0d8AWah1kQLEh3QM8J68abwl+vLpX/MY=; b=n9nUVyn4N6fMDpHmkxC+XdURepWGHSxPuHIFb3iKjAdaDV6xGuNdCfPfgGkiTDt66J wZJqzfJZU6i4HdFuWDrywN9Zj5miz93KzTq6Tqt9As08EMSG5T5WyWf0c0vj52fqVrGp kOhOLZjJ1KwV3+NhZIcTHtHfumls35Wizc3IGJBg79AgWTFugIo9rzq+QY+Cc85/9GF6 dOKEB7Pf7bmz+q1Ounfx5XkCmKIkksfhTtKEWzUUYdV/PAS0qbpTfPPJXZztpBZ+FIB2 W4HH0glsLr5jySTjGCggSCeDtl+9LfA+6Qqs0YPZ/eb4oaOTVFvOokhfClkPlEmBla2U Y4nA== X-Gm-Message-State: AO0yUKVa6UcDtteJJQ+yq6F6KSQhUaXi8ZiuIUcwnAZK1WR2DKjLHKol tXa82CvKE247czvBGnQ+3zTQ9QZpE7w= X-Google-Smtp-Source: AK7set93uG4w1kFwIB1zyvlDAWxZcHXpTslQnEHpzoDHLwlIEt0PkMVZGLSp9rtobNPjvtDDJk1UuyFV7ts= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a25:e705:0:b0:80b:bdc5:5060 with SMTP id e5-20020a25e705000000b0080bbdc55060mr930533ybh.27.1674848540634; Fri, 27 Jan 2023 11:42:20 -0800 (PST) Date: Fri, 27 Jan 2023 11:41:05 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-29-surenb@google.com> Subject: [PATCH v2 28/33] mm: introduce per-VMA lock statistics From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a new CONFIG_PER_VMA_LOCK_STATS config option to dump extra statistics about handling page fault under VMA lock. Signed-off-by: Suren Baghdasaryan --- include/linux/vm_event_item.h | 6 ++++++ include/linux/vmstat.h | 6 ++++++ mm/Kconfig.debug | 7 +++++++ mm/vmstat.c | 6 ++++++ 4 files changed, 25 insertions(+) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 7f5d1caf5890..8abfa1240040 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -149,6 +149,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, DIRECT_MAP_LEVEL3_SPLIT, +#endif +#ifdef CONFIG_PER_VMA_LOCK_STATS + VMA_LOCK_SUCCESS, + VMA_LOCK_ABORT, + VMA_LOCK_RETRY, + VMA_LOCK_MISS, #endif NR_VM_EVENT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 19cf5b6892ce..fed855bae6d8 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -125,6 +125,12 @@ static inline void vm_events_fold_cpu(int cpu) #define count_vm_tlb_events(x, y) do { (void)(y); } while (0) #endif =20 +#ifdef CONFIG_PER_VMA_LOCK_STATS +#define count_vm_vma_lock_event(x) count_vm_event(x) +#else +#define count_vm_vma_lock_event(x) do {} while (0) +#endif + #define __count_zid_vm_events(item, zid, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta) =20 diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 9191897e76af..4965a7333a3f 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -278,3 +278,10 @@ config DEBUG_KMEMLEAK_AUTO_SCAN memory leaks. =20 If unsure, say Y. + +config PER_VMA_LOCK_STATS + bool "Statistics for per-vma locks" + depends on PER_VMA_LOCK + default y + help + Statistics for per-vma locks. diff --git a/mm/vmstat.c b/mm/vmstat.c index 1ea6a5ce1c41..4f1089a1860e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1399,6 +1399,12 @@ const char * const vmstat_text[] =3D { "direct_map_level2_splits", "direct_map_level3_splits", #endif +#ifdef CONFIG_PER_VMA_LOCK_STATS + "vma_lock_success", + "vma_lock_abort", + "vma_lock_retry", + "vma_lock_miss", +#endif #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ }; #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */ --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id CED77C61DA4 for ; Fri, 27 Jan 2023 19:45:45 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232155AbjA0Tpo (ORCPT ); Fri, 27 Jan 2023 14:45:44 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51650 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231825AbjA0ToI (ORCPT ); Fri, 27 Jan 2023 14:44:08 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B8C2859769 for ; Fri, 27 Jan 2023 11:43:10 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-4c11ae6ab25so65258697b3.8 for ; Fri, 27 Jan 2023 11:43:10 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=zr26Tmb2M01sb1afZfnklHdnYeWn0/HWZzrpjXJYooI=; b=I2P3olZBrDoCFDP1jHcaHXqtsXyUzxQr/ws4wX6bBu3CUipyuxA31yRpWa3MnFa/SU pcf5+CCx+WnMbozlzDdIg2ALFwq/OFj8s/q24FIJwL7Tizh8I6eeGtbBFib4zJzLB5md /zjFFlwtWXGghsSwviKTLUSRcwBRmcJaXn6b7nEXFweXpvU3ZyCKtcIxikLC8+UQ9pUQ JujyWegWeeB7qz6qsENh+jKg1foSfeMqF01FuGu+Yb/GyFYoIS6vq3ynrguPkj0pOoLx BYJ8cK1I9Z+jOMo/TqTeh+dlSH9FVfZfZ84FkijhY572Ij8GlcP0ii8VORVWe0sgl8Ck Cp6w== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=zr26Tmb2M01sb1afZfnklHdnYeWn0/HWZzrpjXJYooI=; b=Wt59uesTdyZ7RecOO9SwzT8Xwl6yO2QvhuTj8m7BoEQaWSRQOjKdcI3IjZ/g+rUr3j bNenf9ptvCrnBNMwf74exRSsJ5Q3T5uGAYdAO6b4YEMlLY1WX1d81wh/f2x6I5U6GAIF Hjk4PGttpM7orqJ4mnvziEG6ckWhVNvq5KQgrRXtYE7GJbSF2l+2Hm9TudC6/udIAAjw MLK85E7LMHI/cXdlBMI+e6A+HZSuSDzenLCmj9RlUhIyXdaoZ1z6hOC+qGGuSVzQx2Vo 3GDtR25Z++F+NE7B0Ga/hKu/1WPlPUtK+gTlzHuuQHbfk0cvpR4TifYVSfLglVU67BpZ Khkw== X-Gm-Message-State: AO0yUKWipz/yH/5hlrIGEDO6lM+lbHKeq0yj//Qi6W4YsOW0mLAWaveZ rNPT32humK10JIhg5Kgj02jYslr4d7E= X-Google-Smtp-Source: AK7set8yRf6g7ktuBKP0CQzBeFyUCv/mUJgTqrv00u2WfcKZqCCOgg0GVy3Vw6WGmXU8JBT03ehy/weY7OA= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a0d:eb09:0:b0:506:391c:6d37 with SMTP id u9-20020a0deb09000000b00506391c6d37mr2021270ywe.88.1674848542935; Fri, 27 Jan 2023 11:42:22 -0800 (PST) Date: Fri, 27 Jan 2023 11:41:06 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-30-surenb@google.com> Subject: [PATCH v2 29/33] x86/mm: try VMA lock-based page fault handling first From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Attempt VMA lock-based page fault handling first, and fall back to the existing mmap_lock-based handling if that fails. Signed-off-by: Suren Baghdasaryan --- arch/x86/Kconfig | 1 + arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3604074a878b..3647f7bdb110 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86_64 # Options that are inherently 64-bit kernel only: select ARCH_HAS_GIGANTIC_PAGE select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 + select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a498ae1fbe66..122c8fe4af12 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -19,6 +19,7 @@ #include /* faulthandler_disabled() */ #include /* efi_crash_gracefully_on_page_fault()*/ #include +#include /* find_and_lock_vma() */ =20 #include /* boot_cpu_has, ... */ #include /* dotraplinkage, ... */ @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs, } #endif =20 +#ifdef CONFIG_PER_VMA_LOCK + if (!(flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) =3D=3D 1) + goto lock_mmap; + + vma =3D lock_vma_under_rcu(mm, address); + if (!vma) + goto lock_mmap; + + if (unlikely(access_error(error_code, vma))) { + vma_end_read(vma); + goto lock_mmap; + } + fault =3D handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs= ); + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + kernelmode_fixup_or_oops(regs, error_code, address, + SIGBUS, BUS_ADRERR, + ARCH_DEFAULT_PKEY); + return; + } +lock_mmap: +#endif /* CONFIG_PER_VMA_LOCK */ + /* * Kernel-mode access to the user address space should only occur * on well-defined single instructions listed in the exception @@ -1433,6 +1466,9 @@ void do_user_addr_fault(struct pt_regs *regs, } =20 mmap_read_unlock(mm); +#ifdef CONFIG_PER_VMA_LOCK +done: +#endif if (likely(!(fault & VM_FAULT_ERROR))) return; =20 --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id D7C96C54EAA for ; Fri, 27 Jan 2023 19:47:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232704AbjA0TrH (ORCPT ); Fri, 27 Jan 2023 14:47:07 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50660 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232683AbjA0Tq3 (ORCPT ); Fri, 27 Jan 2023 14:46:29 -0500 Received: from mail-yw1-f202.google.com (mail-yw1-f202.google.com [209.85.128.202]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A25A08D0BD for ; Fri, 27 Jan 2023 11:44:10 -0800 (PST) Received: by mail-yw1-f202.google.com with SMTP id 00721157ae682-482d3bf0266so65384517b3.3 for ; Fri, 27 Jan 2023 11:44:10 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=MNwo0rhG4y8C+tQG7LPM4nLJWTbIHprc8uVQsZA2GSc=; b=JxxlO5586M0z4sVDSxHugqEEADB1cAjEHo2xluVRFI46WqHTOUsszCaJjysGoPqCq8 ztKkJGmntymAGuEGxdM2A7EX8Sykw/reCFrQFVsgm7kaRoHFsG9BNF/PmmjyzPElNA3A u2zVx465PKwxuuDkWTqvf70dFDsT4sE76+Q1i+nyNMyErq//frz9WqLOph65UMZVM1Q3 1JnBBBOhnZpIDf2DrbbDSdhADbiUH2oov3flacw7sejkvu/YDNPNLxViqgxSV8aAiDpN WEe8RN23e0S32q4tATcZ7ZhVfhZ7+NOeU1bzW/fjTQ6HzW6TJlQUp4mHpwvwuOr4rhfO QkKg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=MNwo0rhG4y8C+tQG7LPM4nLJWTbIHprc8uVQsZA2GSc=; b=4FF1nk0tYt6b6Z3EunkU6EU6kG6eBxGs3ME3FwLhXpLxF38mP7rovlU2by3mKOGy6q Pd6kgN1MTV666QKgtZC1AjGMzWq16tFNdkQPh7HA08EfNbj8a+olr6EkkGBzvh6H6d6d FbyfXvqsFapeMOWOMmnxgxEt6GFsd9YfYJhEh8EuoQLvWSfzFbB4ju7zkXgMKu7BqXdt pDRGrPN7Mo6q1DGjH3roDtkRJKRTFhLVdeX9EX4JnzrXj2tHztIISYOz2fAL4NOTj8sW EdqZWU25C/LfRDN1fYPWZGvWUB8MD5GkaryBxI/Ke0Bkzs8H+gCn0zvglAVA250/yfO0 0bCg== X-Gm-Message-State: AFqh2kpB9jhSGcccYp0gXXc8NN57H5pBxnTR9OBDe2wFuS6D1MPKBouZ FduU3do8ldCdXsDfGJ+nsPevuudIYm0= X-Google-Smtp-Source: AMrXdXv6cPUTMY06wj7LHpX7SofIKh9xmcnJ1P5+6wR9m3ToK9ikAh4HiQMQbcnL8It7MYI0W5hAcMnIURQ= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:5b88:0:b0:462:e278:1a08 with SMTP id p130-20020a815b88000000b00462e2781a08mr5361156ywb.243.1674848545116; Fri, 27 Jan 2023 11:42:25 -0800 (PST) Date: Fri, 27 Jan 2023 11:41:07 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-31-surenb@google.com> Subject: [PATCH v2 30/33] arm64/mm: try VMA lock-based page fault handling first From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Attempt VMA lock-based page fault handling first, and fall back to the existing mmap_lock-based handling if that fails. Signed-off-by: Suren Baghdasaryan --- arch/arm64/Kconfig | 1 + arch/arm64/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c5ccca26a408..9f2c0e352da3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -95,6 +95,7 @@ config ARM64 select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PAGE_TABLE_CHECK + select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f4cb0f85ccf4..16588dda9258 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -535,6 +535,9 @@ static int __kprobes do_page_fault(unsigned long far, u= nsigned long esr, unsigned long vm_flags; unsigned int mm_flags =3D FAULT_FLAG_DEFAULT; unsigned long addr =3D untagged_addr(far); +#ifdef CONFIG_PER_VMA_LOCK + struct vm_area_struct *vma; +#endif =20 if (kprobe_page_fault(regs, esr)) return 0; @@ -585,6 +588,36 @@ static int __kprobes do_page_fault(unsigned long far, = unsigned long esr, =20 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); =20 +#ifdef CONFIG_PER_VMA_LOCK + if (!(mm_flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) =3D=3D 1) + goto lock_mmap; + + vma =3D lock_vma_under_rcu(mm, addr); + if (!vma) + goto lock_mmap; + + if (!(vma->vm_flags & vm_flags)) { + vma_end_read(vma); + goto lock_mmap; + } + fault =3D handle_mm_fault(vma, addr & PAGE_MASK, + mm_flags | FAULT_FLAG_VMA_LOCK, regs); + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + goto no_context; + return 0; + } +lock_mmap: +#endif /* CONFIG_PER_VMA_LOCK */ /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -628,6 +661,9 @@ static int __kprobes do_page_fault(unsigned long far, u= nsigned long esr, } mmap_read_unlock(mm); =20 +#ifdef CONFIG_PER_VMA_LOCK +done: +#endif /* * Handle the "normal" (no error) case first. */ --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 96543C61DA4 for ; Fri, 27 Jan 2023 19:45:57 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232446AbjA0Tpz (ORCPT ); Fri, 27 Jan 2023 14:45:55 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51148 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232399AbjA0ToV (ORCPT ); Fri, 27 Jan 2023 14:44:21 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 61CF17750B for ; Fri, 27 Jan 2023 11:43:14 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-50660dd3263so65587577b3.19 for ; Fri, 27 Jan 2023 11:43:14 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=kzxeHm8qCtyYeQADp+m05TsFOqarDXtVrIERGy2x4bE=; b=Qw/TzqObuxKUrxtkILJdFMJrnRXDl7IhHsfqWYsg15Fht0zJymOStyW6umwGn696mL /vz0L3L+AxNuyNMgLOdykFyo3ffxujr1NJ9PZ/3TCzeykyM3g1PjhFRGoo/e9FoO8fmJ 0gJOei12QFUXaa/Oa6hhy8ENMFQY5aEUa+SKaoSJiwpGU68x3ruM/ipRGHnS2jScV2XA EPWUY+rZwtRmlqA25G6oa8LHcJDQM3bZQ4diO+PFB7fofYZtcMVMGn5EMXtqF1YwdUnI Yur83PIWndlmZU2J2xMV4at0clCg0WXiP4a0wm/tFrIYOxaiWzueFhV10wBQ62TvKtYz 5Efg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=kzxeHm8qCtyYeQADp+m05TsFOqarDXtVrIERGy2x4bE=; b=AtQzOB9vPTpJz+EfztU1XkFKoZXi3jM+jIBHL9Jv/WFjn0kPHtGdjxX0hOQMylJMMb qk/eM04IFp7xl5zrinPztwfBqfAtNpfjiTUCfdKBi8zIPv93OKnwVmduvgruRLfXkya9 mYBd2QHacziFw8IhcXs3wHdgzIJLYnxWbleKoZvw8ys6ZrMS+/7tz3m79gpGnrqL0TcV YP6+FTJ7oh1qDXoaBEAjXojDgG+NkD4QUnKayutS+MjG3MoTtn4f1t9hHrRWsWeMYXDP PWTlGWIzA+cphDSXBzZsfMZvy1YcIltSr+OXPRt3H1ec0f7sdsT4uOUlBt2jBL4opNUt oNVQ== X-Gm-Message-State: AFqh2kpKMRZDQ+wNUxiI3DoVJF9BUC5CkeSGJUE1iKnIFk4RbhUBhNLy SuRbyqz33YK9hA4+jzFBF9+v62pUt4c= X-Google-Smtp-Source: AMrXdXvuIF+ftjjZVhAq6LLQlzBTotqsaAzARNS7Nb+n90q796c/W7VGIXChRGDMsXog66D2ETjcV07ygFs= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:1754:0:b0:4fd:ff18:de6c with SMTP id 81-20020a811754000000b004fdff18de6cmr2568712ywx.227.1674848547503; Fri, 27 Jan 2023 11:42:27 -0800 (PST) Date: Fri, 27 Jan 2023 11:41:08 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-32-surenb@google.com> Subject: [PATCH v2 31/33] powerc/mm: try VMA lock-based page fault handling first From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Laurent Dufour Attempt VMA lock-based page fault handling first, and fall back to the existing mmap_lock-based handling if that fails. Copied from "x86/mm: try VMA lock-based page fault handling first" Signed-off-by: Laurent Dufour Signed-off-by: Suren Baghdasaryan --- arch/powerpc/mm/fault.c | 41 ++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/Kconfig | 1 + arch/powerpc/platforms/pseries/Kconfig | 1 + 3 files changed, 43 insertions(+) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 2bef19cc1b98..cab229e75018 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -469,6 +469,44 @@ static int ___do_page_fault(struct pt_regs *regs, unsi= gned long address, if (is_exec) flags |=3D FAULT_FLAG_INSTRUCTION; =20 +#ifdef CONFIG_PER_VMA_LOCK + if (!(flags & FAULT_FLAG_USER) || atomic_read(&mm->mm_users) =3D=3D 1) + goto lock_mmap; + + vma =3D lock_vma_under_rcu(mm, address); + if (!vma) + goto lock_mmap; + + if (unlikely(access_pkey_error(is_write, is_exec, + (error_code & DSISR_KEYFAULT), vma))) { + int rc =3D bad_access_pkey(regs, address, vma); + + vma_end_read(vma); + return rc; + } + + if (unlikely(access_error(is_write, is_exec, vma))) { + int rc =3D bad_access(regs, address); + + vma_end_read(vma); + return rc; + } + + fault =3D handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs= ); + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + + if (fault_signal_pending(fault, regs)) + return user_mode(regs) ? 0 : SIGBUS; + +lock_mmap: +#endif /* CONFIG_PER_VMA_LOCK */ + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -545,6 +583,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsig= ned long address, =20 mmap_read_unlock(current->mm); =20 +#ifdef CONFIG_PER_VMA_LOCK +done: +#endif if (unlikely(fault & VM_FAULT_ERROR)) return mm_fault_error(regs, address, fault); =20 diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platform= s/powernv/Kconfig index ae248a161b43..70a46acc70d6 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -16,6 +16,7 @@ config PPC_POWERNV select PPC_DOORBELL select MMU_NOTIFIER select FORCE_SMP + select ARCH_SUPPORTS_PER_VMA_LOCK default y =20 config OPAL_PRD diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platform= s/pseries/Kconfig index a3b4d99567cb..e036a04ff1ca 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -21,6 +21,7 @@ config PPC_PSERIES select HOTPLUG_CPU select FORCE_SMP select SWIOTLB + select ARCH_SUPPORTS_PER_VMA_LOCK default y =20 config PARAVIRT --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id CCD33C38142 for ; Fri, 27 Jan 2023 19:46:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232491AbjA0Tp7 (ORCPT ); Fri, 27 Jan 2023 14:45:59 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51706 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232148AbjA0ToV (ORCPT ); Fri, 27 Jan 2023 14:44:21 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D1C1923859 for ; Fri, 27 Jan 2023 11:43:14 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id o1-20020a252801000000b0080b8600bdc9so6363168ybo.3 for ; Fri, 27 Jan 2023 11:43:14 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=y3NZtkliWQhGwQQ6J+V08tBPQFOX7N0W14jS8GdMVSo=; b=R8vm5sGePmLU1Pe4mKnKd7BNiKDqaMQRSE2Uxaf9jHVe/UX+8wZaZnli/NAPrvHh5D Gp2YVdh1UhOUz23uxWg1VsM9bUD5cup3/ZbXShp0DX2DQS4OYSwXrDDdrwmKhIAFH0Hw 74ENSW8vtZuV6jUaLsmkcaEy+yDT+z+95V7siTwW55EWIBuns5aniQb/TRczPRcRq3g4 VhtuB5yMK58BxiemGZ2CqsONqT2FuthDA4119d3zQhl7jpXryXulujWcH7o4rSRWH8Tl w610sqrsMHevwvr509uliYrTdDC7Uzh2VKUBma4LjNdxi7kycc6HPZnQZbUV0crFM3wd Zbgw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=y3NZtkliWQhGwQQ6J+V08tBPQFOX7N0W14jS8GdMVSo=; b=U2RH4n3XmSgimihjQh1g+eLe9eLxm8x90wjyM8hU4EiCena1tnn50vrVzQxcLIq1Tf CvuAkAy7nNUkhix2kX0rTB7Aaf0dPW6qMFWGEewhH4ZKPvFt+cjzs7s8Elb+p32/Jfyl wtvKAjgF9+2RfNM0B1If+6qgLnVudCtyvw1Xxs99HSTBkq+BEQ7nDO4LWhrwtlqqSCkD H8ENE4a9cMpSInHOX01ZdDGg2tjDjJnAu4uwdsX2dmE6Q4Sv5QFFs9qm/WRMEDamnfxr ujwnlLxPjg5LFAvD1HpA1bebMGs/X4lKv+RlMuafUyv6jH3nO5LhW7hmEcUsLlhDBijq UeCg== X-Gm-Message-State: AO0yUKXBZn2bXYeyQbUK3ENULzq+MLWv/UFV4b4wSktOqoeY8kmA9e11 xfnOTApstaZKaIrO8fz6frgVoIBvYBw= X-Google-Smtp-Source: AK7set9ZwBhHaiq8PgHnHZpi30bVFoqemTYpH4CqkWTR3ntjpG4ZYW0UY93Gsvx11KtEAuMPsZpv9FPCPgs= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a25:db07:0:b0:80b:8dd0:7b35 with SMTP id g7-20020a25db07000000b0080b8dd07b35mr1614607ybf.322.1674848549978; Fri, 27 Jan 2023 11:42:29 -0800 (PST) Date: Fri, 27 Jan 2023 11:41:09 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-33-surenb@google.com> Subject: [PATCH v2 32/33] mm/mmap: free vm_area_struct without call_rcu in exit_mmap From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" call_rcu() can take a long time when callback offloading is enabled. Its use in the vm_area_free can cause regressions in the exit path when multiple VMAs are being freed. Because exit_mmap() is called only after the last mm user drops its refcount, the page fault handlers can't be racing with it. Any other possible user like oom-reaper or process_mrelease are already synchronized using mmap_lock. Therefore exit_mmap() can free VMAs directly, without the use of call_rcu(). Expose __vm_area_free() and use it from exit_mmap() to avoid possible call_rcu() floods and performance regressions caused by it. Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 2 ++ kernel/fork.c | 2 +- mm/mmap.c | 11 +++++++---- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index a6de58bb40c7..1c4ddcd6fd84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -250,6 +250,8 @@ void setup_initial_init_mm(void *start_code, void *end_= code, struct vm_area_struct *vm_area_alloc(struct mm_struct *); struct vm_area_struct *vm_area_dup(struct vm_area_struct *); void vm_area_free(struct vm_area_struct *); +/* Use only if VMA has no other users */ +void __vm_area_free(struct vm_area_struct *vma); =20 #ifndef CONFIG_MMU extern struct rb_root nommu_region_tree; diff --git a/kernel/fork.c b/kernel/fork.c index a08cc0e2bfde..d0999de82f94 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -480,7 +480,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struc= t *orig) return new; } =20 -static void __vm_area_free(struct vm_area_struct *vma) +void __vm_area_free(struct vm_area_struct *vma) { free_anon_vma_name(vma); kmem_cache_free(vm_area_cachep, vma); diff --git a/mm/mmap.c b/mm/mmap.c index 3d0cfbc92745..1028fe131bb7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -133,7 +133,7 @@ void unlink_file_vma(struct vm_area_struct *vma) /* * Close a vm structure and free it. */ -static void remove_vma(struct vm_area_struct *vma) +static void remove_vma(struct vm_area_struct *vma, bool unreachable) { might_sleep(); if (vma->vm_ops && vma->vm_ops->close) @@ -141,7 +141,10 @@ static void remove_vma(struct vm_area_struct *vma) if (vma->vm_file) fput(vma->vm_file); mpol_put(vma_policy(vma)); - vm_area_free(vma); + if (unreachable) + __vm_area_free(vma); + else + vm_area_free(vma); } =20 static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *v= mi, @@ -2134,7 +2137,7 @@ static inline void remove_mt(struct mm_struct *mm, st= ruct ma_state *mas) if (vma->vm_flags & VM_ACCOUNT) nr_accounted +=3D nrpages; vm_stat_account(mm, vma->vm_flags, -nrpages); - remove_vma(vma); + remove_vma(vma, false); } vm_unacct_memory(nr_accounted); validate_mm(mm); @@ -3083,7 +3086,7 @@ void exit_mmap(struct mm_struct *mm) do { if (vma->vm_flags & VM_ACCOUNT) nr_accounted +=3D vma_pages(vma); - remove_vma(vma); + remove_vma(vma, true); count++; cond_resched(); } while ((vma =3D mas_find(&mas, ULONG_MAX)) !=3D NULL); --=20 2.39.1 From nobody Sun Sep 14 00:06:35 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8EE88C54EAA for ; Fri, 27 Jan 2023 19:47:14 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232903AbjA0TrM (ORCPT ); Fri, 27 Jan 2023 14:47:12 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:51706 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232739AbjA0Tqc (ORCPT ); Fri, 27 Jan 2023 14:46:32 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4B5A04C21 for ; Fri, 27 Jan 2023 11:44:14 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id k15-20020a5b0a0f000000b007eba3f8e3baso6329126ybq.4 for ; Fri, 27 Jan 2023 11:44:14 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=Ud3T37IGHP0g1lmdTa/tq8X3xMhLxr1AXmnGvd1U4CI=; b=FNDN2+9L1Vmxr+/mMF6eML5JcVJbNLvQW3fVgvwdePzy3lehe5YuSr9HWsNaMC4a8J FUhI3Xak6vLO90aUXXpIFs6adKFN80x7s3bJgLZLYfmaGEKhkkZLbtXJLX0N/ZyPrSOD vT5YWUMa6D2YWUL+XcKLQxhxuZ8Qre6a0DkbEZ1ct6ULdZqWxPdE1UEo8jeDeLLPMaSB b3W+W/qkByIa+a5SR1bxuJVPYAEoRSrded/1J6aiwU0I/kursScnVvXrI1MYVCgC0Qld xz7h6VUcKpIFgOOniSgMFSpFb+kGVYjmhq6wkneKC/yuMYuw9ACcdFQ59agxDWycdAy0 DJpA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=Ud3T37IGHP0g1lmdTa/tq8X3xMhLxr1AXmnGvd1U4CI=; b=BgTUrJZRUJmi0hoEVwXVo0m2ZC9fA9u0snfUMNK34bekz+yrU4F3evsVe77MqX9mFL E+aJkMudSBlpfhNonRj4ruMpNuEE3sMYyPGSSr0cLW06/xWkWii2+K7agPKUnC0ZRP1r nt2wGbVaAGCxw/jjzcteXFpyq5iV1y+jIWTiKyGZQqnygrcTKfpXOl6r5g4uxDiEgQxb 9E59Qb3BY7DnRm1gzZTt5EuS1D04Ye8uaUzBA9W4z6LcnqCJBDd6lPmyI4YBmIDzslgm WVYrWugEcyfXObErrTe5UR92peWWGzHRhBHHNV+ehroWG8jO2YmnWF3aR4XbEnh5WBj8 1rbA== X-Gm-Message-State: AFqh2kqZoRkzE+0odlkQSkxUR9Ky0XPbQ+Viyxm7b6qAuNGgJB3Kfq7v pw2xWu0DQWaZDMAfnFhNPosl0Yrr+9g= X-Google-Smtp-Source: AMrXdXuWd7O6+z302DT+NWEpQuMGblny5VWrJhlz+cYeowewBMKBJvug6bhWSAlG5JyuNtY5PTSWQDkfixM= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:4e19:be9:c5d0:8483]) (user=surenb job=sendgmr) by 2002:a81:be06:0:b0:4b2:fa7c:8836 with SMTP id i6-20020a81be06000000b004b2fa7c8836mr5004644ywn.195.1674848552353; Fri, 27 Jan 2023 11:42:32 -0800 (PST) Date: Fri, 27 Jan 2023 11:41:10 -0800 In-Reply-To: <20230127194110.533103-1-surenb@google.com> Mime-Version: 1.0 References: <20230127194110.533103-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.456.gfc5497dd1b-goog Message-ID: <20230127194110.533103-34-surenb@google.com> Subject: [PATCH v2 33/33] mm: separate vma->lock from vm_area_struct From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, surenb@google.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" vma->lock being part of the vm_area_struct causes performance regression during page faults because during contention its count and owner fields are constantly updated and having other parts of vm_area_struct used during page fault handling next to them causes constant cache line bouncing. Fix that by moving the lock outside of the vm_area_struct. All attempts to keep vma->lock inside vm_area_struct in a separate cache line still produce performance regression especially on NUMA machines. Smallest regression was achieved when lock is placed in the fourth cache line but that bloats vm_area_struct to 256 bytes. Considering performance and memory impact, separate lock looks like the best option. It increases memory footprint of each VMA but that can be optimized later if the new size causes issues. Note that after this change vma_init() does not allocate or initialize vma->lock anymore. A number of drivers allocate a pseudo VMA on the stack but they never use the VMA's lock, therefore it does not need to be allocated. The future drivers which might need the VMA lock should use vm_area_alloc()/vm_area_free() to allocate the VMA. Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 23 ++++++------- include/linux/mm_types.h | 6 +++- kernel/fork.c | 73 ++++++++++++++++++++++++++++++++-------- 3 files changed, 74 insertions(+), 28 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 1c4ddcd6fd84..52e048c31239 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -621,12 +621,6 @@ struct vm_operations_struct { }; =20 #ifdef CONFIG_PER_VMA_LOCK -static inline void vma_init_lock(struct vm_area_struct *vma) -{ - init_rwsem(&vma->lock); - vma->vm_lock_seq =3D -1; -} - /* * Try to read-lock a vma. The function is allowed to occasionally yield f= alse * locked result to avoid performance overhead, in which case we fall back= to @@ -638,17 +632,17 @@ static inline bool vma_start_read(struct vm_area_stru= ct *vma) if (vma->vm_lock_seq =3D=3D READ_ONCE(vma->vm_mm->mm_lock_seq)) return false; =20 - if (unlikely(down_read_trylock(&vma->lock) =3D=3D 0)) + if (unlikely(down_read_trylock(&vma->vm_lock->lock) =3D=3D 0)) return false; =20 /* * Overflow might produce false locked result. * False unlocked result is impossible because we modify and check - * vma->vm_lock_seq under vma->lock protection and mm->mm_lock_seq + * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq * modification invalidates all existing locks. */ if (unlikely(vma->vm_lock_seq =3D=3D READ_ONCE(vma->vm_mm->mm_lock_seq)))= { - up_read(&vma->lock); + up_read(&vma->vm_lock->lock); return false; } return true; @@ -657,7 +651,7 @@ static inline bool vma_start_read(struct vm_area_struct= *vma) static inline void vma_end_read(struct vm_area_struct *vma) { rcu_read_lock(); /* keeps vma alive till the end of up_read */ - up_read(&vma->lock); + up_read(&vma->vm_lock->lock); rcu_read_unlock(); } =20 @@ -675,9 +669,9 @@ static inline void vma_start_write(struct vm_area_struc= t *vma) if (vma->vm_lock_seq =3D=3D mm_lock_seq) return; =20 - down_write(&vma->lock); + down_write(&vma->vm_lock->lock); vma->vm_lock_seq =3D mm_lock_seq; - up_write(&vma->lock); + up_write(&vma->vm_lock->lock); } =20 static inline void vma_assert_write_locked(struct vm_area_struct *vma) @@ -704,6 +698,10 @@ static inline void vma_assert_write_locked(struct vm_a= rea_struct *vma) {} =20 #endif /* CONFIG_PER_VMA_LOCK */ =20 +/* + * WARNING: vma_init does not initialize vma->vm_lock. + * Use vm_area_alloc()/vm_area_free() if vma needs locking. + */ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *= mm) { static const struct vm_operations_struct dummy_vm_ops =3D {}; @@ -712,7 +710,6 @@ static inline void vma_init(struct vm_area_struct *vma,= struct mm_struct *mm) vma->vm_mm =3D mm; vma->vm_ops =3D &dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); - vma_init_lock(vma); } =20 /* Use when VMA is not part of the VMA tree and needs no locking */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c4c43f10344a..1e97bb98197c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -471,6 +471,10 @@ struct anon_vma_name { char name[]; }; =20 +struct vma_lock { + struct rw_semaphore lock; +}; + /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -510,7 +514,7 @@ struct vm_area_struct { =20 #ifdef CONFIG_PER_VMA_LOCK int vm_lock_seq; - struct rw_semaphore lock; + struct vma_lock *vm_lock; #endif =20 /* diff --git a/kernel/fork.c b/kernel/fork.c index d0999de82f94..a152804faa14 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -451,13 +451,49 @@ static struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; =20 +#ifdef CONFIG_PER_VMA_LOCK + +/* SLAB cache for vm_area_struct.lock */ +static struct kmem_cache *vma_lock_cachep; + +static bool vma_lock_alloc(struct vm_area_struct *vma) +{ + vma->vm_lock =3D kmem_cache_alloc(vma_lock_cachep, GFP_KERNEL); + if (!vma->vm_lock) + return false; + + init_rwsem(&vma->vm_lock->lock); + vma->vm_lock_seq =3D -1; + + return true; +} + +static inline void vma_lock_free(struct vm_area_struct *vma) +{ + kmem_cache_free(vma_lock_cachep, vma->vm_lock); +} + +#else /* CONFIG_PER_VMA_LOCK */ + +static inline bool vma_lock_alloc(struct vm_area_struct *vma) { return tru= e; } +static inline void vma_lock_free(struct vm_area_struct *vma) {} + +#endif /* CONFIG_PER_VMA_LOCK */ + struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) { struct vm_area_struct *vma; =20 vma =3D kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); - if (vma) - vma_init(vma, mm); + if (!vma) + return NULL; + + vma_init(vma, mm); + if (!vma_lock_alloc(vma)) { + kmem_cache_free(vm_area_cachep, vma); + return NULL; + } + return vma; } =20 @@ -465,24 +501,30 @@ struct vm_area_struct *vm_area_dup(struct vm_area_str= uct *orig) { struct vm_area_struct *new =3D kmem_cache_alloc(vm_area_cachep, GFP_KERNE= L); =20 - if (new) { - ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); - ASSERT_EXCLUSIVE_WRITER(orig->vm_file); - /* - * orig->shared.rb may be modified concurrently, but the clone - * will be reinitialized. - */ - data_race(memcpy(new, orig, sizeof(*new))); - INIT_LIST_HEAD(&new->anon_vma_chain); - vma_init_lock(new); - dup_anon_vma_name(orig, new); + if (!new) + return NULL; + + ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); + ASSERT_EXCLUSIVE_WRITER(orig->vm_file); + /* + * orig->shared.rb may be modified concurrently, but the clone + * will be reinitialized. + */ + data_race(memcpy(new, orig, sizeof(*new))); + if (!vma_lock_alloc(new)) { + kmem_cache_free(vm_area_cachep, new); + return NULL; } + INIT_LIST_HEAD(&new->anon_vma_chain); + dup_anon_vma_name(orig, new); + return new; } =20 void __vm_area_free(struct vm_area_struct *vma) { free_anon_vma_name(vma); + vma_lock_free(vma); kmem_cache_free(vm_area_cachep, vma); } =20 @@ -493,7 +535,7 @@ static void vm_area_free_rcu_cb(struct rcu_head *head) vm_rcu); =20 /* The vma should not be locked while being destroyed. */ - VM_BUG_ON_VMA(rwsem_is_locked(&vma->lock), vma); + VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock->lock), vma); __vm_area_free(vma); } #endif @@ -3089,6 +3131,9 @@ void __init proc_caches_init(void) NULL); =20 vm_area_cachep =3D KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); +#ifdef CONFIG_PER_VMA_LOCK + vma_lock_cachep =3D KMEM_CACHE(vma_lock, SLAB_PANIC|SLAB_ACCOUNT); +#endif mmap_init(); nsproxy_cache_init(); } --=20 2.39.1