From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 56ACFC636D7 for ; Thu, 16 Feb 2023 05:18:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229573AbjBPFSG (ORCPT ); Thu, 16 Feb 2023 00:18:06 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44612 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229516AbjBPFSA (ORCPT ); Thu, 16 Feb 2023 00:18:00 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 90BAF13DD6 for ; Wed, 15 Feb 2023 21:17:57 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-50e79ffba49so8474537b3.9 for ; Wed, 15 Feb 2023 21:17:57 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=0nC1+Z5xJ7W1lTs/T0LqT1g+dKiQp9Lc6m1pbhrJE6M=; b=DtrBb0OEqSibZGKM+tGrekgia+czdJCmNvwxB9nTfvwGTIHXKlm6TS9Q1D5NfIR4hg c3apVnOjZnfZCRE1YFzTHLthbM0S/IVW1BccnLNsJf21OgviYfhBVHNRB8wtAupWJwTS 1TEYfIbKZCSoMpt+Js0TzB9iPZbPOfnvL//puIai0IRqYdE7nHNdWFvMQyJ5Dt5csXIh 43aWlLJbXNtNaZid0iRHPyqJRyFRPoBCCZ5nKLzs/n4kbMJKA8/P4QRR3m4y665oyagy kgaLTr7SQL3GQMCbK2ePGMttOB2X0C/PFJbSA9edFZkog1VQvtGlD+HG94g+afV+vkdw M8NA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=0nC1+Z5xJ7W1lTs/T0LqT1g+dKiQp9Lc6m1pbhrJE6M=; b=ciuTs2y3Dend0HO2GHItrgf4S0BR59O2hkff5KfbdNACW7+0Xo06RXu1PG/1S6Q7Zr kLwrX3CUDsSfQqE/ncqosKBfmLCIm+oSdw52jM187UHL3kUQMp9aIVPPyFdgNEyozrGx Zk5czzBFQvMIiMZ1VdgpdaVtyOJS9j6ymMgv8r1ig0sFDqfrM0DmZjKYzEmyg7ohZtvD 0HO3anGJKVxgeQpj8KCqTmTm1Yor9Aa+k3Ia/sy0Zh2MryWF6ZI2MoZjmzV3nfgyktOc 4dP8ghq2izOcG5S7/rFDDm3sTM6Imb9w6uu8F9GGO0mTIg/47ll7rvdsquWEdQOUFDNE PxgQ== X-Gm-Message-State: AO0yUKX0acTU6DI94CZR04tkVehrXQlUs7EqKTZClkpUDNhH445f++/j Ptuq6BJpLE11QbQ2yevWBoS+44zbUho= X-Google-Smtp-Source: AK7set+Y8Yn0V0huCQq66Ej7hgCVLxUYKYuorVk92zLqUUDZhSEdy+eB6kBj2xP313LsIxhb2Sq52k6rqsw= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a0d:d88c:0:b0:533:86d8:a551 with SMTP id a134-20020a0dd88c000000b0053386d8a551mr60897ywe.11.1676524676787; Wed, 15 Feb 2023 21:17:56 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:16 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-2-surenb@google.com> Subject: [PATCH v3 01/35] maple_tree: Be more cautious about dead nodes From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Liam Howlett , Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Liam Howlett ma_pivots() and ma_data_end() may be called with a dead node. Ensure to that the node isn't dead before using the returned values. This is necessary for RCU mode of the maple tree. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 52 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 646297cae5d1..cc356b8369ad 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -544,6 +544,7 @@ static inline bool ma_dead_node(const struct maple_node= *node) =20 return (parent =3D=3D node); } + /* * mte_dead_node() - check if the @enode is dead. * @enode: The encoded maple node @@ -625,6 +626,8 @@ static inline unsigned int mas_alloc_req(const struct m= a_state *mas) * @node - the maple node * @type - the node type * + * In the event of a dead node, this array may be %NULL + * * Return: A pointer to the maple node pivots */ static inline unsigned long *ma_pivots(struct maple_node *node, @@ -1096,8 +1099,11 @@ static int mas_ascend(struct ma_state *mas) a_type =3D mas_parent_enum(mas, p_enode); a_node =3D mte_parent(p_enode); a_slot =3D mte_parent_slot(p_enode); - pivots =3D ma_pivots(a_node, a_type); a_enode =3D mt_mk_node(a_node, a_type); + pivots =3D ma_pivots(a_node, a_type); + + if (unlikely(ma_dead_node(a_node))) + return 1; =20 if (!set_min && a_slot) { set_min =3D true; @@ -1401,6 +1407,9 @@ static inline unsigned char ma_data_end(struct maple_= node *node, { unsigned char offset; =20 + if (!pivots) + return 0; + if (type =3D=3D maple_arange_64) return ma_meta_end(node, type); =20 @@ -1436,6 +1445,9 @@ static inline unsigned char mas_data_end(struct ma_st= ate *mas) return ma_meta_end(node, type); =20 pivots =3D ma_pivots(node, type); + if (unlikely(ma_dead_node(node))) + return 0; + offset =3D mt_pivots[type] - 1; if (likely(!pivots[offset])) return ma_meta_end(node, type); @@ -4505,6 +4517,9 @@ static inline int mas_prev_node(struct ma_state *mas,= unsigned long min) node =3D mas_mn(mas); slots =3D ma_slots(node, mt); pivots =3D ma_pivots(node, mt); + if (unlikely(ma_dead_node(node))) + return 1; + mas->max =3D pivots[offset]; if (offset) mas->min =3D pivots[offset - 1] + 1; @@ -4526,6 +4541,9 @@ static inline int mas_prev_node(struct ma_state *mas,= unsigned long min) slots =3D ma_slots(node, mt); pivots =3D ma_pivots(node, mt); offset =3D ma_data_end(node, mt, pivots, mas->max); + if (unlikely(ma_dead_node(node))) + return 1; + if (offset) mas->min =3D pivots[offset - 1] + 1; =20 @@ -4574,6 +4592,7 @@ static inline int mas_next_node(struct ma_state *mas,= struct maple_node *node, struct maple_enode *enode; int level =3D 0; unsigned char offset; + unsigned char node_end; enum maple_type mt; void __rcu **slots; =20 @@ -4597,7 +4616,11 @@ static inline int mas_next_node(struct ma_state *mas= , struct maple_node *node, node =3D mas_mn(mas); mt =3D mte_node_type(mas->node); pivots =3D ma_pivots(node, mt); - } while (unlikely(offset =3D=3D ma_data_end(node, mt, pivots, mas->max))); + node_end =3D ma_data_end(node, mt, pivots, mas->max); + if (unlikely(ma_dead_node(node))) + return 1; + + } while (unlikely(offset =3D=3D node_end)); =20 slots =3D ma_slots(node, mt); pivot =3D mas_safe_pivot(mas, pivots, ++offset, mt); @@ -4613,6 +4636,9 @@ static inline int mas_next_node(struct ma_state *mas,= struct maple_node *node, mt =3D mte_node_type(mas->node); slots =3D ma_slots(node, mt); pivots =3D ma_pivots(node, mt); + if (unlikely(ma_dead_node(node))) + return 1; + offset =3D 0; pivot =3D pivots[0]; } @@ -4659,11 +4685,14 @@ static inline void *mas_next_nentry(struct ma_state= *mas, return NULL; } =20 - pivots =3D ma_pivots(node, type); slots =3D ma_slots(node, type); - mas->index =3D mas_safe_min(mas, pivots, mas->offset); + pivots =3D ma_pivots(node, type); count =3D ma_data_end(node, type, pivots, mas->max); - if (ma_dead_node(node)) + if (unlikely(ma_dead_node(node))) + return NULL; + + mas->index =3D mas_safe_min(mas, pivots, mas->offset); + if (unlikely(ma_dead_node(node))) return NULL; =20 if (mas->index > max) @@ -4817,6 +4846,11 @@ static inline void *mas_prev_nentry(struct ma_state = *mas, unsigned long limit, =20 slots =3D ma_slots(mn, mt); pivots =3D ma_pivots(mn, mt); + if (unlikely(ma_dead_node(mn))) { + mas_rewalk(mas, index); + goto retry; + } + if (offset =3D=3D mt_pivots[mt]) pivot =3D mas->max; else @@ -6631,11 +6665,11 @@ static inline void *mas_first_entry(struct ma_state= *mas, struct maple_node *mn, while (likely(!ma_is_leaf(mt))) { MT_BUG_ON(mas->tree, mte_dead_node(mas->node)); slots =3D ma_slots(mn, mt); - pivots =3D ma_pivots(mn, mt); - max =3D pivots[0]; entry =3D mas_slot(mas, slots, 0); + pivots =3D ma_pivots(mn, mt); if (unlikely(ma_dead_node(mn))) return NULL; + max =3D pivots[0]; mas->node =3D entry; mn =3D mas_mn(mas); mt =3D mte_node_type(mas->node); @@ -6655,13 +6689,13 @@ static inline void *mas_first_entry(struct ma_state= *mas, struct maple_node *mn, if (likely(entry)) return entry; =20 - pivots =3D ma_pivots(mn, mt); - mas->index =3D pivots[0] + 1; mas->offset =3D 1; entry =3D mas_slot(mas, slots, 1); + pivots =3D ma_pivots(mn, mt); if (unlikely(ma_dead_node(mn))) return NULL; =20 + mas->index =3D pivots[0] + 1; if (mas->index > limit) goto none; =20 --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 342C7C636CC for ; Thu, 16 Feb 2023 05:18:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229598AbjBPFSL (ORCPT ); Thu, 16 Feb 2023 00:18:11 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44620 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229528AbjBPFSC (ORCPT ); Thu, 16 Feb 2023 00:18:02 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B405A1BACE for ; Wed, 15 Feb 2023 21:18:00 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-52ee93d7863so8353547b3.18 for ; Wed, 15 Feb 2023 21:18:00 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=eiC+Iexfup+RTroKLCxxsKt1sRroZAPvjqioHV2wMiw=; b=gSbKQNPX+j8W3tzxYZsRNAblkFTxWANtMyo2LSqGNkakVHWziV6JTyqlohrqDf6nLO 7F5N7WpIKnXsbFPEflJy25rhat2iqZIJRLZvV94iATncucILtc8zFlPzaxOQpEoApKoJ RWEtB6lTCeOcIkp8qbIxk4V683W1L3zeK3vTsHv9Yla4n9gJP1xuJQs99Ms5w5HF3olN Kli7prX+XczaCsAJ+8hVggWwSYtxp34C+A6/4yuDSchSQcAT4VW0lygmuaxa708qKR/5 zV/LK3WvWxvtgplCG5tRDu/7iPKMsNWXdzFiEgVvRQ3tE0ZzhzdCi73gDraIVPZ+cD1H 1RBQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=eiC+Iexfup+RTroKLCxxsKt1sRroZAPvjqioHV2wMiw=; b=HtBka/XnXeDgIdXq2ejdDwuy3vY3xy3ojb8+iyaKytdNa/2lc2izy2fcHN4lzJv2cg c4W+fmKeRC3bXmeGMgxQY/YAKJNx14LEamTrR7icC+gau94X/NyfyGl/HpNVxb4P4saE pO5j9VnaED+mDlRsAQTLOf5m5gamElsIps6q+u0DgIVdBtpa9wdGV77s/YrnqPis5ZnP nm+yHrcG5Nu0fXHs21Gd/qV/pMN7Puv1FzPP8KlZKntuKoXreDFpKN9RCMkyk6gq/fQs xxS/+lwvIEu5pGFdAlApZnNwjGveYSE+GnPkMi2FH7cM/sZdOA9dWuVxrH4te5ZnjaOv dEWQ== X-Gm-Message-State: AO0yUKV9jFICdSO6w4epQYDcm/Ljw362gHscDjCDDbBgZTxRL0MYtcOU PwUaHDALWeBoSP+dM8iXNwUrB47rO5U= X-Google-Smtp-Source: AK7set+h9VKChaGyyozNEu9Udu5uv7zPqiNZXg8WkDwG0C713PEP5x3O9LtVk8xkCPSk1ZyxP2XheyHDpnY= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a05:6902:10f:b0:855:fa17:4f5f with SMTP id o15-20020a056902010f00b00855fa174f5fmr78ybh.1.1676524678798; Wed, 15 Feb 2023 21:17:58 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:17 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-3-surenb@google.com> Subject: [PATCH v3 02/35] maple_tree: Detect dead nodes in mas_start() From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Liam Howlett , Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Liam Howlett When initially starting a search, the root node may already be in the process of being replaced in RCU mode. Detect and restart the walk if this is the case. This is necessary for RCU mode of the maple tree. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index cc356b8369ad..089cd76ec379 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1360,12 +1360,16 @@ static inline struct maple_enode *mas_start(struct = ma_state *mas) mas->max =3D ULONG_MAX; mas->depth =3D 0; =20 +retry: root =3D mas_root(mas); /* Tree with nodes */ if (likely(xa_is_node(root))) { mas->depth =3D 1; mas->node =3D mte_safe_root(root); mas->offset =3D 0; + if (mte_dead_node(mas->node)) + goto retry; + return NULL; } =20 --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 95E17C636CC for ; Thu, 16 Feb 2023 05:18:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229603AbjBPFSR (ORCPT ); Thu, 16 Feb 2023 00:18:17 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44694 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229564AbjBPFSE (ORCPT ); Thu, 16 Feb 2023 00:18:04 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 65E1F1F5C0 for ; Wed, 15 Feb 2023 21:18:02 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-4cddba76f55so8292917b3.23 for ; Wed, 15 Feb 2023 21:18:02 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=UHRaAOWRO45il7vCWYolP2vMSHNJY77rtpcEDPLreFo=; b=JaAJMQvkQYkAsRG/T0d+zxwkrl7mq+q9FZsokrXfiBXR+R1ZXxx2vVNR2lMSqf4OC/ WN5L5m6K8l41JtRKI2HZbVfjAY+cjnkF1BJGHx4Vp8UHSVK5UGOhTfSvkmRDjsxq+IrD 02V2mM9EdjYNQiXsOwUhDaiCYn5z8ljJQUJeFnF2XY5CF0WF7YFz2opQhOGyVfb59slI zMkZhBxatB6Jt2s9icMmk1QAWL7dqweRI7j0fEknZ3XaTRcFBLrS6UTE7S7Q4BJqI8Vo VXsm9/T42jqn+e5iri473tsxrbcsgusYZ9oNJzgxHuxj6F/D7b6ACgioO/upEa2Rro7t uyJg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=UHRaAOWRO45il7vCWYolP2vMSHNJY77rtpcEDPLreFo=; b=SOijBJvIgOce2EFpNxMyERObgQe9m95TBLQ3pu3qLQXq4wpVCekgjTIMl8Ms850+g2 4lWxnk5yS0+fjV3e9810Yd6Ts7tpOHDuEMgNqrmIx+lFTZ8sawLVux8voJaE8KKrNRVv KWseAChGtAGOz+ulNKYCparpik2ga98nYeZun9wqAlH4ZG7ZhZkiIKlOwT2MaeE2pT0d W2qcMk6JV59pd9oh73knnPj5uPksFNeZ2wnURbjui5B+GfKEbwaVzdrwhOoz6AJDtNlf Msx5YQZhA1WhsgZJnZgOhCi0+ntzImMpesG5a+ulpxa8Yy5l8SUC05WJVk6lvHGLgZcz ACBg== X-Gm-Message-State: AO0yUKUrqKRPpC21ArbpoqtVF4/DpUAZPkeRTxKyrKoWGGEZV1z9JRmQ J/s56K0pz8c0E7mGydiuxLiuoBBv/Q4= X-Google-Smtp-Source: AK7set8Uq/nZgRLZFfJuxWYQ4ij1Dw1PY56gBuOAhM9aSZ73J6nsbhc6qz5TmHaa+yP435YL3FPEkHcMzr0= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a05:6902:10f:b0:855:fa17:4f5f with SMTP id o15-20020a056902010f00b00855fa174f5fmr80ybh.1.1676524681354; Wed, 15 Feb 2023 21:18:01 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:18 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-4-surenb@google.com> Subject: [PATCH v3 03/35] maple_tree: Fix freeing of nodes in rcu mode From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Liam Howlett , Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Liam Howlett The walk to destroy the nodes was not always setting the node type and would result in a destroy method potentially using the values as nodes. Avoid this by setting the correct node types. This is necessary for the RCU mode of the maple tree. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 73 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 11 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 089cd76ec379..44d6ce30b28e 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -902,6 +902,44 @@ static inline void ma_set_meta(struct maple_node *mn, = enum maple_type mt, meta->end =3D end; } =20 +/* + * mas_clear_meta() - clear the metadata information of a node, if it exis= ts + * @mas: The maple state + * @mn: The maple node + * @mt: The maple node type + * @offset: The offset of the highest sub-gap in this node. + * @end: The end of the data in this node. + */ +static inline void mas_clear_meta(struct ma_state *mas, struct maple_node = *mn, + enum maple_type mt) +{ + struct maple_metadata *meta; + unsigned long *pivots; + void __rcu **slots; + void *next; + + switch (mt) { + case maple_range_64: + pivots =3D mn->mr64.pivot; + if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { + slots =3D mn->mr64.slot; + next =3D mas_slot_locked(mas, slots, + MAPLE_RANGE64_SLOTS - 1); + if (unlikely((mte_to_node(next) && mte_node_type(next)))) + return; /* The last slot is a node, no metadata */ + } + fallthrough; + case maple_arange_64: + meta =3D ma_meta(mn, mt); + break; + default: + return; + } + + meta->gap =3D 0; + meta->end =3D 0; +} + /* * ma_meta_end() - Get the data end of a node from the metadata * @mn: The maple node @@ -5455,20 +5493,22 @@ static inline int mas_rev_alloc(struct ma_state *ma= s, unsigned long min, * mas_dead_leaves() - Mark all leaves of a node as dead. * @mas: The maple state * @slots: Pointer to the slot array + * @type: The maple node type * * Must hold the write lock. * * Return: The number of leaves marked as dead. */ static inline -unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots) +unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots, + enum maple_type mt) { struct maple_node *node; enum maple_type type; void *entry; int offset; =20 - for (offset =3D 0; offset < mt_slot_count(mas->node); offset++) { + for (offset =3D 0; offset < mt_slots[mt]; offset++) { entry =3D mas_slot_locked(mas, slots, offset); type =3D mte_node_type(entry); node =3D mte_to_node(entry); @@ -5487,14 +5527,13 @@ unsigned char mas_dead_leaves(struct ma_state *mas,= void __rcu **slots) =20 static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offs= et) { - struct maple_node *node, *next; + struct maple_node *next; void __rcu **slots =3D NULL; =20 next =3D mas_mn(mas); do { - mas->node =3D ma_enode_ptr(next); - node =3D mas_mn(mas); - slots =3D ma_slots(node, node->type); + mas->node =3D mt_mk_node(next, next->type); + slots =3D ma_slots(next, next->type); next =3D mas_slot_locked(mas, slots, offset); offset =3D 0; } while (!ma_is_leaf(next->type)); @@ -5558,11 +5597,14 @@ static inline void __rcu **mas_destroy_descend(stru= ct ma_state *mas, node =3D mas_mn(mas); slots =3D ma_slots(node, mte_node_type(mas->node)); next =3D mas_slot_locked(mas, slots, 0); - if ((mte_dead_node(next))) + if ((mte_dead_node(next))) { + mte_to_node(next)->type =3D mte_node_type(next); next =3D mas_slot_locked(mas, slots, 1); + } =20 mte_set_node_dead(mas->node); node->type =3D mte_node_type(mas->node); + mas_clear_meta(mas, node, node->type); node->piv_parent =3D prev; node->parent_slot =3D offset; offset =3D 0; @@ -5582,13 +5624,18 @@ static void mt_destroy_walk(struct maple_enode *eno= de, unsigned char ma_flags, =20 MA_STATE(mas, &mt, 0, 0); =20 - if (mte_is_leaf(enode)) + mas.node =3D enode; + if (mte_is_leaf(enode)) { + node->type =3D mte_node_type(enode); goto free_leaf; + } =20 + ma_flags &=3D ~MT_FLAGS_LOCK_MASK; mt_init_flags(&mt, ma_flags); mas_lock(&mas); =20 - mas.node =3D start =3D enode; + mte_to_node(enode)->ma_flags =3D ma_flags; + start =3D enode; slots =3D mas_destroy_descend(&mas, start, 0); node =3D mas_mn(&mas); do { @@ -5596,7 +5643,8 @@ static void mt_destroy_walk(struct maple_enode *enode= , unsigned char ma_flags, unsigned char offset; struct maple_enode *parent, *tmp; =20 - node->slot_len =3D mas_dead_leaves(&mas, slots); + node->type =3D mte_node_type(mas.node); + node->slot_len =3D mas_dead_leaves(&mas, slots, node->type); if (free) mt_free_bulk(node->slot_len, slots); offset =3D node->parent_slot + 1; @@ -5620,7 +5668,8 @@ static void mt_destroy_walk(struct maple_enode *enode= , unsigned char ma_flags, } while (start !=3D mas.node); =20 node =3D mas_mn(&mas); - node->slot_len =3D mas_dead_leaves(&mas, slots); + node->type =3D mte_node_type(mas.node); + node->slot_len =3D mas_dead_leaves(&mas, slots, node->type); if (free) mt_free_bulk(node->slot_len, slots); =20 @@ -5630,6 +5679,8 @@ static void mt_destroy_walk(struct maple_enode *enode= , unsigned char ma_flags, free_leaf: if (free) mt_free_rcu(&node->rcu); + else + mas_clear_meta(&mas, node, node->type); } =20 /* --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id EC30AC636CC for ; Thu, 16 Feb 2023 05:18:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229609AbjBPFSW (ORCPT ); Thu, 16 Feb 2023 00:18:22 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44750 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229571AbjBPFSF (ORCPT ); Thu, 16 Feb 2023 00:18:05 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0CBF41BACE for ; Wed, 15 Feb 2023 21:18:05 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id u2-20020a25ab02000000b0090f2c84a6a4so856822ybi.13 for ; Wed, 15 Feb 2023 21:18:05 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=bYTu7nw04Y/eGAN3i9eCzY/Ke38OAVixcrHXlu+jNFw=; b=WNV0WuL9vmXLIvOmuVEVNJHnS2Qfi+AtiXnEC8qLvBZk4nQfRysiQv+n6V2TYm/oOI 8PVrbtoCrNJAR827RYRUq+srgPH7ettT9CkewSYs2c20qjx3DSR6OCNLU8I9VtlWJ1vK BTnTnTmkn3I2d2LJu6019qpcNY3Rtyhwbcu+BTERQU4CpKL9XQ+m6KDQOlEen5oxaVLd mpn44+vsVlTTH8ejjgOPKl/5T26Yr75D1Q+Pk5P8P+e6MTdUqg4koF2LdV4PYx+3j32Q lvmdVkZra/AZy5zB6Ynbz9xE0KdiJ+yp/dRpPvAGFlnQDPvuVze/cOT0f1MBvOftG1xW MgmQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=bYTu7nw04Y/eGAN3i9eCzY/Ke38OAVixcrHXlu+jNFw=; b=Ania1ZLP9q/F4RNkOFXcdEJbZv56ecwm5AGJQ9BhMAx+judzw6GBWo1HF92JV9TOB4 bjILJPSGNkUO7RbZir+GuL9z6gheQem6474yDRPwhbbaDTvHToXpHlX7eQflngYsMFHo XnDDNy2/dZAViLuSu38r5WcJh5Ps4LQY2KXaDOkscenI5vFz8RszXUrGF+cDoLm32rs2 dvrjjr9M9kU34QdZmqTlH9OtRkQCMoI6GbV90CoSPJV25R/UdX//B5ikj55lEXfRdtWU gZgSu81xF/ByoTH9sRWxPFB7FBKiR8BLCTCmbxgIo3OLs4iI+mdoZO6MrJu2uPoXmib2 Usvg== X-Gm-Message-State: AO0yUKVXkf08Q0JCIBm+kndP+PICAZpCLDEP/kIwL2fle1gCqMH2c/dP 0O/EA20CYPlT0fEyvO9HxerCba7I4NY= X-Google-Smtp-Source: AK7set+6hui8S8GzrNNsQxXJRqNjwGdwQ7uNZMDuv5k3QQ7TT5mar5i4p4eV6/CeDDFvXWED43V4tYaI94g= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a05:6902:1024:b0:8fc:686c:cf87 with SMTP id x4-20020a056902102400b008fc686ccf87mr15ybt.4.1676524683783; Wed, 15 Feb 2023 21:18:03 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:19 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-5-surenb@google.com> Subject: [PATCH v3 04/35] maple_tree: remove extra smp_wmb() from mas_dead_leaves() From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Liam Howlett , Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Liam Howlett The call to mte_set_dead_node() before the smp_wmb() already calls smp_wmb() so this is not needed. This is an optimization for the RCU mode of the maple tree. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 44d6ce30b28e..3d5ab02f981a 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5517,7 +5517,6 @@ unsigned char mas_dead_leaves(struct ma_state *mas, v= oid __rcu **slots, break; =20 mte_set_node_dead(entry); - smp_wmb(); /* Needed for RCU */ node->type =3D type; rcu_assign_pointer(slots[offset], node); } --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 9A1CBC61DA4 for ; Thu, 16 Feb 2023 05:18:28 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229622AbjBPFS0 (ORCPT ); Thu, 16 Feb 2023 00:18:26 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45070 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229587AbjBPFSK (ORCPT ); Thu, 16 Feb 2023 00:18:10 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AA40A410A1 for ; Wed, 15 Feb 2023 21:18:07 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-507aac99fdfso8543797b3.11 for ; Wed, 15 Feb 2023 21:18:07 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=rDMlMLTEmJU/Y3crV2atsbCBmpKXyma0yO0OAyc8vDw=; b=nzJ7S/CJB/US/rZvNC3fs8J8fXfgfseovx93tCDYzLZ8vkCeR9OXHCG0j7Ki073PZN rg58gpprBwFVBX+ke7DxYuYcgkrSQGYt8YR1LUHO8qrBrJHNZzYIw0fWkCpsF9iepuD5 iMfSL7cYbKTG2WSIKdzRhuGyAIwfPFbrqz4Ug3Wxq2wbPU6P8gVi3bWvRrz9dl4kYYU2 r7gzF5YCRjQDAYOvUYbM1k4rIOQJ+5ffpqKzLjZIEvFTLYyWs4Q62tIpz4Jjx53AOh2l MDbSaQOmeWEl4DEmE5lvlD2J9UkQB4Iedo6XxOd2amw2xyG9CIFDXUp1l5UWlLjXi78n VOeg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=rDMlMLTEmJU/Y3crV2atsbCBmpKXyma0yO0OAyc8vDw=; b=rRcURjHgly6C9phVmCfv/Qwn3KA2i2hsOFAed/MKybTvZZ1H5qmL+FstZVXszmjz0s kYPvlU5p1hQs9pDnu6C5oEPfdFkggH6lKVkenQN83A7Fb6qpJuihbPkpivhCpb+SsubO vxcyXiHc5XDC8gN6s7HDbfI1qrL9P3Qj8W388w3UbL9xW3CKKUoJtYDU3vwKKteLN05p 6pMCdCOTj7+AvpGdTinzZ5M0OL04Cr4bJOagOsR0eVSalILZ9XrKz6fG7ukXGEccCgoH gjXNE47x+mNYQbxhcWB4GzAdKhqJnZAk5eq9jqhiS3qPIWrOgEA0tTLZam4qnx5Jjn2T N/wA== X-Gm-Message-State: AO0yUKWy7RDm+0TL1UvYwALTkYC/7DsIlDeLUdpPxM2TGRzCzr5GpnYL M6YmM0fuL2oQGSVGHrWdRn9fhicOiHI= X-Google-Smtp-Source: AK7set/lb47TNs4WZdV/lfneGa3vlSnF3eCz7HT/YLrrRFQlj7qxX88gXTKYg9jHepFjlMCGZaF+lFhq4LY= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a81:4309:0:b0:52e:c2c5:b75e with SMTP id q9-20020a814309000000b0052ec2c5b75emr19ywa.2.1676524686391; Wed, 15 Feb 2023 21:18:06 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:20 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-6-surenb@google.com> Subject: [PATCH v3 05/35] maple_tree: Fix write memory barrier of nodes once dead for RCU mode From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, "Liam R. Howlett" , Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Liam R. Howlett" During the development of the maple tree, the strategy of freeing multiple nodes changed and, in the process, the pivots were reused to store pointers to dead nodes. To ensure the readers see accurate pivots, the writers need to mark the nodes as dead and call smp_wmb() to ensure any readers can identify the node as dead before using the pivot values. There were two places where the old method of marking the node as dead without smp_wmb() were being used, which resulted in RCU readers seeing the wrong pivot value before seeing the node was dead. Fix this race condition by using mte_set_node_dead() which has the smp_wmb() call to ensure the race is closed. Add a WARN_ON() to the ma_free_rcu() call to ensure all nodes being freed are marked as dead to ensure there are no other call paths besides the two updated paths. This is necessary for the RCU mode of the maple tree. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam R. Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 7 +++++-- tools/testing/radix-tree/maple.c | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 3d5ab02f981a..6b6eddadd9d2 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -185,7 +185,7 @@ static void mt_free_rcu(struct rcu_head *head) */ static void ma_free_rcu(struct maple_node *node) { - node->parent =3D ma_parent_ptr(node); + WARN_ON(node->parent !=3D ma_parent_ptr(node)); call_rcu(&node->rcu, mt_free_rcu); } =20 @@ -1778,8 +1778,10 @@ static inline void mas_replace(struct ma_state *mas,= bool advanced) rcu_assign_pointer(slots[offset], mas->node); } =20 - if (!advanced) + if (!advanced) { + mte_set_node_dead(old_enode); mas_free(mas, old_enode); + } } =20 /* @@ -4218,6 +4220,7 @@ static inline bool mas_wr_node_store(struct ma_wr_sta= te *wr_mas) done: mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end); if (in_rcu) { + mte_set_node_dead(mas->node); mas->node =3D mt_mk_node(newnode, wr_mas->type); mas_replace(mas, false); } else { diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/ma= ple.c index 958ee9bdb316..4c89ff333f6f 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -108,6 +108,7 @@ static noinline void check_new_node(struct maple_tree *= mt) MT_BUG_ON(mt, mn->slot[1] !=3D NULL); MT_BUG_ON(mt, mas_allocated(&mas) !=3D 0); =20 + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); mas.node =3D MAS_START; mas_nomem(&mas, GFP_KERNEL); @@ -160,6 +161,7 @@ static noinline void check_new_node(struct maple_tree *= mt) MT_BUG_ON(mt, mas_allocated(&mas) !=3D i); MT_BUG_ON(mt, !mn); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } =20 @@ -192,6 +194,7 @@ static noinline void check_new_node(struct maple_tree *= mt) MT_BUG_ON(mt, not_empty(mn)); MT_BUG_ON(mt, mas_allocated(&mas) !=3D i - 1); MT_BUG_ON(mt, !mn); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } =20 @@ -210,6 +213,7 @@ static noinline void check_new_node(struct maple_tree *= mt) mn =3D mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); MT_BUG_ON(mt, mas_allocated(&mas) !=3D j - 1); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas) !=3D 0); @@ -233,6 +237,7 @@ static noinline void check_new_node(struct maple_tree *= mt) MT_BUG_ON(mt, mas_allocated(&mas) !=3D i - j); mn =3D mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); MT_BUG_ON(mt, mas_allocated(&mas) !=3D i - j - 1); } @@ -269,6 +274,7 @@ static noinline void check_new_node(struct maple_tree *= mt) mn =3D mas_pop_node(&mas); /* get the next node. */ MT_BUG_ON(mt, mn =3D=3D NULL); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas) !=3D 0); @@ -294,6 +300,7 @@ static noinline void check_new_node(struct maple_tree *= mt) mn =3D mas_pop_node(&mas2); /* get the next node. */ MT_BUG_ON(mt, mn =3D=3D NULL); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas2) !=3D 0); @@ -334,10 +341,12 @@ static noinline void check_new_node(struct maple_tree= *mt) MT_BUG_ON(mt, mas_allocated(&mas) !=3D MAPLE_ALLOC_SLOTS + 2); mn =3D mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); for (i =3D 1; i <=3D MAPLE_ALLOC_SLOTS + 1; i++) { mn =3D mas_pop_node(&mas); MT_BUG_ON(mt, not_empty(mn)); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); } MT_BUG_ON(mt, mas_allocated(&mas) !=3D 0); @@ -375,6 +384,7 @@ static noinline void check_new_node(struct maple_tree *= mt) mas_node_count(&mas, i); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ mn =3D mas_pop_node(&mas); /* get the next node. */ + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); mas_destroy(&mas); =20 @@ -382,10 +392,13 @@ static noinline void check_new_node(struct maple_tree= *mt) mas_node_count(&mas, i); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ mn =3D mas_pop_node(&mas); /* get the next node. */ + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); mn =3D mas_pop_node(&mas); /* get the next node. */ + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); mn =3D mas_pop_node(&mas); /* get the next node. */ + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); mas_destroy(&mas); } @@ -35369,6 +35382,7 @@ static noinline void check_prealloc(struct maple_tr= ee *mt) MT_BUG_ON(mt, allocated !=3D 1 + height * 3); mn =3D mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) !=3D allocated - 1); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) !=3D 0); mas_destroy(&mas); @@ -35386,6 +35400,7 @@ static noinline void check_prealloc(struct maple_tr= ee *mt) mas_destroy(&mas); allocated =3D mas_allocated(&mas); MT_BUG_ON(mt, allocated !=3D 0); + mn->parent =3D ma_parent_ptr(mn); ma_free_rcu(mn); =20 MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) !=3D 0); @@ -35756,6 +35771,7 @@ void farmer_tests(void) tree.ma_root =3D mt_mk_node(node, maple_leaf_64); mt_dump(&tree); =20 + node->parent =3D ma_parent_ptr(node); ma_free_rcu(node); =20 /* Check things that will make lockdep angry */ --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 63070C636CC for ; Thu, 16 Feb 2023 05:18:52 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229676AbjBPFSr (ORCPT ); Thu, 16 Feb 2023 00:18:47 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45814 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229585AbjBPFS0 (ORCPT ); Thu, 16 Feb 2023 00:18:26 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id F413B42BD8 for ; Wed, 15 Feb 2023 21:18:10 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id e191-20020a2537c8000000b009433a21be0dso835758yba.19 for ; Wed, 15 Feb 2023 21:18:10 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=/TbDrnT4JTA4cMZmAjPRNE76LJNgcWEHHiVobOaA+Y8=; b=WMbcZd2H6xEYgh8jPBjtr3c52H6Pfawu+33AP54ppxn4975bHziKOFepXARIS5jFg+ e6WwvlJoOPdxReXGpPCKc/NAAxfX4rbkO7DDbRqhMho2D2UtZ3EYOUrPrXvxo+yY0eM4 LMQxZ5khHMqm3h0sfllZRxbm/tuunDq1ItomZ38Vc5vIB77wslGrayg6KY3mbC66vptv Do13gHFViUDuIVWARk2CRzZ0n21J2khqCauKfFko2Yy6sWDAGtNFzpXO3m4fv/AYk9g9 lA127TPhorpqkZg7qKdsS4C88Qpbq0ryHT7Lx24g1uyKmp6VAfvqXa/maK1LoDRn6fAx 1DGw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=/TbDrnT4JTA4cMZmAjPRNE76LJNgcWEHHiVobOaA+Y8=; b=InqrOQHPGViOEwqvQJpC8dX2pF5OhmMmiO6svOisN4Zj+QldjOJbPDmP9KL4H3FvWi JAYsllW/hQpCUtUaOx/9tE1k9uhKH85vwc8mu9n3VJp/LVxfxgYbQKgazDrdDvZRGBmj ZL/9AwlvFXqpooQlGFCfBI/9gcAHRCrheGK2xDKJrAtu3P8xxHssid068b0VWitF+iWl INM7WKd85QzIbSnSWaIp9NX/1TPWdM3KzIWd9FTH4TFS5hJI4Xu/1gAkiyaczr//0O8z XKc9SPPK4xqXCQExs7iU02DRfk4e6jt2toTHGosdDKAmDnr/s8SvzgpGLNTjjgJq80Fd Dj3Q== X-Gm-Message-State: AO0yUKXaUWicOVXr0NyTbw6Xz/L02Wuc7Btdwr9dPNBXAp1XmOlyBpBb XfS3ZIR5Ulnza8JuZuFAOhlb3zL0F9U= X-Google-Smtp-Source: AK7set9oeDTHuLB1nfzndbZXzwVBlMHEMtdn96b6e8LnbJxiPKVQLZhG0+zpZpEt4JvzRmnu9oC+9ACRvag= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a25:7805:0:b0:8d7:9497:cbb3 with SMTP id t5-20020a257805000000b008d79497cbb3mr12ybc.5.1676524689233; Wed, 15 Feb 2023 21:18:09 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:21 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-7-surenb@google.com> Subject: [PATCH v3 06/35] maple_tree: Add smp_rmb() to dead node detection From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, "Liam R. Howlett" , Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Liam R. Howlett" Add an smp_rmb() before reading the parent pointer to ensure that anything read from the node prior to the parent pointer hasn't been reordered ahead of this check. The is necessary for RCU mode. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Liam R. Howlett Signed-off-by: Suren Baghdasaryan --- lib/maple_tree.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 6b6eddadd9d2..8ad2d1669fad 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -539,9 +539,11 @@ static inline struct maple_node *mte_parent(const stru= ct maple_enode *enode) */ static inline bool ma_dead_node(const struct maple_node *node) { - struct maple_node *parent =3D (void *)((unsigned long) - node->parent & ~MAPLE_NODE_MASK); + struct maple_node *parent; =20 + /* Do not reorder reads from the node prior to the parent check */ + smp_rmb(); + parent =3D (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); return (parent =3D=3D node); } =20 @@ -556,6 +558,8 @@ static inline bool mte_dead_node(const struct maple_eno= de *enode) struct maple_node *parent, *node; =20 node =3D mte_to_node(enode); + /* Do not reorder reads from the node prior to the parent check */ + smp_rmb(); parent =3D mte_parent(enode); return (parent =3D=3D node); } --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id AE603C6379F for ; Thu, 16 Feb 2023 05:18:57 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229619AbjBPFS4 (ORCPT ); Thu, 16 Feb 2023 00:18:56 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45816 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229632AbjBPFSk (ORCPT ); Thu, 16 Feb 2023 00:18:40 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C7838457D5 for ; Wed, 15 Feb 2023 21:18:12 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-52f233aff21so8594177b3.7 for ; Wed, 15 Feb 2023 21:18:12 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=hX1DUaFSHqHCHFWD5Crp9NnpH/+9VHFxYkECoHgya1E=; b=ZK5oAM6AcKVtxFxzHW9PhD/PdA0+Kv3bSAspA28+ZPD1m3cnM9poVYSoqhonqkIeJU SshDVskllMZgIeSGhnCidLA6ddGXSMxz+S/xoEp08YDrkRg+7RXAXCoS6u4tI7rKulCi ggnzbiGToN5cME4XPhcHuI14WBO6+4EQsud/8F6AxE1deunG+n/R+uM3wq+OYW0Yw/xw 3Mt1GtYUQdIHi9t1kC0vmn2kgnR9+BG6rYf5dtABCjAt9PnVyIFEokbi9zalyZENG7ye R6/8svYYK3xPGp282avPu/RfQWQWw9vT/0uQU8vbTuKv0KxRDKx0B9gWO6ClqS6abquo +1jg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=hX1DUaFSHqHCHFWD5Crp9NnpH/+9VHFxYkECoHgya1E=; b=4lT4jObWMCftKY/lEJ3DG3KxiWu4QIXsWes2ERJUX2WO9NZL+dXBWL5inEStzMehjA rP0MLloHtGHaC2aErKnc7NLhHSi72v61YSNJPvjRCoZNT0v+Arv3FB2Qxb+28idxZuh/ WjsrOCB+Kzwg9MIPKI0G+r3Hmu0h1MRCqHq5XgYTsdoTDnF4R/JP5Bce2b+Ws213CazK 5xqsgimu2Uj4A79/VHM+qQxsJQoj7pMxgdw+16dLgUvO/mBKeQysOeEFrgUGgGx3yjM/ kZ03oWFRviYZ+GSAM2W/0FAzdn32uPVVYgnJj5HwoU+0xvpC8etjZ4vLmFSSmf28zRaU zkNA== X-Gm-Message-State: AO0yUKUjDVVwJ2xtn31VeRoIClNGidaPmL/Ys97+B8pDFcdDfJBN8WNV JjBJe0o5g5x5oj1EgfosgxY7VeGgRaE= X-Google-Smtp-Source: AK7set+BUdfWyfJ4CpYtDrI1pKL/2vKMY8TRPzm0GzgZQonEWMOroSKkOakotgIDyZRMjNiR+HE0zfFqp3Q= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a05:6902:4ea:b0:926:d02a:c18a with SMTP id w10-20020a05690204ea00b00926d02ac18amr43355ybs.233.1676524691938; Wed, 15 Feb 2023 21:18:11 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:22 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-8-surenb@google.com> Subject: [PATCH v3 07/35] maple_tree: Add RCU lock checking to rcu callback functions From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, "Liam R. Howlett" , Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Liam R. Howlett" Dereferencing RCU objects within the RCU callback without the RCU check has caused lockdep to complain. Fix the RCU dereferencing by using the RCU callback lock to ensure the operation is safe. Also stop creating a new lock to use for dereferencing during destruction of the tree or subtree. Instead, pass through a pointer to the tree that has the lock that is held for RCU dereferencing checking. It also does not make sense to use the maple state in the freeing scenario as the tree walk is a special case where the tree no longer has the normal encodings and parent pointers. Fixes: 54a611b60590 ("Maple Tree: add new data structure") Reported-by: Suren Baghdasaryan Signed-off-by: Liam R. Howlett --- lib/maple_tree.c | 188 ++++++++++++++++++++++++----------------------- 1 file changed, 96 insertions(+), 92 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 8ad2d1669fad..2be86368237d 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -824,6 +824,11 @@ static inline void *mt_slot(const struct maple_tree *m= t, return rcu_dereference_check(slots[offset], mt_locked(mt)); } =20 +static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slo= ts, + unsigned char offset) +{ + return rcu_dereference_protected(slots[offset], mt_locked(mt)); +} /* * mas_slot_locked() - Get the slot value when holding the maple tree lock. * @mas: The maple state @@ -835,7 +840,7 @@ static inline void *mt_slot(const struct maple_tree *mt, static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slo= ts, unsigned char offset) { - return rcu_dereference_protected(slots[offset], mt_locked(mas->tree)); + return mt_slot_locked(mas->tree, slots, offset); } =20 /* @@ -907,34 +912,35 @@ static inline void ma_set_meta(struct maple_node *mn,= enum maple_type mt, } =20 /* - * mas_clear_meta() - clear the metadata information of a node, if it exis= ts - * @mas: The maple state + * mt_clear_meta() - clear the metadata information of a node, if it exists + * @mt: The maple tree * @mn: The maple node - * @mt: The maple node type + * @type: The maple node type * @offset: The offset of the highest sub-gap in this node. * @end: The end of the data in this node. */ -static inline void mas_clear_meta(struct ma_state *mas, struct maple_node = *mn, - enum maple_type mt) +static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node = *mn, + enum maple_type type) { struct maple_metadata *meta; unsigned long *pivots; void __rcu **slots; void *next; =20 - switch (mt) { + switch (type) { case maple_range_64: pivots =3D mn->mr64.pivot; if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) { slots =3D mn->mr64.slot; - next =3D mas_slot_locked(mas, slots, - MAPLE_RANGE64_SLOTS - 1); - if (unlikely((mte_to_node(next) && mte_node_type(next)))) - return; /* The last slot is a node, no metadata */ + next =3D mt_slot_locked(mt, slots, + MAPLE_RANGE64_SLOTS - 1); + if (unlikely((mte_to_node(next) && + mte_node_type(next)))) + return; /* no metadata, could be node */ } fallthrough; case maple_arange_64: - meta =3D ma_meta(mn, mt); + meta =3D ma_meta(mn, type); break; default: return; @@ -5497,7 +5503,7 @@ static inline int mas_rev_alloc(struct ma_state *mas,= unsigned long min, } =20 /* - * mas_dead_leaves() - Mark all leaves of a node as dead. + * mte_dead_leaves() - Mark all leaves of a node as dead. * @mas: The maple state * @slots: Pointer to the slot array * @type: The maple node type @@ -5507,16 +5513,16 @@ static inline int mas_rev_alloc(struct ma_state *ma= s, unsigned long min, * Return: The number of leaves marked as dead. */ static inline -unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots, - enum maple_type mt) +unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree= *mt, + void __rcu **slots) { struct maple_node *node; enum maple_type type; void *entry; int offset; =20 - for (offset =3D 0; offset < mt_slots[mt]; offset++) { - entry =3D mas_slot_locked(mas, slots, offset); + for (offset =3D 0; offset < mt_slot_count(enode); offset++) { + entry =3D mt_slot(mt, slots, offset); type =3D mte_node_type(entry); node =3D mte_to_node(entry); /* Use both node and type to catch LE & BE metadata */ @@ -5531,162 +5537,160 @@ unsigned char mas_dead_leaves(struct ma_state *ma= s, void __rcu **slots, return offset; } =20 -static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offs= et) +/** + * mte_dead_walk() - Walk down a dead tree to just before the leaves + * @enode: The maple encoded node + * @offset: The starting offset + * + * Note: This can only be used from the RCU callback context. + */ +static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned cha= r offset) { - struct maple_node *next; + struct maple_node *node, *next; void __rcu **slots =3D NULL; =20 - next =3D mas_mn(mas); + next =3D mte_to_node(*enode); do { - mas->node =3D mt_mk_node(next, next->type); - slots =3D ma_slots(next, next->type); - next =3D mas_slot_locked(mas, slots, offset); + *enode =3D ma_enode_ptr(next); + node =3D mte_to_node(*enode); + slots =3D ma_slots(node, node->type); + next =3D rcu_dereference_protected(slots[offset], + lock_is_held(&rcu_callback_map)); offset =3D 0; } while (!ma_is_leaf(next->type)); =20 return slots; } =20 +/** + * mt_free_walk() - Walk & free a tree in the RCU callback context + * @head: The RCU head that's within the node. + * + * Note: This can only be used from the RCU callback context. + */ static void mt_free_walk(struct rcu_head *head) { void __rcu **slots; struct maple_node *node, *start; - struct maple_tree mt; + struct maple_enode *enode; unsigned char offset; enum maple_type type; - MA_STATE(mas, &mt, 0, 0); =20 node =3D container_of(head, struct maple_node, rcu); =20 if (ma_is_leaf(node->type)) goto free_leaf; =20 - mt_init_flags(&mt, node->ma_flags); - mas_lock(&mas); start =3D node; - mas.node =3D mt_mk_node(node, node->type); - slots =3D mas_dead_walk(&mas, 0); - node =3D mas_mn(&mas); + enode =3D mt_mk_node(node, node->type); + slots =3D mte_dead_walk(&enode, 0); + node =3D mte_to_node(enode); do { mt_free_bulk(node->slot_len, slots); offset =3D node->parent_slot + 1; - mas.node =3D node->piv_parent; - if (mas_mn(&mas) =3D=3D node) - goto start_slots_free; - - type =3D mte_node_type(mas.node); - slots =3D ma_slots(mte_to_node(mas.node), type); - if ((offset < mt_slots[type]) && (slots[offset])) - slots =3D mas_dead_walk(&mas, offset); - - node =3D mas_mn(&mas); + enode =3D node->piv_parent; + if (mte_to_node(enode) =3D=3D node) + goto free_leaf; + + type =3D mte_node_type(enode); + slots =3D ma_slots(mte_to_node(enode), type); + if ((offset < mt_slots[type]) && + rcu_dereference_protected(slots[offset], + lock_is_held(&rcu_callback_map))) + slots =3D mte_dead_walk(&enode, offset); + node =3D mte_to_node(enode); } while ((node !=3D start) || (node->slot_len < offset)); =20 slots =3D ma_slots(node, node->type); mt_free_bulk(node->slot_len, slots); =20 -start_slots_free: - mas_unlock(&mas); free_leaf: mt_free_rcu(&node->rcu); } =20 -static inline void __rcu **mas_destroy_descend(struct ma_state *mas, - struct maple_enode *prev, unsigned char offset) +static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, + struct maple_tree *mt, struct maple_enode *prev, unsigned char offset) { struct maple_node *node; - struct maple_enode *next =3D mas->node; + struct maple_enode *next =3D *enode; void __rcu **slots =3D NULL; + enum maple_type type; + unsigned char next_offset =3D 0; =20 do { - mas->node =3D next; - node =3D mas_mn(mas); - slots =3D ma_slots(node, mte_node_type(mas->node)); - next =3D mas_slot_locked(mas, slots, 0); - if ((mte_dead_node(next))) { - mte_to_node(next)->type =3D mte_node_type(next); - next =3D mas_slot_locked(mas, slots, 1); - } + *enode =3D next; + node =3D mte_to_node(*enode); + type =3D mte_node_type(*enode); + slots =3D ma_slots(node, type); + next =3D mt_slot_locked(mt, slots, next_offset); + if ((mte_dead_node(next))) + next =3D mt_slot_locked(mt, slots, ++next_offset); =20 - mte_set_node_dead(mas->node); - node->type =3D mte_node_type(mas->node); - mas_clear_meta(mas, node, node->type); + mte_set_node_dead(*enode); + node->type =3D type; node->piv_parent =3D prev; node->parent_slot =3D offset; - offset =3D 0; - prev =3D mas->node; + offset =3D next_offset; + next_offset =3D 0; + prev =3D *enode; } while (!mte_is_leaf(next)); =20 return slots; } =20 -static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_fl= ags, +static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *= mt, bool free) { void __rcu **slots; struct maple_node *node =3D mte_to_node(enode); struct maple_enode *start; - struct maple_tree mt; - - MA_STATE(mas, &mt, 0, 0); =20 - mas.node =3D enode; if (mte_is_leaf(enode)) { node->type =3D mte_node_type(enode); goto free_leaf; } =20 - ma_flags &=3D ~MT_FLAGS_LOCK_MASK; - mt_init_flags(&mt, ma_flags); - mas_lock(&mas); - - mte_to_node(enode)->ma_flags =3D ma_flags; start =3D enode; - slots =3D mas_destroy_descend(&mas, start, 0); - node =3D mas_mn(&mas); + slots =3D mte_destroy_descend(&enode, mt, start, 0); + node =3D mte_to_node(enode); // Updated in the above call. do { enum maple_type type; unsigned char offset; struct maple_enode *parent, *tmp; =20 - node->type =3D mte_node_type(mas.node); - node->slot_len =3D mas_dead_leaves(&mas, slots, node->type); + node->slot_len =3D mte_dead_leaves(enode, mt, slots); if (free) mt_free_bulk(node->slot_len, slots); offset =3D node->parent_slot + 1; - mas.node =3D node->piv_parent; - if (mas_mn(&mas) =3D=3D node) - goto start_slots_free; + enode =3D node->piv_parent; + if (mte_to_node(enode) =3D=3D node) + goto free_leaf; =20 - type =3D mte_node_type(mas.node); - slots =3D ma_slots(mte_to_node(mas.node), type); + type =3D mte_node_type(enode); + slots =3D ma_slots(mte_to_node(enode), type); if (offset >=3D mt_slots[type]) goto next; =20 - tmp =3D mas_slot_locked(&mas, slots, offset); + tmp =3D mt_slot_locked(mt, slots, offset); if (mte_node_type(tmp) && mte_to_node(tmp)) { - parent =3D mas.node; - mas.node =3D tmp; - slots =3D mas_destroy_descend(&mas, parent, offset); + parent =3D enode; + enode =3D tmp; + slots =3D mte_destroy_descend(&enode, mt, parent, offset); } next: - node =3D mas_mn(&mas); - } while (start !=3D mas.node); + node =3D mte_to_node(enode); + } while (start !=3D enode); =20 - node =3D mas_mn(&mas); - node->type =3D mte_node_type(mas.node); - node->slot_len =3D mas_dead_leaves(&mas, slots, node->type); + node =3D mte_to_node(enode); + node->slot_len =3D mte_dead_leaves(enode, mt, slots); if (free) mt_free_bulk(node->slot_len, slots); =20 -start_slots_free: - mas_unlock(&mas); - free_leaf: if (free) mt_free_rcu(&node->rcu); else - mas_clear_meta(&mas, node, node->type); + mt_clear_meta(mt, node, node->type); } =20 /* @@ -5702,10 +5706,10 @@ static inline void mte_destroy_walk(struct maple_en= ode *enode, struct maple_node *node =3D mte_to_node(enode); =20 if (mt_in_rcu(mt)) { - mt_destroy_walk(enode, mt->ma_flags, false); + mt_destroy_walk(enode, mt, false); call_rcu(&node->rcu, mt_free_walk); } else { - mt_destroy_walk(enode, mt->ma_flags, true); + mt_destroy_walk(enode, mt, true); } } =20 --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 020FDC61DA4 for ; Thu, 16 Feb 2023 05:19:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229674AbjBPFS7 (ORCPT ); Thu, 16 Feb 2023 00:18:59 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45976 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229650AbjBPFSn (ORCPT ); Thu, 16 Feb 2023 00:18:43 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AF4F63E60A for ; Wed, 15 Feb 2023 21:18:15 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-532e96672b3so8513177b3.13 for ; Wed, 15 Feb 2023 21:18:15 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=4opi4a+4wBSqIsfjyFGdOCf+RfFv13YZtxtXh5ocOUM=; b=Xo1jDDViHSHN/alioaTDnw+zuoNPKrF9ZLXUf3ea0NE3bXPLl0z7WkGrQUW7AOG0Nr J8SsODBneQSeaMKO7nEqhtXELAqyVVzyHag3dRajS0MNlWpB1lyOtsy0oB8MM0CrQEnP Moe4iuqLi8ffYInMsJBu3lDYUcEPw1W16t68/pqdVg/PM7yHOfl4JRIe/AAV288dZ063 0OmpnGUH5/0w4iEIelRXBTS4gWduVmNFBkN/5ItORK5tiSjU8ab3rw3adCz9oFddo8Er mGSeo2MZqEM7cxpZTU0mXlOgRPyGjhgPgDiiUmvS//v9NEkdkZgUTyYfpCAeqd0avQoQ 0u1A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=4opi4a+4wBSqIsfjyFGdOCf+RfFv13YZtxtXh5ocOUM=; b=dnVvbQuX0z02PvlE2JzCE/26YFqGqDUuIpfJ30U+NHA3qw9ThWwiofaxF09hYN84uM dfyUfd3bxyoDMkeKPTZvNXwwfrHNNvz4XL6Z08lLHaaIQ9OspmlzCC625YorPrElNk5X UxBkLJcvp1dj+jRSWEDvlt+Jo4UKUUDF2GZe7gA0WMdtmFVA1ghDuEG3Ux4+F1iaLOYZ mx48eW/1E2/I867mvHHxdSyTa7hFofNT8kTgYeTJbEl+ZpPJvJehA0iwWMmv43kvBfs9 +w7AD8pmzTvb2ElHOxk0qpcrcqVkzG+bi8+/3p2eZ4TmpG+2D7u6MgS0WdUGskjnaVyP N8Vw== X-Gm-Message-State: AO0yUKXFojpPCu/ngGizB2aOfBSAirllKfpxT8J7j8c5/+krrKJUcxER n0IP5ZUAWcvq5H/Xo1JC1sW8Ym0AJRw= X-Google-Smtp-Source: AK7set+/DgIBDY3hKjRHmWHVe7yByHJ3rZUqgnqz5mohiI9GciRP+9xmqTdnoEyOpn2/QPX/HT0hsnAfle8= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a05:6902:ca:b0:8f2:9e6:47a4 with SMTP id i10-20020a05690200ca00b008f209e647a4mr3ybs.7.1676524694561; Wed, 15 Feb 2023 21:18:14 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:23 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-9-surenb@google.com> Subject: [PATCH v3 08/35] mm: Enable maple tree RCU mode by default. From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, "Liam R. Howlett" , Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: "Liam R. Howlett" Use the maple tree in RCU mode for VMA tracking. This is necessary for the use of per-VMA locking. RCU mode is enabled by default but disabled when exiting an mm and for the new tree during a fork. Also enable RCU for the tree used in munmap operations to ensure the nodes remain valid for readers. Signed-off-by: Liam R. Howlett Signed-off-by: Suren Baghdasaryan --- include/linux/mm_types.h | 3 ++- kernel/fork.c | 3 +++ mm/mmap.c | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3cd60243c625..3fd5305dbbf9 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -768,7 +768,8 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; =20 -#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) +#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ + MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; =20 /* Pointer magic because the dynamic array size confuses some compilers. */ diff --git a/kernel/fork.c b/kernel/fork.c index 5e3029ea8e1e..5f23d5e03362 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -617,6 +617,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *= mm, if (retval) goto out; =20 + mt_clear_in_rcu(vmi.mas.tree); for_each_vma(old_vmi, mpnt) { struct file *file; =20 @@ -700,6 +701,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *= mm, retval =3D arch_dup_mmap(oldmm, mm); loop_out: vma_iter_free(&vmi); + if (!retval) + mt_set_in_rcu(vmi.mas.tree); out: mmap_write_unlock(mm); flush_tlb_mm(oldmm); diff --git a/mm/mmap.c b/mm/mmap.c index 20f21f0949dd..06c0f6686de8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2277,7 +2277,8 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct = vm_area_struct *vma, int count =3D 0; int error =3D -ENOMEM; MA_STATE(mas_detach, &mt_detach, 0, 0); - mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN); + mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & + (MT_FLAGS_LOCK_MASK | MT_FLAGS_USE_RCU)); mt_set_external_lock(&mt_detach, &mm->mmap_lock); =20 /* @@ -3042,6 +3043,7 @@ void exit_mmap(struct mm_struct *mm) */ set_bit(MMF_OOM_SKIP, &mm->flags); mmap_write_lock(mm); + mt_clear_in_rcu(&mm->mm_mt); free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb); --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 5BA3AC61DA4 for ; Thu, 16 Feb 2023 05:19:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229715AbjBPFTD (ORCPT ); Thu, 16 Feb 2023 00:19:03 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46064 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229670AbjBPFSp (ORCPT ); Thu, 16 Feb 2023 00:18:45 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 50A6243452 for ; Wed, 15 Feb 2023 21:18:18 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id h14-20020a258a8e000000b00827819f87e5so917586ybl.0 for ; Wed, 15 Feb 2023 21:18:18 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=pFZiiTJ1lM5Y9cE9VJMMN6c4zys62lyfuQWFB/SvSfY=; b=hDLR1pvRXBoNPEsd5rWO1uuL2xRvkViIuLxNT0AXT2es+wKEpveR0MayIpEY9kMwnX yKqoPf3/kr2W8U8aZxHHsI3MeVglv4Uuopqy/RZQvDUL+Iw/1b3yNPaW9UiWpFiktEjT IG17SwGNRtI3Tudjcg57P2Pp6pg3eItytQO0663ElsQFpDjPji34Qnzv66OgBFV/+yja uvKllpdQQMscfNlzsu7K4yE4HNqAZ53EkNxV25Op17ac2k06MkhohfLIKcbxFpec+BdV 695/dYeBJhC1TaWGjUDZda/yGXvfwWspsF3C1nfSkAJuqCo5le3F23yvLzWMlOVw2Nnw Ls0Q== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=pFZiiTJ1lM5Y9cE9VJMMN6c4zys62lyfuQWFB/SvSfY=; b=ZH3ovrhLib0C7VYfFMEp9wUCa1a0sNv43nTZyEj7wvYn8i+tt1UwDkeH0mm+c5GdFV m6U2FqPgN1XTW1NvUcnZoiu5cHbf7pkaCTbdM6Ly9FxwYz+BZKI0YJQQC9OOnYuPchRf pYjy/ldsOWwqAhhbUuYCvaiMiuCG7GVhBdl5imo8PPcSOGNA43Qb+9VVPXHqcM0XSLs8 e3fEdLmjX3zLLVgaxQSvfbdFVi30PGAUS7IsaxuJ5B2FP+QLxtT7n9CAZQFFtmXBAAe8 V/r7eTk/WMi22nDvlW2v0dhD9z43UlHQQi9a9u2z3wUUAaqWE+aDp0W9nN46vnO6aHpV TTnw== X-Gm-Message-State: AO0yUKX7nXSIQWJMT9c0VHqjqeF3J7x+PExHhiOA20f2Co81l7nt81S6 J3k82obhH4VCA8PRLHkNUrmr8IAZjtM= X-Google-Smtp-Source: AK7set/vOsUP0PVWaWbTlqFxALvIpJBlIbMQnTBHBu2Oot2RdkwCU0iTN4oS1VPXE/uz69lz5N2GmOvT6GU= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a81:9e45:0:b0:50f:f163:7072 with SMTP id n5-20020a819e45000000b0050ff1637072mr442506ywj.285.1676524697322; Wed, 15 Feb 2023 21:18:17 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:24 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-10-surenb@google.com> Subject: [PATCH v3 09/35] mm: introduce CONFIG_PER_VMA_LOCK From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" This configuration variable will be used to build the support for VMA locking during page fault handling. This is enabled on supported architectures with SMP and MMU set. The architecture support is needed since the page fault handler is called from the architecture's page faulting code which needs modifications to handle faults under VMA lock. Signed-off-by: Suren Baghdasaryan --- mm/Kconfig | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/mm/Kconfig b/mm/Kconfig index ca98b2072df5..2e4a7e61768a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1211,6 +1211,18 @@ config LRU_GEN_STATS This option has a per-memcg and per-node memory overhead. # } =20 +config ARCH_SUPPORTS_PER_VMA_LOCK + def_bool n + +config PER_VMA_LOCK + def_bool y + depends on ARCH_SUPPORTS_PER_VMA_LOCK && MMU && SMP + help + Allow per-vma locking during page fault handling. + + This feature allows locking each virtual memory area separately when + handling page faults instead of taking mmap_lock. + source "mm/damon/Kconfig" =20 endmenu --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id B53E0C6379F for ; Thu, 16 Feb 2023 05:19:21 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229686AbjBPFTU (ORCPT ); Thu, 16 Feb 2023 00:19:20 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46082 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229647AbjBPFSs (ORCPT ); Thu, 16 Feb 2023 00:18:48 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5A62946153 for ; Wed, 15 Feb 2023 21:18:20 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id a4-20020a5b0004000000b006fdc6aaec4fso842945ybp.20 for ; Wed, 15 Feb 2023 21:18:20 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=nnxUMMQI6vT0M/Q5sa1IAaTl/WTbc27CfHmxL3/FviM=; b=KnhAt7quYBwXQRhfuSYaJDek4wttI7RDXOOACIL/F63Nz/58eVv03No3Zqh2UIwNdl TkKGisyfzRrXImR2jOkU2wcQekf+kt9/XxXIsRoXlqskuIeX6hOhHTNZXgdfaMplle7K SAATQyrMS3HXKQy7j0lc9Kc3dK3KueDsfgI8ryy5OQhnNBROeaCbU2Hmg2SLUQOwNbgt XwBV7zK7XX+1ClOK24Y2b0Ex8amo9ljLG0ZokYkKzEniw9mpgT07P+84GOQVR154zkPK 4YJcp2AEGd79KcxB1dsy4RuRGjRlYt9yxaLz/0Wa2WtK7Y2fqnfOErlG1kh3RHBuBBJK 58yQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=nnxUMMQI6vT0M/Q5sa1IAaTl/WTbc27CfHmxL3/FviM=; b=cizoPD8FGYc8oxJRSDOtod6y2ksbkM9Nf7KWgULp83n5IriPxwPRahnd4O3DT1HBtx Cq7BTBuNRYLwvbaRm+q2TDaXBlnOv0y1Dgx+rJi4JCaloNIGSMJI2SSO+f+6c2Dvtyik NgEJxQXkQS6sY3c+AnrggVru9x2lictY3igIprMtmgP4WHNz5lIw/leWt4goToMKI5fG ORyew4tAYjNSwcA804jmA8rNHiNtptIZZTXCReLLrMpKR452rgkDQ7akhFJYCSD5Gv06 h3HVRfTLBYXVcn05/EvtplkZw8kaYSylqyHXvDoQFEUG3sTyeDLfkoL2sXrMZyc1Yinp n7cg== X-Gm-Message-State: AO0yUKUKPzTYf3nE/WewVHG8+B4azpm9YHaBTLhUC7pzxdOnfYOY7QiD MF14gF/ag0j+e1tgo2kv26XY7rzLgRI= X-Google-Smtp-Source: AK7set+tlzm8noztFtNGPsWUqf3Z7zno+mYjAZ9UmWznIF5F5gJ5LrgX6pBfJhOwGepp15lWUy19j/IuAtc= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a81:a20c:0:b0:52e:f79e:b639 with SMTP id w12-20020a81a20c000000b0052ef79eb639mr389199ywg.283.1676524699450; Wed, 15 Feb 2023 21:18:19 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:25 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-11-surenb@google.com> Subject: [PATCH v3 10/35] mm: rcu safe VMA freeing From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Michel Lespinasse This prepares for page faults handling under VMA lock, looking up VMAs under protection of an rcu read lock, instead of the usual mmap read lock. Signed-off-by: Michel Lespinasse Signed-off-by: Suren Baghdasaryan --- include/linux/mm_types.h | 13 ++++++++++--- kernel/fork.c | 20 +++++++++++++++++++- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3fd5305dbbf9..fb4e2afad787 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -480,9 +480,16 @@ struct anon_vma_name { struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ =20 - unsigned long vm_start; /* Our start address within vm_mm. */ - unsigned long vm_end; /* The first byte after our end address - within vm_mm. */ + union { + struct { + /* VMA covers [vm_start; vm_end) addresses within mm */ + unsigned long vm_start; + unsigned long vm_end; + }; +#ifdef CONFIG_PER_VMA_LOCK + struct rcu_head vm_rcu; /* Used for deferred freeing. */ +#endif + }; =20 struct mm_struct *vm_mm; /* The address space we belong to. */ =20 diff --git a/kernel/fork.c b/kernel/fork.c index 5f23d5e03362..314d51eb91da 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -479,12 +479,30 @@ struct vm_area_struct *vm_area_dup(struct vm_area_str= uct *orig) return new; } =20 -void vm_area_free(struct vm_area_struct *vma) +static void __vm_area_free(struct vm_area_struct *vma) { free_anon_vma_name(vma); kmem_cache_free(vm_area_cachep, vma); } =20 +#ifdef CONFIG_PER_VMA_LOCK +static void vm_area_free_rcu_cb(struct rcu_head *head) +{ + struct vm_area_struct *vma =3D container_of(head, struct vm_area_struct, + vm_rcu); + __vm_area_free(vma); +} +#endif + +void vm_area_free(struct vm_area_struct *vma) +{ +#ifdef CONFIG_PER_VMA_LOCK + call_rcu(&vma->vm_rcu, vm_area_free_rcu_cb); +#else + __vm_area_free(vma); +#endif +} + static void account_kernel_stack(struct task_struct *tsk, int account) { if (IS_ENABLED(CONFIG_VMAP_STACK)) { --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6DF49C636CC for ; Thu, 16 Feb 2023 05:19:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229658AbjBPFTY (ORCPT ); Thu, 16 Feb 2023 00:19:24 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45784 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229657AbjBPFSw (ORCPT ); Thu, 16 Feb 2023 00:18:52 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4285245F6C for ; Wed, 15 Feb 2023 21:18:23 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-52a8f97c8ccso8654047b3.6 for ; Wed, 15 Feb 2023 21:18:23 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=OfsRQat1IE0fDeYSs5Psw7qjLLZE8mkiXmSdSDqQs6E=; b=MfUNnuRVqilTlETV75doNo5d3X2f1I64PkztavWwbIZWW2j+3a5GtwWp8jaeE2FAS7 MLS5o+W6QzlLw73Ic3jcbhKR5fcnDvpJtAm6NUi7rdrBvnG1z5U94zlj5p9Rf6AtiMXB /0WTQWR4hibDfKvJz8o49Ph/XAqUGXxJaWOev01ngzcVh9Xd6JHGOFVoBB6DDs9XGB0U jDPi/dJdwD6mt/I/Iobb8XMkCnfA0C9OXZWj96tLYiXqom79AFLMHYYopE8HfxvEiMuA aI5RyI6LPkvxen53dNb8XfFirBUFuNssxJItswg1uaEv8wS3br6uTFvWft0h1+Cz189K IN3Q== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=OfsRQat1IE0fDeYSs5Psw7qjLLZE8mkiXmSdSDqQs6E=; b=0INJSZZGRj5H2sQcuUY0td4iWSAnT1XYft+W7vQ8n31jU7wcv5lT+ju/qvpI/+FpD1 FfArRVteTZ9cv+jMavW5HeCtFEYrcHI2Ugb+k5X64pLkARHIVHdz+1DhYsvgEBrvuTBV xTZ/i9+TD/KUxEEKaJrwOPLZOt7Tmc8zt/UkfQmu8JCTAqBpqT/LPtj9E7dCSScg7j6c AjhAFC0MFGwSexfHTCmz0MMC1VkMGvB/GN82u5fpZ0wyQl/ge9DvnE5aWZuuDeX5lr5q pwhPENknly0wFvNT7ibOzStD0vlJLrBCIeH8lbiQI3Fx9jRzxV9V8mey9yNWuwNOeoSr 6jnA== X-Gm-Message-State: AO0yUKU8lGwq22vHBgm5Q2nw+NNwVJWO6oDg/HYyQQoONaquZTDnIk6a 3mV6EVCycZ8+CxePqkBE6iLX6YXgw3c= X-Google-Smtp-Source: AK7set/k5YICRmbO5PrlJaB9Zo139wu7UxHEXvq6ujpnvYnblxA9RcHNr8XeBP6uxC5C9xnIzXdi58p2CG0= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a25:8e8c:0:b0:965:bac9:d458 with SMTP id q12-20020a258e8c000000b00965bac9d458mr0ybl.11.1676524701625; Wed, 15 Feb 2023 21:18:21 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:26 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-12-surenb@google.com> Subject: [PATCH v3 11/35] mm: move mmap_lock assert function definitions From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Move mmap_lock assert function definitions up so that they can be used by other mmap_lock routines. Signed-off-by: Suren Baghdasaryan --- include/linux/mmap_lock.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 96e113e23d04..e49ba91bb1f0 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -60,6 +60,18 @@ static inline void __mmap_lock_trace_released(struct mm_= struct *mm, bool write) =20 #endif /* CONFIG_TRACING */ =20 +static inline void mmap_assert_locked(struct mm_struct *mm) +{ + lockdep_assert_held(&mm->mmap_lock); + VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); +} + +static inline void mmap_assert_write_locked(struct mm_struct *mm) +{ + lockdep_assert_held_write(&mm->mmap_lock); + VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); +} + static inline void mmap_init_lock(struct mm_struct *mm) { init_rwsem(&mm->mmap_lock); @@ -150,18 +162,6 @@ static inline void mmap_read_unlock_non_owner(struct m= m_struct *mm) up_read_non_owner(&mm->mmap_lock); } =20 -static inline void mmap_assert_locked(struct mm_struct *mm) -{ - lockdep_assert_held(&mm->mmap_lock); - VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); -} - -static inline void mmap_assert_write_locked(struct mm_struct *mm) -{ - lockdep_assert_held_write(&mm->mmap_lock); - VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); -} - static inline int mmap_lock_is_contended(struct mm_struct *mm) { return rwsem_is_contended(&mm->mmap_lock); --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id DED22C61DA4 for ; Thu, 16 Feb 2023 05:19:33 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229716AbjBPFTc (ORCPT ); Thu, 16 Feb 2023 00:19:32 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46626 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229620AbjBPFTE (ORCPT ); Thu, 16 Feb 2023 00:19:04 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C26784608B for ; Wed, 15 Feb 2023 21:18:25 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id n20-20020a25da14000000b008fa1d22bd55so839670ybf.21 for ; Wed, 15 Feb 2023 21:18:25 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=S0NkKUx4dkIelWS5aTsq7UgpXIDBIhGrMRdd8DrqTMg=; b=jg6fXXyNPA7kGzHuabxhVQS/lUkV32r/2v7x3huSOdcQqflpdfqhoIlNaR0t9wMIZ5 tNsdAuEIvz4tV4UF6ouWYwPH6jjBst45X8ZS39R/or9bhVEvk/hAhXAhGiG6HODkz0Ti 4+W9CepIhu/sbVVhPdRUrFOaALk+XM5cteC/he2x1EN76QEzJ6hbvpDzGk804gLMUejh OhIx76lfAQ2Aj8i3vKfn29ePpUTgzAK6jOUxJ6v4seZ7ANgrIyb45IfAAVauV03+BjjR E1pd5h52L83bbDKmbPDtlCRxv6trm/z+Fa4Qs6PgKsiEdeHNP77Ta8uAxqqtUigosnhI vlvg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=S0NkKUx4dkIelWS5aTsq7UgpXIDBIhGrMRdd8DrqTMg=; b=Q/Uo+RLjMFlihdIzbmlPSUbkIaJIJTcb4c1bneOax/ntNJhAvKFQNBA3zN7ZkhUV6E TSq8Ut3FpDf8slbbScMCpDenD3Cd3JES5REoGe3cO5trmE5yibGopWva8oVLtDtD/bEy lejdZd2FZixobs1qwJLUU/dT9wcfOpx58hnC5UoPKa3k9k5JgW+r08WVcaptBZUAabcS kIfK4Djaj1ftiC69xa8dRpF+ci1O9rtH53yRUNPaqx64sbPSjCUcFX94gnFsOZmRe54h R/gYsFnjZrtvpG/G+Ff8UZQi12G20IbytQCfogSXTe1M5sWx+0dV5QCAwc3I0UW8ZfjY T0PQ== X-Gm-Message-State: AO0yUKWsCjeq4Mak6wb73UXpvEvAk8M08+g3nCfQYOvwFXPwMmV+CWxx 0VPXwSTgUoGD2IqroIftbGIHBDYB4mM= X-Google-Smtp-Source: AK7set9uXuFcAAjB5O0QV0Xb2ftT+HcRWdedCtf2z+W+4L/MaBisfzCjx3cFBAo6If411AuBWDXU7wCyXBg= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a05:6902:13c6:b0:8da:3163:224 with SMTP id y6-20020a05690213c600b008da31630224mr5ybu.0.1676524704439; Wed, 15 Feb 2023 21:18:24 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:27 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-13-surenb@google.com> Subject: [PATCH v3 12/35] mm: add per-VMA lock and helper functions to control it From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Introduce per-VMA locking. The lock implementation relies on a per-vma and per-mm sequence counters to note exclusive locking: - read lock - (implemented by vma_start_read) requires the vma (vm_lock_seq) and mm (mm_lock_seq) sequence counters to differ. If they match then there must be a vma exclusive lock held somewhere. - read unlock - (implemented by vma_end_read) is a trivial vma->lock unlock. - write lock - (vma_start_write) requires the mmap_lock to be held exclusively and the current mm counter is assigned to the vma counter. This will allow multiple vmas to be locked under a single mmap_lock write lock (e.g. during vma merging). The vma counter is modified under exclusive vma lock. - write unlock - (vma_end_write_all) is a batch release of all vma locks held. It doesn't pair with a specific vma_start_write! It is done before exclusive mmap_lock is released by incrementing mm sequence counter (mm_lock_seq). - write downgrade - if the mmap_lock is downgraded to the read lock, all vma write locks are released as well (effectivelly same as write unlock). Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 82 +++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 8 ++++ include/linux/mmap_lock.h | 13 +++++++ kernel/fork.c | 4 ++ mm/init-mm.c | 3 ++ 5 files changed, 110 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 2992a2d55aee..a056ee170e34 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -623,6 +623,87 @@ struct vm_operations_struct { unsigned long addr); }; =20 +#ifdef CONFIG_PER_VMA_LOCK +static inline void vma_init_lock(struct vm_area_struct *vma) +{ + init_rwsem(&vma->lock); + vma->vm_lock_seq =3D -1; +} + +/* + * Try to read-lock a vma. The function is allowed to occasionally yield f= alse + * locked result to avoid performance overhead, in which case we fall back= to + * using mmap_lock. The function should never yield false unlocked result. + */ +static inline bool vma_start_read(struct vm_area_struct *vma) +{ + /* Check before locking. A race might cause false locked result. */ + if (vma->vm_lock_seq =3D=3D READ_ONCE(vma->vm_mm->mm_lock_seq)) + return false; + + if (unlikely(down_read_trylock(&vma->lock) =3D=3D 0)) + return false; + + /* + * Overflow might produce false locked result. + * False unlocked result is impossible because we modify and check + * vma->vm_lock_seq under vma->lock protection and mm->mm_lock_seq + * modification invalidates all existing locks. + */ + if (unlikely(vma->vm_lock_seq =3D=3D READ_ONCE(vma->vm_mm->mm_lock_seq)))= { + up_read(&vma->lock); + return false; + } + return true; +} + +static inline void vma_end_read(struct vm_area_struct *vma) +{ + rcu_read_lock(); /* keeps vma alive till the end of up_read */ + up_read(&vma->lock); + rcu_read_unlock(); +} + +static inline void vma_start_write(struct vm_area_struct *vma) +{ + int mm_lock_seq; + + mmap_assert_write_locked(vma->vm_mm); + + /* + * current task is holding mmap_write_lock, both vma->vm_lock_seq and + * mm->mm_lock_seq can't be concurrently modified. + */ + mm_lock_seq =3D READ_ONCE(vma->vm_mm->mm_lock_seq); + if (vma->vm_lock_seq =3D=3D mm_lock_seq) + return; + + down_write(&vma->lock); + vma->vm_lock_seq =3D mm_lock_seq; + up_write(&vma->lock); +} + +static inline void vma_assert_write_locked(struct vm_area_struct *vma) +{ + mmap_assert_write_locked(vma->vm_mm); + /* + * current task is holding mmap_write_lock, both vma->vm_lock_seq and + * mm->mm_lock_seq can't be concurrently modified. + */ + VM_BUG_ON_VMA(vma->vm_lock_seq !=3D READ_ONCE(vma->vm_mm->mm_lock_seq), v= ma); +} + +#else /* CONFIG_PER_VMA_LOCK */ + +static inline void vma_init_lock(struct vm_area_struct *vma) {} +static inline bool vma_start_read(struct vm_area_struct *vma) + { return false; } +static inline void vma_end_read(struct vm_area_struct *vma) {} +static inline void vma_start_write(struct vm_area_struct *vma) {} +static inline void vma_assert_write_locked(struct vm_area_struct *vma) {} + +#endif /* CONFIG_PER_VMA_LOCK */ + static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *= mm) { static const struct vm_operations_struct dummy_vm_ops =3D {}; @@ -631,6 +712,7 @@ static inline void vma_init(struct vm_area_struct *vma,= struct mm_struct *mm) vma->vm_mm =3D mm; vma->vm_ops =3D &dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); + vma_init_lock(vma); } =20 /* Use when VMA is not part of the VMA tree and needs no locking */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index fb4e2afad787..e268723eaf44 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -508,6 +508,11 @@ struct vm_area_struct { vm_flags_t __private __vm_flags; }; =20 +#ifdef CONFIG_PER_VMA_LOCK + int vm_lock_seq; + struct rw_semaphore lock; +#endif + /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. @@ -633,6 +638,9 @@ struct mm_struct { * init_mm.mmlist, and are protected * by mmlist_lock */ +#ifdef CONFIG_PER_VMA_LOCK + int mm_lock_seq; +#endif =20 =20 unsigned long hiwater_rss; /* High-watermark of RSS usage */ diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index e49ba91bb1f0..aab8f1b28d26 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -72,6 +72,17 @@ static inline void mmap_assert_write_locked(struct mm_st= ruct *mm) VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); } =20 +#ifdef CONFIG_PER_VMA_LOCK +static inline void vma_end_write_all(struct mm_struct *mm) +{ + mmap_assert_write_locked(mm); + /* No races during update due to exclusive mmap_lock being held */ + WRITE_ONCE(mm->mm_lock_seq, mm->mm_lock_seq + 1); +} +#else +static inline void vma_end_write_all(struct mm_struct *mm) {} +#endif + static inline void mmap_init_lock(struct mm_struct *mm) { init_rwsem(&mm->mmap_lock); @@ -114,12 +125,14 @@ static inline bool mmap_write_trylock(struct mm_struc= t *mm) static inline void mmap_write_unlock(struct mm_struct *mm) { __mmap_lock_trace_released(mm, true); + vma_end_write_all(mm); up_write(&mm->mmap_lock); } =20 static inline void mmap_write_downgrade(struct mm_struct *mm) { __mmap_lock_trace_acquire_returned(mm, false, true); + vma_end_write_all(mm); downgrade_write(&mm->mmap_lock); } =20 diff --git a/kernel/fork.c b/kernel/fork.c index 314d51eb91da..9141427a98b2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -474,6 +474,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struc= t *orig) */ data_race(memcpy(new, orig, sizeof(*new))); INIT_LIST_HEAD(&new->anon_vma_chain); + vma_init_lock(new); dup_anon_vma_name(orig, new); } return new; @@ -1147,6 +1148,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm= , struct task_struct *p, seqcount_init(&mm->write_protect_seq); mmap_init_lock(mm); INIT_LIST_HEAD(&mm->mmlist); +#ifdef CONFIG_PER_VMA_LOCK + mm->mm_lock_seq =3D 0; +#endif mm_pgtables_bytes_init(mm); mm->map_count =3D 0; mm->locked_vm =3D 0; diff --git a/mm/init-mm.c b/mm/init-mm.c index c9327abb771c..33269314e060 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -37,6 +37,9 @@ struct mm_struct init_mm =3D { .page_table_lock =3D __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .arg_lock =3D __SPIN_LOCK_UNLOCKED(init_mm.arg_lock), .mmlist =3D LIST_HEAD_INIT(init_mm.mmlist), +#ifdef CONFIG_PER_VMA_LOCK + .mm_lock_seq =3D 0, +#endif .user_ns =3D &init_user_ns, .cpu_bitmap =3D CPU_BITS_NONE, #ifdef CONFIG_IOMMU_SVA --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7E200C61DA4 for ; Thu, 16 Feb 2023 05:19:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229721AbjBPFTi (ORCPT ); Thu, 16 Feb 2023 00:19:38 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45974 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229587AbjBPFTL (ORCPT ); Thu, 16 Feb 2023 00:19:11 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6D08039B98 for ; Wed, 15 Feb 2023 21:18:27 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-4cddba76f55so8301457b3.23 for ; Wed, 15 Feb 2023 21:18:27 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=lpf0g6jEgEDruwUAtDYeX37TOF9sV0bBdZYAp5OaPFw=; b=QqHoT4KBvKUoUdLlMqp51fx+DvEGbEeCQBAnfd6VVaWQ1iGMSVWdcYYT+IcmdoGW6k 3cvpI3DbxmqWzlsZbZtdKxchg+b34bnVwYQM28D7iM4wfdyptvE97biBiYefhiTugMIi 6QLuIfwXdU8feO6tOQjuROgghWTm3q6OIMngPhTxYx3FIBx/Q/vwNR9bBonlFJnVLRif jIgBrdCbBh8/u0HLrcAEynfXgVsHAUyrN5D1sUvBi7o9DTj4LnFYvOofA3wqcNiCIoKK +suiPaGBCd0kL8KKwbFLDtmEf/YUc3gt6F4GpOlLFqmXcdgxQWnJd0Ox4qoA9H1pNQ9Z aRAQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=lpf0g6jEgEDruwUAtDYeX37TOF9sV0bBdZYAp5OaPFw=; b=B74ZFrO6R44wFBlyz2MCssrIgopSojVQC8qyy0NXl+Nq87HlQIzydb3FEqU74fQHww qWrR0nwICIbOAtz36fVIX1U1peg5+3wmahF/VHlRswJNW7uZ3bNv2tmURvCmdCLol3Tu e9VAHheIaeoJCijRq/4QatWCPIe25Op9TNhFlIStz5AlyvfHqwcSyY+tPYXRrUtMcirf 4V08cDa+NrFTutD8ehZcRfg5/+JcIrBSh6B/NJdTrHtIgjZVBKTEpZPQ08wnnC/2joA8 smD6UYEL8mpGyzPttkUTRggPq318tOVP5saQPu1AP0EtuGipD1eOZ9GKxkhKYrLcZUwP unIQ== X-Gm-Message-State: AO0yUKX3jE0Qh/2Ss6g7THPB3cVWFfBB3LegEHj/csjMqGeQc10LwBT3 E3nAeAClMMnTUR4zd+THMTH3vAfQt1Y= X-Google-Smtp-Source: AK7set9lI2rlt8srxq1B77BNUY6DsMc7qDNM9tTWpZOotyLmczPTVi4pDHBbcvVuKtvP6t7k4zPbNPC6UU8= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a81:5a03:0:b0:527:83f5:3f08 with SMTP id o3-20020a815a03000000b0052783f53f08mr494142ywb.425.1676524707124; Wed, 15 Feb 2023 21:18:27 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:28 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-14-surenb@google.com> Subject: [PATCH v3 13/35] mm: mark VMA as being written when changing vm_flags From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Updates to vm_flags have to be done with VMA marked as being written for preventing concurrent page faults or other modifications. Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index a056ee170e34..f4f702224ec5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -726,28 +726,28 @@ static inline void vm_flags_init(struct vm_area_struc= t *vma, static inline void vm_flags_reset(struct vm_area_struct *vma, vm_flags_t flags) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); vm_flags_init(vma, flags); } =20 static inline void vm_flags_reset_once(struct vm_area_struct *vma, vm_flags_t flags) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); } =20 static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); ACCESS_PRIVATE(vma, __vm_flags) |=3D flags; } =20 static inline void vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); ACCESS_PRIVATE(vma, __vm_flags) &=3D ~flags; } =20 @@ -768,7 +768,7 @@ static inline void __vm_flags_mod(struct vm_area_struct= *vma, static inline void vm_flags_mod(struct vm_area_struct *vma, vm_flags_t set, vm_flags_t clear) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); __vm_flags_mod(vma, set, clear); } =20 --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id D8BF4C636D7 for ; Thu, 16 Feb 2023 05:19:42 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229726AbjBPFTl (ORCPT ); Thu, 16 Feb 2023 00:19:41 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46806 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229724AbjBPFTN (ORCPT ); Thu, 16 Feb 2023 00:19:13 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 45E8946178 for ; Wed, 15 Feb 2023 21:18:32 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id p83-20020a25d856000000b0095d2ada3d26so832015ybg.5 for ; Wed, 15 Feb 2023 21:18:32 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=pCPLSRzUXQbjlFnRzNKKqjCLNtpc7ieGcBzd48Gb76w=; b=Aww5hQNuikXrQshFEtykppDfMBP38yEIiv8YZeHijhJepfYOC8bhl72PA7W7euJsgq UTdE3Yop6yuQ+QJ/imxEBpOD963eWsY1ljkA0jBaqGGg7cF+cENyfejTL2VbGXvtI8Gp zk9pQRaFettmR6L6WtIha8F6CvO6a31a83bm3SVswwIqM5PcX+z7DGQBjncH/5EAGpUc AT/XTvZe03bL8P4vyUJmA5jKJVVP1mrUYpMWnBW8gSUZQLx6c9BBl7Qtd+VfpJh9LBNJ HXZBHaakiRojDQMlDwmDxwqFzGyn5/4IdZKihfypYjTTs63kUZWxAGQtxOSq8MgWRU/t uPxg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=pCPLSRzUXQbjlFnRzNKKqjCLNtpc7ieGcBzd48Gb76w=; b=pHXQo1pPLEDZiV1gU1KpP87jqWOjYqrO3mNpuQ1aVjTNbhPQkVG3sivFB6UcS/JPhT BTB+S+4Rmt+4oiV9V3mK1VmK5q20ZZVNkwDi5nDBlnUDchEg9cbvGzLihZT9PzUbtjzF hPyoumH4uxjWdkxXhHDLyD805++3HzDXRM2oJBHPIg89emyyfIiP8TqJGlb1DFgshNbK lgNn4BKHR/towu34S15Dyx8At9r6HcycAJhxUf12ncBSvPqiNkViSedFbp5JAvIX/z1B NotTDVZMHWi/dkNRgLdi+9Jb79RJ4V09cMqq9IdZ6iUW7gC7mnG2eMtwO0fSxzPCu0I2 vQjQ== X-Gm-Message-State: AO0yUKW1/cC3rRElFXiPEzFbK8BxTI8KXb8qKf/AaczCiqDrrRo1D5O0 j5ag6v6Iihbps/4aK5qHiSH40sBtQFQ= X-Google-Smtp-Source: AK7set+U14wrD6Ap1lad6EVJBTJXnEdfFV5kP7qrWMf6wQhZaMdydVfue60wPwI1XeNtgIFp8aY/q1HqCcY= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a0d:d5c6:0:b0:500:e6a3:a141 with SMTP id x189-20020a0dd5c6000000b00500e6a3a141mr575676ywd.470.1676524709201; Wed, 15 Feb 2023 21:18:29 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:29 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-15-surenb@google.com> Subject: [PATCH v3 14/35] mm/mmap: move VMA locking before vma_adjust_trans_huge call From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" vma_adjust_trans_huge() modifies the VMA and such modifications should be done after VMA is marked as being written. Therefore move VMA flag modifications before vma_adjust_trans_huge() so that VMA is marked before all these modifications. Signed-off-by: Suren Baghdasaryan --- mm/mmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/mmap.c b/mm/mmap.c index 06c0f6686de8..c5f2ddf17b87 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2910,11 +2910,12 @@ static int do_brk_flags(struct vma_iterator *vmi, s= truct vm_area_struct *vma, if (vma_iter_prealloc(vmi)) goto unacct_fail; =20 + /* Set flags first to implicitly lock the VMA before updates */ + vm_flags_set(vma, VM_SOFTDIRTY); vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0); init_vma_prep(&vp, vma); vma_prepare(&vp); vma->vm_end =3D addr + len; - vm_flags_set(vma, VM_SOFTDIRTY); vma_iter_store(vmi, vma); =20 vma_complete(&vp, vmi, mm); --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 130C4C636CC for ; Thu, 16 Feb 2023 05:20:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229741AbjBPFT7 (ORCPT ); Thu, 16 Feb 2023 00:19:59 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46884 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229736AbjBPFTQ (ORCPT ); Thu, 16 Feb 2023 00:19:16 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C0B0947416 for ; Wed, 15 Feb 2023 21:18:36 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-52ec8c88d75so8524207b3.12 for ; Wed, 15 Feb 2023 21:18:36 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=4tPY63bpwhpT+tcOBCZmVWQ4EsQSsAiOumfwemoUSR4=; b=S2sXkiCN7LWOjXaeaMYvz0qNJtP37xcl3aD2Yh+x1ZEOEg3KLdTaCbat1bdYvpjsne ssUpFmsgMZn2ZzaQIQ1nCUMhum1BKo1kVXXpN4w0/hSPSa2H/9hW6SiJ4urWGDrWEDIg /lDLOzKc/0bGJk2Y7O7P1pt9zjSY2dn9UPLftjGBDJCogvybLOdBJ/pVV0l/nLyc8nbd 78hCVT3xeGJZ3GfClpuKXysNAZLItz5CqeYcRfuccmKUwUq5LhC6V5ar511CeqKShlzR g15CQtYg8YR8Te7+dSB+30LV6maBKvWp8iWZepEStLZa94uN9djYv3KGIMR2EUqABQ+Q /xVA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=4tPY63bpwhpT+tcOBCZmVWQ4EsQSsAiOumfwemoUSR4=; b=1gSp1NhqKDSGVwAYOrs7UiLEtJSXs/IZIItXO+LeS/aSR5jAeqWBV1BD8ll43L61ZD 2mJyESaOQrLAJg7LYujyxfmj8o0Es8EkyaJp8/VAaOE2uU0Zqu5sZyK9PPtqpC0jN78+ KrJLQm1k4oN13qGwlatE4W30dlmUkhQEeRuW2Eqy/B758r7v0nYScKo0RgwoMh5yvTQa idQg6T4r/A6qXoFuY4oLsJ3ZYVY/L+EzAsMvfr0JuYRG7Rv+AaaESpKMvF5+VSL1khRl m8c40SwUTXYvTwvsgTnd51eQ/2Ce/XC1V6tNSlWAQGnJB+8aV0GI+B6WdVB5pJadWK25 /EEw== X-Gm-Message-State: AO0yUKVO2ispUTM5pBxHy2HkofDvcWn6bm6Ec5DczVN9HbhBMrK86TIj 4qgRcRIGlTKDLXQoQ0ooXPuLJ1WvOUA= X-Google-Smtp-Source: AK7set+CCkBNuKyDmku/7wwfF8A9hgf/5Gdg5m5aj/wzSHdI7lsc4GkupJYUuH2rfCym/bLqx0hNud/JqYI= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a05:6902:10f:b0:855:fa17:4f5f with SMTP id o15-20020a056902010f00b00855fa174f5fmr82ybh.1.1676524711465; Wed, 15 Feb 2023 21:18:31 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:30 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-16-surenb@google.com> Subject: [PATCH v3 15/35] mm/khugepaged: write-lock VMA while collapsing a huge page From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Protect VMA from concurrent page fault handler while collapsing a huge page. Page fault handler needs a stable PMD to use PTL and relies on per-VMA lock to prevent concurrent PMD changes. pmdp_collapse_flush(), set_huge_pmd() and collapse_and_free_pmd() can modify a PMD, which will not be detected by a page fault handler without proper locking. Before this patch, page tables can be walked under any one of the mmap_lock, the mapping lock, and the anon_vma lock; so when khugepaged unlinks and frees page tables, it must ensure that all of those either are locked or don't exist. This patch adds a fourth lock under which page tables can be traversed, and so khugepaged must also lock out that one. Signed-off-by: Suren Baghdasaryan --- mm/khugepaged.c | 5 +++++ mm/rmap.c | 31 ++++++++++++++++--------------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 8dbc39896811..d9376acf1fbe 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1140,6 +1140,7 @@ static int collapse_huge_page(struct mm_struct *mm, u= nsigned long address, if (result !=3D SCAN_SUCCEED) goto out_up_write; =20 + vma_start_write(vma); anon_vma_lock_write(vma->anon_vma); =20 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, address, @@ -1607,6 +1608,9 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, uns= igned long addr, goto drop_hpage; } =20 + /* Lock the vma before taking i_mmap and page table locks */ + vma_start_write(vma); + /* * We need to lock the mapping so that from here on, only GUP-fast and * hardware page walks can access the parts of the page tables that @@ -1812,6 +1816,7 @@ static int retract_page_tables(struct address_space *= mapping, pgoff_t pgoff, result =3D SCAN_PTE_UFFD_WP; goto unlock_next; } + vma_start_write(vma); collapse_and_free_pmd(mm, vma, addr, pmd); if (!cc->is_khugepaged && is_target) result =3D set_huge_pmd(vma, addr, pmd, hpage); diff --git a/mm/rmap.c b/mm/rmap.c index 15ae24585fc4..8e1a2ad9ca53 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -25,21 +25,22 @@ * mapping->invalidate_lock (in filemap_fault) * page->flags PG_locked (lock_page) * hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs be= low) - * mapping->i_mmap_rwsem - * anon_vma->rwsem - * mm->page_table_lock or pte_lock - * swap_lock (in swap_duplicate, swap_info_get) - * mmlist_lock (in mmput, drain_mmlist and others) - * mapping->private_lock (in block_dirty_folio) - * folio_lock_memcg move_lock (in block_dirty_folio) - * i_pages lock (widely used) - * lruvec->lru_lock (in folio_lruvec_lock_irq) - * inode->i_lock (in set_page_dirty's __mark_inode_dirty) - * bdi.wb->list_lock (in set_page_dirty's __mark_inode_d= irty) - * sb_lock (within inode_lock in fs/fs-writeback.c) - * i_pages lock (widely used, in set_page_dirty, - * in arch-dependent flush_dcache_mmap_lock, - * within bdi.wb->list_lock in __sync_single= _inode) + * vma_start_write + * mapping->i_mmap_rwsem + * anon_vma->rwsem + * mm->page_table_lock or pte_lock + * swap_lock (in swap_duplicate, swap_info_get) + * mmlist_lock (in mmput, drain_mmlist and others) + * mapping->private_lock (in block_dirty_folio) + * folio_lock_memcg move_lock (in block_dirty_folio) + * i_pages lock (widely used) + * lruvec->lru_lock (in folio_lruvec_lock_irq) + * inode->i_lock (in set_page_dirty's __mark_inode_dir= ty) + * bdi.wb->list_lock (in set_page_dirty's __mark_inode= _dirty) + * sb_lock (within inode_lock in fs/fs-writeback.c) + * i_pages lock (widely used, in set_page_dirty, + * in arch-dependent flush_dcache_mmap_loc= k, + * within bdi.wb->list_lock in __sync_sing= le_inode) * * anon_vma->rwsem,mapping->i_mmap_rwsem (memory_failure, collect_procs_= anon) * ->tasklist_lock --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id AE53BC61DA4 for ; Thu, 16 Feb 2023 05:20:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229783AbjBPFUI (ORCPT ); Thu, 16 Feb 2023 00:20:08 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46296 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229695AbjBPFTT (ORCPT ); Thu, 16 Feb 2023 00:19:19 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B91EF42BCB for ; Wed, 15 Feb 2023 21:18:45 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id a1-20020a258701000000b008e1de4c1e7dso848079ybl.17 for ; Wed, 15 Feb 2023 21:18:45 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=28uvo2IqceKOssyZjY7i8BHUgiXFo9Fj5hjouTFWFAM=; b=ZPrkg6AbbvOKBZi1SkD/IxKxJeFUcG0rgRJFt48c6bsRhM/elzC2zYFPNxVAX1Pb/y v8gEIUO6XL4wPMpxHAJU/jtJ7FTBRhHG6n1tq7RCpsfo3BvcbN99La419Vjdnmr4LZUg Gb+YjubKYslJ6dowNgphM8TvAV/LOX1cmszsmnXszpI4IFG2RXQTfj4jwXf+4wC/qln3 8/CHWsl7LtqeSj/ymA2+araRUo+8VrDVeXfGOtKcUcgJXUwZ/d5G40s9s5ABK3v0GdSA ZKM8IbDd/wXIDY67hjDQxG31fd3mnWXrl8BZPhPKFflZXpp+nOaLG/s24K3lMQ1hF8se I12A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=28uvo2IqceKOssyZjY7i8BHUgiXFo9Fj5hjouTFWFAM=; b=tZZnuoU+6DGzX/CGijzGnY/iUPwL3wFTYD2DNuP2aSss06bywYwcoLbbFdJoY1QGT4 Bd4G0HITtMVU6UzG3HFhfYTACD0Hj48ZyqrTCFQt7Kez5P75uw5qiXTiW3b9Yrxusb7I XmarJgOj2j6Kg+rWymZoNhQUP/m05/QYw127mhXTqzLuBPZeSl4wI1LGdhQB42kGPMYT P7S1kVN1YbK41P7TXp1X81JQnUKbAroPpMEEYfZg4KsovPCK0f1cCR52AeaYWrHFu/vU DAW0dMN9ii6EtOuSZ7pnbAYoFM6eqtBjt27RGXQYuVOI+vxUnR0wiFU/zWWgnEGLZuWm pstw== X-Gm-Message-State: AO0yUKXU4bQj/n0NJG+6IX4yGxH1LWdVRoIVVgmpCF/9uZYwJHcLaDSo X6ItIgjNZeV9XB7OJ9WiLUdc4rF8lc8= X-Google-Smtp-Source: AK7set9S4UPuY0Di9CvIunrawIij5qvY5UYsfRSe+cxMHLWbfH6TBfZEYx+EaVk4FGZlSlGHqCNzztV/E7o= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a5b:90d:0:b0:94a:ebba:cba1 with SMTP id a13-20020a5b090d000000b0094aebbacba1mr584331ybq.179.1676524713985; Wed, 15 Feb 2023 21:18:33 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:31 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-17-surenb@google.com> Subject: [PATCH v3 16/35] mm/mmap: write-lock VMAs before merging, splitting or expanding them From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Decisions about whether VMAs can be merged, split or expanded must be made while VMAs are protected from the changes which can affect that decision. For example, merge_vma uses vma->anon_vma in its decision whether the VMA can be merged. Meanwhile, page fault handler changes vma->anon_vma during COW operation. Write-lock all VMAs which might be affected by a merge or split operation before making decision how such operations should be performed. Signed-off-by: Suren Baghdasaryan --- mm/mmap.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index c5f2ddf17b87..ec2f8d0af280 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -269,8 +269,11 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) */ vma_iter_init(&vmi, mm, oldbrk); next =3D vma_find(&vmi, newbrk + PAGE_SIZE + stack_guard_gap); - if (next && newbrk + PAGE_SIZE > vm_start_gap(next)) - goto out; + if (next) { + vma_start_write(next); + if (newbrk + PAGE_SIZE > vm_start_gap(next)) + goto out; + } =20 brkvma =3D vma_prev_limit(&vmi, mm->start_brk); /* Ok, looks good - let it rip. */ @@ -912,10 +915,17 @@ struct vm_area_struct *vma_merge(struct vma_iterator = *vmi, struct mm_struct *mm, if (vm_flags & VM_SPECIAL) return NULL; =20 + if (prev) + vma_start_write(prev); next =3D find_vma(mm, prev ? prev->vm_end : 0); + if (next) + vma_start_write(next); mid =3D next; - if (next && next->vm_end =3D=3D end) /* cases 6, 7, 8 */ + if (next && next->vm_end =3D=3D end) { /* cases 6, 7, 8 */ next =3D find_vma(mm, next->vm_end); + if (next) + vma_start_write(next); + } =20 /* verify some invariant that must be enforced by the caller */ VM_WARN_ON(prev && addr <=3D prev->vm_start); @@ -2163,6 +2173,7 @@ int __split_vma(struct vma_iterator *vmi, struct vm_a= rea_struct *vma, WARN_ON(vma->vm_start >=3D addr); WARN_ON(vma->vm_end <=3D addr); =20 + vma_start_write(vma); if (vma->vm_ops && vma->vm_ops->may_split) { err =3D vma->vm_ops->may_split(vma, addr); if (err) @@ -2518,6 +2529,8 @@ unsigned long mmap_region(struct file *file, unsigned= long addr, =20 /* Attempt to expand an old mapping */ /* Check next */ + if (next) + vma_start_write(next); if (next && next->vm_start =3D=3D end && !vma_policy(next) && can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen, NULL_VM_UFFD_CTX, NULL)) { @@ -2527,6 +2540,8 @@ unsigned long mmap_region(struct file *file, unsigned= long addr, } =20 /* Check prev */ + if (prev) + vma_start_write(prev); if (prev && prev->vm_end =3D=3D addr && !vma_policy(prev) && (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file, pgoff, vma->vm_userfaultfd_ctx, NULL) : @@ -2900,6 +2915,8 @@ static int do_brk_flags(struct vma_iterator *vmi, str= uct vm_area_struct *vma, if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT)) return -ENOMEM; =20 + if (vma) + vma_start_write(vma); /* * Expand the existing vma if possible; Note that singular lists do not * occur after forking, so the expand will only happen on new VMAs. --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id CEBD7C636CC for ; Thu, 16 Feb 2023 05:20:06 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229804AbjBPFUE (ORCPT ); Thu, 16 Feb 2023 00:20:04 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46294 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229746AbjBPFTT (ORCPT ); Thu, 16 Feb 2023 00:19:19 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B97D742BD8 for ; Wed, 15 Feb 2023 21:18:45 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id p7-20020a257407000000b0091b90b20cd9so869939ybc.6 for ; Wed, 15 Feb 2023 21:18:45 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=Gs5XDlSmqlz144wCEPg25vfe9kudkVyQE95J1Pewv8Q=; b=NZSRPVmVQg07iGqxuaNSoyEyX/JUlE5074LqD7StZA8wPOZx80SXxNMt5E5KxxrtoC 5GmbXen1kftSSNBaHdmndb327iEU+JgvGlsjv+jrut0kw6bBF6uNimcKzz4dFOSEItYW Y4LEl4ERFvkTfh+pRssZBSprx0WHPyqr3iZkWvDeufJqDlNo5Dl9o53Xv9skjZ4WRCg9 pPw2zfwLYTP83BBCg7T3fM4u8tR26MmRDA1XMM3A/clmWDV/BI6rgpWB66aseND2aMHJ sak6MgPcdV5XEwJQs9ETMkutqiCQEhW6TAM4KAr4SAjK7KgYItsJ/rcb5RNyZGDs+f6q Q5lA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=Gs5XDlSmqlz144wCEPg25vfe9kudkVyQE95J1Pewv8Q=; b=z5HT4SprMcPNuNcT3lm4djTjfO3QyMjxUIKUeVcycvYqZ9+Yhz5oyJyKR93sl/bIxJ hZbvWKKE1WpGQuDnImLs2vScmX/viRzHYfK7MomFeykWIbeTaA92ell6w58ykjrFdsbb +PtuSkGY6Qq3FCWGCygCUsxir5cHEoSxmZ/Xb5SgZOLlvh7QDmt0Qo1lTyJBP3YoGutp RsEadFbhoNPhtJIrUI8BOyVkOCUfMi+NfOcX9d5yS8or9eE3hizsw8TadZ1AB49L+MA4 AGfMwyUBJQpCCweZk2pCiFgRpvizP8sNebKxIP9njMMHKcSoZwqs7myyMyPUNc/OjzLT iCUw== X-Gm-Message-State: AO0yUKWaWg8YbSJgw9u+VRqFYHL33ZJsinlugrWsfA723BW8PENHxxPN /QrRTGxHx7kZy0WEdE26EllXwamHcrw= X-Google-Smtp-Source: AK7set81sG3yc9X92pQzrCT4qIFN1iEujQCfNrT8GtjdXbA2djctnG24/XowjsXx2FP9/9bsOvEUQhAtEC0= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a81:9345:0:b0:52e:e6ed:30bb with SMTP id k66-20020a819345000000b0052ee6ed30bbmr607996ywg.571.1676524715849; Wed, 15 Feb 2023 21:18:35 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:32 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-18-surenb@google.com> Subject: [PATCH v3 17/35] mm/mmap: write-lock VMA before shrinking or expanding it From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" vma_expand and vma_shrink change VMA boundaries. Expansion might also result in freeing of an adjacent VMA. Write-lock affected VMAs to prevent concurrent page faults. Signed-off-by: Suren Baghdasaryan Reviewed-by: Liam R. Howlett --- mm/mmap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index ec2f8d0af280..f079e5bbcd57 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -674,6 +674,9 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area= _struct *vma, ret =3D dup_anon_vma(vma, next); if (ret) return ret; + + /* Lock the VMA before removing it */ + vma_start_write(next); } =20 init_multi_vma_prep(&vp, vma, NULL, remove_next ? next : NULL, NULL); @@ -686,6 +689,7 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area= _struct *vma, if (vma_iter_prealloc(vmi)) goto nomem; =20 + vma_start_write(vma); vma_adjust_trans_huge(vma, start, end, 0); /* VMA iterator points to previous, so set to start if necessary */ if (vma_iter_addr(vmi) !=3D start) @@ -725,6 +729,7 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area= _struct *vma, if (vma_iter_prealloc(vmi)) return -ENOMEM; =20 + vma_start_write(vma); init_vma_prep(&vp, vma); vma_adjust_trans_huge(vma, start, end, 0); vma_prepare(&vp); --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0D919C61DA4 for ; Thu, 16 Feb 2023 05:20:14 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229806AbjBPFUM (ORCPT ); Thu, 16 Feb 2023 00:20:12 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46326 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229652AbjBPFTU (ORCPT ); Thu, 16 Feb 2023 00:19:20 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B942642BCD for ; Wed, 15 Feb 2023 21:18:45 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-527501b56ffso8390927b3.15 for ; Wed, 15 Feb 2023 21:18:45 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=jGkERMphMFi5DTXO6AzCtASC+DQ2dhGXIGrkfltmA+k=; b=ClKHnTmRqrwFsiwdvZoHbs8UrIxrOZcnAlLMKj0DmY4EeItAiZvM3xF/1XkOIILsHj t/ncp3cCJ608udelXLFUJi2J2Bkc4zBNFEFw7AtqnW62B+1ruBsojOMJ+l//Eveg6kMb 9mESDdWGEfVZo4wbqdchVNx7evryh4U8zH6lvFtZPteOd8Fw20lUCwc+q1jxjD4dRCfO fPscX4k3EE8nShVcC7jG6Imy8fv3tD4a5LzQcDPbvgxMAF9FYu61sb1sXCbMYbbYMnzw H0gcnPTwNkiDCSFpe5MW+pIUr82O7ftWGw19nAsT/pwTKfJQo1rJfp9QBdZ/qdttHeJW nqYA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=jGkERMphMFi5DTXO6AzCtASC+DQ2dhGXIGrkfltmA+k=; b=qtRDFKEJip7gsG4CTysLnnszyVJIiuQkxWTuCv1I1nfYMUON81OK6T3Vy//J9mff9/ i/XJXM6XqVacJaRsfYxvGXsAsxBQ/HH3YH3GCzeqS6SqT6rcLs3VO3c3GUvX0Wx5zZyo lE2Tq26qVNoMV0E8TS4bpUoHne9Wwg4pli56CK0SzWcRdtyGUq5VfJ4xclJZPHnV+oqu M258AcyziLknuvpaFO5Zv7rbRAH9rLx9ree/UkTEo4ZLxSRV+X5+P98C9Qk+pbEy3Tvy GGhG+XQXRAvjyLXWn3bxq3hYL3VxcEycT2dTM64s5KTUkj3nOEjcYFaF+9XOqqLV7kNT SlQg== X-Gm-Message-State: AO0yUKWHw/wvH2cYthaA07nSK7TRG8MBnNZtkJvsJGvPdx/y5f0iFopj rfsT+I2Ur0X3/7R+yzQmSSALSn/eTbY= X-Google-Smtp-Source: AK7set+tNlKzQOXitoUESk90msiy2FCUMsrjKiA2Q/PTOw27FiKf0wEmZgQspJdrm7P++s7G3RI15C4Nebc= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a81:ac54:0:b0:52f:45a:5b00 with SMTP id z20-20020a81ac54000000b0052f045a5b00mr11ywj.2.1676524717784; Wed, 15 Feb 2023 21:18:37 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:33 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-19-surenb@google.com> Subject: [PATCH v3 18/35] mm/mremap: write-lock VMA while remapping it to a new address range From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan , Laurent Dufour Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Write-lock VMA as locked before copying it and when copy_vma produces a new VMA. Signed-off-by: Suren Baghdasaryan Reviewed-by: Laurent Dufour --- mm/mmap.c | 1 + mm/mremap.c | 1 + 2 files changed, 2 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index f079e5bbcd57..0eaa3d1a6cd1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3202,6 +3202,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct= **vmap, get_file(new_vma->vm_file); if (new_vma->vm_ops && new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); + vma_start_write(new_vma); if (vma_link(mm, new_vma)) goto out_vma_link; *need_rmap_locks =3D false; diff --git a/mm/mremap.c b/mm/mremap.c index 411a85682b58..dd541e59edda 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -623,6 +623,7 @@ static unsigned long move_vma(struct vm_area_struct *vm= a, return -ENOMEM; } =20 + vma_start_write(vma); new_pgoff =3D vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); new_vma =3D copy_vma(&vma, new_addr, new_len, new_pgoff, &need_rmap_locks); --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7D93AC636D7 for ; Thu, 16 Feb 2023 05:20:03 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229750AbjBPFUB (ORCPT ); Thu, 16 Feb 2023 00:20:01 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45784 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229698AbjBPFTT (ORCPT ); Thu, 16 Feb 2023 00:19:19 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B971D42BCE for ; Wed, 15 Feb 2023 21:18:45 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-527501b56ffso8391427b3.15 for ; Wed, 15 Feb 2023 21:18:45 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=rL5PSjChr8wFTlKRWTO498bp0kCwADLZZe/SXHes2h8=; b=LRD+lTaXr6Pqw8LYMkej56Kfm91pJviFVpFaUDR17WEIFgUKOpU+ogLRfy6oidvw/F 4mOXwIhPJkh7gxaJzacQ5Q2xpA2MxMudctw4pQKYtuqshuSCMBeCw647b99z6JqUfMYU FnWI7aY3RIFvc0EuIZqwF4n5sAACqaxfMc5BUHOhpo7PrLkybX4Nw85DLStMxOu85Den S+x5aTXsnUr3Va/T4IBtQXPzTR4iMs6msv64zgKWL87HLYoK/sTJqZLT5fditOnzScG6 JUQkoM18jflZAeAGFMAkFQi7v0aeD9Evl42tB/J4ec9dW/Jbo4ktb05auTom3OB08iqz 18AA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=rL5PSjChr8wFTlKRWTO498bp0kCwADLZZe/SXHes2h8=; b=oJDdWhN0RQy9I/hsb4XFwsiK2vAh7zeStY3CwjBy3BWD/hn37uUGYL2UJ0GKdWoEaI BnB4TqXN5H2nAdkVmwido6KH6uBNWt55Amd1shMzEMdUDWTGCWMOcz/PoqH3oxBd3RAf qAD+hyCp3M/QasPeWvDxw1O9B9woITynPJZJ1awqJj+/QKz3O0fZGczViUJJ+lRjSOQ8 9PA67umAd/pORsWQMmtkIr8ZcuF0UdsV5o0PvtSLqnSBMooWcYMRcEprKwnw8+p8NpZ1 aouNk1eEhYm4uriF9ly5mLigDEm+ge5jQJknwn6XY5Kc8eE6UbFkIar+4JWbPz9/qsjw 0scw== X-Gm-Message-State: AO0yUKXm3KAlMKeaGDPmp8mMIFRS7KprUtNB0dIVtILxRKA6lMPMbT8W KmT7TbMb6FsQQH/n5RQhUJIWvvwf6wA= X-Google-Smtp-Source: AK7set+44y5aeQf1zuFNW05XeMWyuIDsdI+r8pPqkHzR/in6WRVQ9dwNjFzyqkfy6m3wNLuusOCyHSVO/GU= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a0d:e506:0:b0:52e:c531:20b3 with SMTP id o6-20020a0de506000000b0052ec53120b3mr448676ywe.147.1676524720915; Wed, 15 Feb 2023 21:18:40 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:34 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-20-surenb@google.com> Subject: [PATCH v3 19/35] mm: write-lock VMAs before removing them from VMA tree From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Write-locking VMAs before isolating them ensures that page fault handlers don't operate on isolated VMAs. Signed-off-by: Suren Baghdasaryan --- mm/mmap.c | 1 + mm/nommu.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 0eaa3d1a6cd1..aaa359a147b2 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2261,6 +2261,7 @@ int split_vma(struct vma_iterator *vmi, struct vm_are= a_struct *vma, static inline int munmap_sidetree(struct vm_area_struct *vma, struct ma_state *mas_detach) { + vma_start_write(vma); mas_set_range(mas_detach, vma->vm_start, vma->vm_end - 1); if (mas_store_gfp(mas_detach, vma, GFP_KERNEL)) return -ENOMEM; diff --git a/mm/nommu.c b/mm/nommu.c index 57ba243c6a37..2ab162d773e2 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -588,6 +588,7 @@ static int delete_vma_from_mm(struct vm_area_struct *vm= a) current->pid); return -ENOMEM; } + vma_start_write(vma); cleanup_vma_from_mm(vma); =20 /* remove from the MM's tree and list */ @@ -1519,6 +1520,10 @@ void exit_mmap(struct mm_struct *mm) */ mmap_write_lock(mm); for_each_vma(vmi, vma) { + /* + * No need to lock VMA because this is the only mm user and no + * page fault handled can race with it. + */ cleanup_vma_from_mm(vma); delete_vma(mm, vma); cond_resched(); --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3501CC636CC for ; Thu, 16 Feb 2023 05:20:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229712AbjBPFUQ (ORCPT ); Thu, 16 Feb 2023 00:20:16 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46124 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229627AbjBPFTV (ORCPT ); Thu, 16 Feb 2023 00:19:21 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B98BD42DCE for ; Wed, 15 Feb 2023 21:18:45 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id k9-20020a25bec9000000b00944353b6a81so863507ybm.7 for ; Wed, 15 Feb 2023 21:18:45 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=65rn53nYPCehqawxI5VHHUH5OXR7IFklQh11X90VOGk=; b=ngrSSnFGdDPlYz5zuJYWd2ZcJdDDxwAT/RtnIL5xzVfAFVgHCKIj7A9xwEDcTyX2aB 3vsZSlkeyPVpcwi1aa9l93eu/icfxKL0DIaqQVOS2lNklMJwu2jpUpq532q1GD3mJcXZ OYyJWX25F7RdsRsm0bO+ULQDs6kU+hJx2yHcqT8VTisE8SVpLjiLy4deYJxyu/llv5yd uNDnBwf4OPlsNNTMONtuv4dnuvjdk8hbLw2jK3U2tc3OA2ZTvVJ3/lGJPyy4Jcy7H5Vt +ZqHtuuBA7mxmkHLm02ywaCux2tqtjmQCbr0/F6E0nSC73lfB2u+G7wX3tsYtQZiJlY+ ecqA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=65rn53nYPCehqawxI5VHHUH5OXR7IFklQh11X90VOGk=; b=If914xyNZ6E6JKn7JwPiydaFH9m235YeOJJjL/0kh0TjuFOcDzgmXtRrXJhjCMHDBo SewVQlA9iQFZBoC2ydiSQscZ46Vt51irOxbqdbPemMpg6t4QQ9fjhsRimf33SdhFuSVw rE/6vInHrhZLQcPnU590LauLe1Vup2MsFv1Rnb5q8DVXWBZ3YJKLkF37lWqgUfHm9uTx Z3qzTY3yMkJQ4I4Mw9VJOc1bKaiA5eGnJTVjUUrbdkUMH12hp1OWvbTDI/KEhG2nMZQL 58Rs2Fp9jmqQiE4dyZHQS0WDg8/zpwDvWr+KZt4LjyoPcPcbv42jkbo2eDdN9zBK7AwB EM+Q== X-Gm-Message-State: AO0yUKUKFoUUb+u5EjrxgeHJRO/K9ZmhN9fdKH/eDARrjH331nWPjWuO GQcXRMn9bh/ItRfqN+A5OKCVL+/6IAE= X-Google-Smtp-Source: AK7set9eBfEal0C1jR4VjCmFFWO3/hvdavzvid6pMzzd4bvHowMVw43Kn34On8e4ZBDtcDdaQHGRoUI6tN4= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a81:b704:0:b0:52b:fcf0:4fd1 with SMTP id v4-20020a81b704000000b0052bfcf04fd1mr5ywh.1.1676524723168; Wed, 15 Feb 2023 21:18:43 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:35 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-21-surenb@google.com> Subject: [PATCH v3 20/35] mm: conditionally write-lock VMA in free_pgtables From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Normally free_pgtables needs to lock affected VMAs except for the case when VMAs were isolated under VMA write-lock. munmap() does just that, isolating while holding appropriate locks and then downgrading mmap_lock and dropping per-VMA locks before freeing page tables. Add a parameter to free_pgtables for such scenario. Signed-off-by: Suren Baghdasaryan --- mm/internal.h | 2 +- mm/memory.c | 6 +++++- mm/mmap.c | 5 +++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index fc01fd092ea5..400c302fbf47 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -105,7 +105,7 @@ void folio_activate(struct folio *folio); =20 void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long floor, - unsigned long ceiling); + unsigned long ceiling, bool mm_wr_locked); void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte); =20 struct zap_details; diff --git a/mm/memory.c b/mm/memory.c index f456f3b5049c..8177c59ffd2d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -348,7 +348,7 @@ void free_pgd_range(struct mmu_gather *tlb, =20 void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *vma, unsigned long floor, - unsigned long ceiling) + unsigned long ceiling, bool mm_wr_locked) { MA_STATE(mas, mt, vma->vm_end, vma->vm_end); =20 @@ -366,6 +366,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple= _tree *mt, * Hide vma from rmap and truncate_pagecache before freeing * pgtables */ + if (mm_wr_locked) + vma_start_write(vma); unlink_anon_vmas(vma); unlink_file_vma(vma); =20 @@ -380,6 +382,8 @@ void free_pgtables(struct mmu_gather *tlb, struct maple= _tree *mt, && !is_vm_hugetlb_page(next)) { vma =3D next; next =3D mas_find(&mas, ceiling - 1); + if (mm_wr_locked) + vma_start_write(vma); unlink_anon_vmas(vma); unlink_file_vma(vma); } diff --git a/mm/mmap.c b/mm/mmap.c index aaa359a147b2..118b2246bba9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2157,7 +2157,8 @@ static void unmap_region(struct mm_struct *mm, struct= maple_tree *mt, update_hiwater_rss(mm); unmap_vmas(&tlb, mt, vma, start, end, mm_wr_locked); free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, - next ? next->vm_start : USER_PGTABLES_CEILING); + next ? next->vm_start : USER_PGTABLES_CEILING, + mm_wr_locked); tlb_finish_mmu(&tlb); } =20 @@ -3069,7 +3070,7 @@ void exit_mmap(struct mm_struct *mm) mmap_write_lock(mm); mt_clear_in_rcu(&mm->mm_mt); free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, - USER_PGTABLES_CEILING); + USER_PGTABLES_CEILING, true); tlb_finish_mmu(&tlb); =20 /* --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 752C3C61DA4 for ; Thu, 16 Feb 2023 05:20:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229819AbjBPFUV (ORCPT ); Thu, 16 Feb 2023 00:20:21 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46396 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229623AbjBPFTW (ORCPT ); Thu, 16 Feb 2023 00:19:22 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5931636FC0 for ; Wed, 15 Feb 2023 21:18:47 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id n20-20020a25da14000000b008fa1d22bd55so840318ybf.21 for ; Wed, 15 Feb 2023 21:18:47 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=zeEKIs6RouZ3NeHI7ZJ8m5Jmv1yLdTyFBRJo2pt5tCE=; b=Hd3NZlonbpCzy5K7Z5feVeMneevLupNr2wIBk8xrF2F0FeVHuTW7v0p8Iwz465OudO Kz/cwziHnFST/NwsyYv9H1NpjXxfHSGLxO5ukc+3xsoPYXaSl8+RA3RdmvmM4fbLpXN9 gimJ3OxJZ4Ep64JmtioAuz7PGMRMavPtJuthlMhEIfRvRDzYwwXEt+kIY/UhQ/1ohnXV Mp7Yg73p6F/OdwNe1qmUkwSe8yYAoyBy/8c1r/0Zsdj45B+B6DEhDKlW7MvfqHG54uOt TWogzsTy9UemTUaeFAzTdpALJEgq+fwHHnoe+QVimQT+Dp4bg9S2kMJXbPj9+Gb2VnGT o48A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=zeEKIs6RouZ3NeHI7ZJ8m5Jmv1yLdTyFBRJo2pt5tCE=; b=FV5sNiMFzNHCNtvK23N/vPhSa88H749KUN2uP8iGhzQx7K+wYFs1fqloQJUvbcXwwg R6OKWNFPaTzj3aQ2TYOncfxBOFMPMgNgy26wQgmT1raRUN6gkZDZsu+ISTS/H0Z67FfD y//3p7Ht8NcQcJOsE0R5EKGDQBbSVAmN5Jp5V/fIvBJFu7WIKaMjqfJeR/XbsZ3ugZ9O Gf7cNJgPhJhHE/ssF9UCDaxtENN77oeH8NbNvBolB80oYXxW+QFw3sgRU+bDxqDmkptv Y19RdCjRxtkZZRhGP3oFjspef3lPMMBzFiMjQ+Cl8BTrRAWDnCYOqQo8/9RKAG8c+PBs lGKA== X-Gm-Message-State: AO0yUKXXbwg/WF7QEdet7cIP1MfWQvGumGsRur2t0XDBnuXUE5A3xB82 /BFlWIVTGqFtpiO7QVMvyvvaD8FvLws= X-Google-Smtp-Source: AK7set9EG6pwKD6Qe8/WGqDgk7Um8RE2RvPxkguhsK1CKP8nzrHSAEE/Znnb7/ves5IDs/PTEla86+djibg= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a5b:604:0:b0:859:2acc:deb6 with SMTP id d4-20020a5b0604000000b008592accdeb6mr561571ybq.79.1676524725671; Wed, 15 Feb 2023 21:18:45 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:36 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-22-surenb@google.com> Subject: [PATCH v3 21/35] mm/mmap: write-lock adjacent VMAs if they can grow into unmapped area From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" While unmapping VMAs, adjacent VMAs might be able to grow into the area being unmapped. In such cases write-lock adjacent VMAs to prevent this growth. Signed-off-by: Suren Baghdasaryan --- mm/mmap.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 118b2246bba9..00f8c5798936 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2399,11 +2399,13 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struc= t vm_area_struct *vma, * down_read(mmap_lock) and collide with the VMA we are about to unmap. */ if (downgrade) { - if (next && (next->vm_flags & VM_GROWSDOWN)) + if (next && (next->vm_flags & VM_GROWSDOWN)) { + vma_start_write(next); downgrade =3D false; - else if (prev && (prev->vm_flags & VM_GROWSUP)) + } else if (prev && (prev->vm_flags & VM_GROWSUP)) { + vma_start_write(prev); downgrade =3D false; - else + } else mmap_write_downgrade(mm); } =20 --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1507EC61DA4 for ; Thu, 16 Feb 2023 05:20:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229756AbjBPFUa (ORCPT ); Thu, 16 Feb 2023 00:20:30 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46610 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229672AbjBPFT3 (ORCPT ); Thu, 16 Feb 2023 00:19:29 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 88CC1457D5 for ; Wed, 15 Feb 2023 21:18:49 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id r17-20020a258291000000b008ee1c76c25dso857450ybk.11 for ; Wed, 15 Feb 2023 21:18:49 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=Cy3b4YdYe1JheypyM1qEqlntx+LNPOiVtlvqoTpRTGk=; b=ee09VaaNfMCXdTYs7ho4FL1rz+7Jau0hVrpqtPJwR0LOjVJOOKMHcMbwd/rGxak63H BRVuTQ9tdXJzSR+6NhiK4KAEy3orTDG5A6ugcB1Mu/JZ3TtJ8rFlU/MHE4DwjmulfLqA 4lbez5vLr44VvmK0HXNKetjG61FmR5dtu/ffCFjZaMQfVdd6yPZ6Vqs7cSoCFmCWN2ej U7UJuLArjVJJvFA58VBbTFzUGAlbxAZVBV5EE2zVq6EhbmHffJxz6yeGBWtLnffH9zfX I0SKmeE3zn9vqZPVYyRXu1tk/VZ/LisV9OyoAUd6iOjxLewnK1RWOS7bKO/6E67varMh DYqg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=Cy3b4YdYe1JheypyM1qEqlntx+LNPOiVtlvqoTpRTGk=; b=S9oRNMN53hXvJ2u1o7n8WUTsVPHgF6cCHA3DtHB81BbxgFZtLfiESNINvU+MrQlQTl 2OQI5aBIlbXTepj2OvGFJy/NxbAg/D2XM9ATKRNyocDpu83/sPUO+aMTUXiMHYwMYhsu s7viF2fnSNjy2HCFJ7RkGPHKAp8Ss1MuTFjQK+KL9rw1EBJpjtFnw/64M0BfxmMu9RRq xzzVmaVRZG63cXb7Kvfjc8EqozIab4Hqa7EdAxGbaAG9/oyXLbYGAnd82croe6EQ396q f3rScoWmoAKeTcCZb0cVCntKD8dMZ2lAwHZXSUOx1q8G0mCya+Dc4SxPLUpeXChTQUAw CafA== X-Gm-Message-State: AO0yUKUz8GvR4fi84hr3SHgZZVjuQxXZI6fJuvveD+V0xnJx0/jIwh/4 Soh49U9H69yCtRj4ZMAt220Ni4XVSXc= X-Google-Smtp-Source: AK7set98gd/BBmD2MrHMvmTSXIUmiTX+HY9u8tg6jgvhUw+D34Dv05igzcuyCWu8/RYb3F6ryIOQeEDmrWY= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a0d:c347:0:b0:52f:1923:4f9d with SMTP id f68-20020a0dc347000000b0052f19234f9dmr562132ywd.366.1676524727987; Wed, 15 Feb 2023 21:18:47 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:37 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-23-surenb@google.com> Subject: [PATCH v3 22/35] kernel/fork: assert no VMA readers during its destruction From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Assert there are no holders of VMA lock for reading when it is about to be destroyed. Signed-off-by: Suren Baghdasaryan --- kernel/fork.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/fork.c b/kernel/fork.c index 9141427a98b2..a08cc0e2bfde 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -491,6 +491,9 @@ static void vm_area_free_rcu_cb(struct rcu_head *head) { struct vm_area_struct *vma =3D container_of(head, struct vm_area_struct, vm_rcu); + + /* The vma should not be locked while being destroyed. */ + VM_BUG_ON_VMA(rwsem_is_locked(&vma->lock), vma); __vm_area_free(vma); } #endif --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 493F7C636D7 for ; Thu, 16 Feb 2023 05:20:33 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229770AbjBPFUc (ORCPT ); Thu, 16 Feb 2023 00:20:32 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46692 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229635AbjBPFTa (ORCPT ); Thu, 16 Feb 2023 00:19:30 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E422645F60 for ; Wed, 15 Feb 2023 21:18:50 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id n20-20020a25da14000000b008fa1d22bd55so840462ybf.21 for ; Wed, 15 Feb 2023 21:18:50 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=GTmH+3zLa4DS1pm6XKzoFa9XiN/n/492GQmNm0F2u98=; b=lxQtFymGNoCMn73Q032qmSTTcj+ve7GubiXjG/S2/wZlI5A+wvms4mpLfeb6oKn81f gVVSU10rEKBXvaTQ4DI3+isYBuVvKtNDNUM4jT8VbxX9prWBulP37D7mcvfTTB0yv5Vd xLNBfAphNgvAxOXoC4bxmXgktQA4hGiUCCaPhABiIUzurEkqvA5yibf0xhi6vP6KsH63 2fMMNvtfnEstPxLz+UPzwP+9hfF+/M/equZBkR4oGxxGpJkcmKAi9IzofVMskpdF8tY7 tvoaCpVKwaaM8HBNFnnIuJry7ZsbTgV8SMkds4HojfN0Cbg+ccWTepT6dYOQ/gKplcjn qubQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=GTmH+3zLa4DS1pm6XKzoFa9XiN/n/492GQmNm0F2u98=; b=dWwGXiD833Vh5DcAbp2IOBMXewpJtdezcUeVBwztsoS5bGpNgW8LQSFGxjRlHkclcS ft1uBp84hnEh+mI3muVkuhDeCE4x3idmD9e5wqEl8Apf+W5ZkgcrZ5/Q/COwsWj8VDMI a8yiC3tK8gbuFKQhiKJm9JlJUEv3KZERVAqb93L5bMpnmgAc7tgsqzpRxSS/HNpQaqKB 7P289jaSAZJEUn4bjH7UgU3sI8wYEeYOwvclh1TGiLsbaKNJJAhIpNpIcz+7684GGTSV qIk2KQLff3gNiHP/f3Sv1g7pZ4hSujmlZqoln9gqHBYx6S3jjrOhYe8CjmVkhB5Ys4i5 N0sw== X-Gm-Message-State: AO0yUKXMg/yloDfeGiMUlWXpn2eGKs5NdCujJ+4SELHMABO3sSyU22bZ awTvAqi4892mlH8EelqlV/IO7kglQcc= X-Google-Smtp-Source: AK7set/Pm4WgUpt5iRaimc/pvYKHTFOy9juHCFu4sSQHLZZAabCINcoiFbttYYVnL3GbT+1smZzLxo4F4W0= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a81:441c:0:b0:52e:c79a:cda with SMTP id r28-20020a81441c000000b0052ec79a0cdamr12ywa.10.1676524730090; Wed, 15 Feb 2023 21:18:50 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:38 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-24-surenb@google.com> Subject: [PATCH v3 23/35] mm/mmap: prevent pagefault handler from racing with mmu_notifier registration From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Page fault handlers might need to fire MMU notifications while a new notifier is being registered. Modify mm_take_all_locks to write-lock all VMAs and prevent this race with page fault handlers that would hold VMA locks. VMAs are locked before i_mmap_rwsem and anon_vma to keep the same locking order as in page fault handlers. Signed-off-by: Suren Baghdasaryan --- mm/mmap.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 00f8c5798936..801608726be8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3501,6 +3501,7 @@ static void vm_lock_mapping(struct mm_struct *mm, str= uct address_space *mapping) * of mm/rmap.c: * - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for * hugetlb mapping); + * - all vmas marked locked * - all i_mmap_rwsem locks; * - all anon_vma->rwseml * @@ -3523,6 +3524,13 @@ int mm_take_all_locks(struct mm_struct *mm) =20 mutex_lock(&mm_all_locks_mutex); =20 + mas_for_each(&mas, vma, ULONG_MAX) { + if (signal_pending(current)) + goto out_unlock; + vma_start_write(vma); + } + + mas_set(&mas, 0); mas_for_each(&mas, vma, ULONG_MAX) { if (signal_pending(current)) goto out_unlock; @@ -3612,6 +3620,7 @@ void mm_drop_all_locks(struct mm_struct *mm) if (vma->vm_file && vma->vm_file->f_mapping) vm_unlock_mapping(vma->vm_file->f_mapping); } + vma_end_write_all(mm); =20 mutex_unlock(&mm_all_locks_mutex); } --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 17EAFC61DA4 for ; Thu, 16 Feb 2023 05:20:36 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229842AbjBPFUe (ORCPT ); Thu, 16 Feb 2023 00:20:34 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46194 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229767AbjBPFTj (ORCPT ); Thu, 16 Feb 2023 00:19:39 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BB4FA47423 for ; Wed, 15 Feb 2023 21:18:54 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id v63-20020a254842000000b008f257b16d71so842164yba.15 for ; Wed, 15 Feb 2023 21:18:54 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=u26HcyQZUssNB/wvgmnc8FNLt5Bjt6C27EcCpLSHQ2I=; b=e8FcGNDH5VkVzBTYRblbtAQ0+ZP2ugsPFuxmDPyoQXwVKDnKFUyAPJn/SJMrsaQ1Ia jo+Cd4OdGYV2adIa7Ps1WdQQvf3M0XkBbQCS2vH4hvx3mR4XIU6AeYNJ4fgFw+1soX5Z Pa1xM0MUFfbg6JS2S95T1SCUKSw+S8anZTC0et5KKeuEZZM6kRmGtxME3D+du+nzcvgG qzTaGZDic4CCJCQNQmsvjPxTw8HSPKOmRJrMlp7St/TdGbLlhdx8Vn9wPFTpNdXttF8Q VA64i9yGakw2MEWw1P/yGjLggzJzYv4qMjifZYLSh4iksjXsYnlNyRAbe89au2hayYpB yYBw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=u26HcyQZUssNB/wvgmnc8FNLt5Bjt6C27EcCpLSHQ2I=; b=sNtkjqp3WiwTltWeeHGLdA0Gx0tGlfGR9f59XRhjXQRZf9+1W3vOYF1joJest+20u3 dUlA4P6MSjsw+Dt78EkkqzKGRxxrcNCS5qGXNPM8liu3S+CcrFvc9nM+LpDr9ZfsYwgW pWPkEzPPIHhi7jfo0dhBeTVXgzjPJ5V9g8axCktdhSDHQ9ZSlzTI5C43jllUNcHeVOt8 ls9DNm9Xx42TY+G8sIn8Rha5hzT9cYFRlqpkGNUj2RmDHpmosq9J06lEvUpJdoNUwMdb ka3+VxZN2fUYEMZQ0Fy0FqMpjSz2wpapGkDYJ9pno9HYwF5vNCvBfmNVqzvJwJhDqWzT 1Xow== X-Gm-Message-State: AO0yUKXSrwara7XUqoSGxke4MnoGe97GRepEpNMBc/h3QnheVl9MoNB3 BejSWlE/oVb771b48BhQeTmFqIp7TzU= X-Google-Smtp-Source: AK7set8LRdaZWnTNJXEEL+5OavvLJg+SJ7ELgpuDSzdI0PbEuUZjWdo/eOURR51UCivz1X36gI0P4ir+qmc= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a05:6902:114f:b0:855:fdcb:4467 with SMTP id p15-20020a056902114f00b00855fdcb4467mr31ybu.0.1676524732481; Wed, 15 Feb 2023 21:18:52 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:39 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-25-surenb@google.com> Subject: [PATCH v3 24/35] mm: introduce vma detached flag From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Per-vma locking mechanism will search for VMA under RCU protection and then after locking it, has to ensure it was not removed from the VMA tree after we found it. To make this check efficient, introduce a vma->detached flag to mark VMAs which were removed from the VMA tree. Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 11 +++++++++++ include/linux/mm_types.h | 3 +++ mm/mmap.c | 2 ++ 3 files changed, 16 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index f4f702224ec5..3f98344f829c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -693,6 +693,14 @@ static inline void vma_assert_write_locked(struct vm_a= rea_struct *vma) VM_BUG_ON_VMA(vma->vm_lock_seq !=3D READ_ONCE(vma->vm_mm->mm_lock_seq), v= ma); } =20 +static inline void vma_mark_detached(struct vm_area_struct *vma, bool deta= ched) +{ + /* When detaching vma should be write-locked */ + if (detached) + vma_assert_write_locked(vma); + vma->detached =3D detached; +} + #else /* CONFIG_PER_VMA_LOCK */ =20 static inline void vma_init_lock(struct vm_area_struct *vma) {} @@ -701,6 +709,8 @@ static inline bool vma_start_read(struct vm_area_struct= *vma) static inline void vma_end_read(struct vm_area_struct *vma) {} static inline void vma_start_write(struct vm_area_struct *vma) {} static inline void vma_assert_write_locked(struct vm_area_struct *vma) {} +static inline void vma_mark_detached(struct vm_area_struct *vma, + bool detached) {} =20 #endif /* CONFIG_PER_VMA_LOCK */ =20 @@ -712,6 +722,7 @@ static inline void vma_init(struct vm_area_struct *vma,= struct mm_struct *mm) vma->vm_mm =3D mm; vma->vm_ops =3D &dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); + vma_mark_detached(vma, false); vma_init_lock(vma); } =20 diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e268723eaf44..939f4f5a1115 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -511,6 +511,9 @@ struct vm_area_struct { #ifdef CONFIG_PER_VMA_LOCK int vm_lock_seq; struct rw_semaphore lock; + + /* Flag to indicate areas detached from the mm->mm_mt tree */ + bool detached; #endif =20 /* diff --git a/mm/mmap.c b/mm/mmap.c index 801608726be8..adf40177e68f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -593,6 +593,7 @@ static inline void vma_complete(struct vma_prepare *vp, =20 if (vp->remove) { again: + vma_mark_detached(vp->remove, true); if (vp->file) { uprobe_munmap(vp->remove, vp->remove->vm_start, vp->remove->vm_end); @@ -2267,6 +2268,7 @@ static inline int munmap_sidetree(struct vm_area_stru= ct *vma, if (mas_store_gfp(mas_detach, vma, GFP_KERNEL)) return -ENOMEM; =20 + vma_mark_detached(vma, true); if (vma->vm_flags & VM_LOCKED) vma->vm_mm->locked_vm -=3D vma_pages(vma); =20 --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 87393C636D7 for ; Thu, 16 Feb 2023 05:20:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229849AbjBPFUj (ORCPT ); Thu, 16 Feb 2023 00:20:39 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46204 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229771AbjBPFTk (ORCPT ); Thu, 16 Feb 2023 00:19:40 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6742B410B9 for ; Wed, 15 Feb 2023 21:18:56 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id 191-20020a2504c8000000b008e2fd9e34e6so855049ybe.9 for ; Wed, 15 Feb 2023 21:18:56 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=rll6a/VW4pEw1AhNBjbFoMRbi+XwYRGQoRwSBUD0bh4=; b=hu+sMS9oSFHR1r7khxhf1UbXFcs2Z++5UsWdyoPEf98HLaKpCA/5NqLhTjKtFbNcAI bq4IC94z3LeKr+N8fZ27HaBeoB9eV7FJUK2PbLxLqtWEgkZI6C3sySZgRKxnS+2i/7yz RFGxaAOIl8opKirVI8bfAZAj2T7sVtyi7tZlmgl6KCk8dFxbKL8va87izUWyHNnCWm+i wFd5WvNjbROGmsVfKcufeNfiLR4xBKkikebAtyhw//UrLzCCkecCcI/c1WZAFUB1nZ5g ONtU8uW/bb5j1wyQf5MWCYTwREIrfwMyez4h+y24cgMUk3OrfInLJF7DZne80Aw4Hh2T oQOw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=rll6a/VW4pEw1AhNBjbFoMRbi+XwYRGQoRwSBUD0bh4=; b=0gKH/Uhd8pfqI3ivqX5DKP0pz/tLKTJDON9T74FLHKTuLdlQ6R59qDSzCG8A4F5BxR RdE3J6vSfZRG2JM9LFNcR3iGACpLNFGMZg6akGpF8yv77NZZEbpDimkKii8BC/7T87eg wFpCbx9IcBtxMPYumKvf4HIhzAkf6Y279xBU98IVfs68JqKespmtTT4SeSekpAdAaUKy LNZD5b4qCSD/7lrJHRuw0nlUSy1GHii33zUA0YQeYVqHD5VzOA4h2v/pSq+F/c/pwhLI uzQV3Z/c+CO0CBiGludJhO1J2u3ICvsEuO/Bir8ZZxC8wRlzaOph84VeDHpBjkigmdJZ iVAA== X-Gm-Message-State: AO0yUKXMOfqZfLcDlbaDfFJJuvb/GBj0IdZXeS3M1fMMslwjyDgtlSMW J1/nZzGMsv2rMhYOwhioG21GyTrQMM8= X-Google-Smtp-Source: AK7set/t52sA7P5kGXoASmYABiID5pIEjMuKzn7Z6yq6NOdnfI/iNhk3E99tS5Gm4PH95uk7/Rdn/fKqkhY= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a25:9012:0:b0:874:380b:887e with SMTP id s18-20020a259012000000b00874380b887emr545965ybl.239.1676524735071; Wed, 15 Feb 2023 21:18:55 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:40 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-26-surenb@google.com> Subject: [PATCH v3 25/35] mm: introduce lock_vma_under_rcu to be used from arch-specific code From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Introduce lock_vma_under_rcu function to lookup and lock a VMA during page fault handling. When VMA is not found, can't be locked or changes after being locked, the function returns NULL. The lookup is performed under RCU protection to prevent the found VMA from being destroyed before the VMA lock is acquired. VMA lock statistics are updated according to the results. For now only anonymous VMAs can be searched this way. In other cases the function returns NULL. Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 3 +++ mm/memory.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 3f98344f829c..36172bb38e6b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -701,6 +701,9 @@ static inline void vma_mark_detached(struct vm_area_str= uct *vma, bool detached) vma->detached =3D detached; } =20 +struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + unsigned long address); + #else /* CONFIG_PER_VMA_LOCK */ =20 static inline void vma_init_lock(struct vm_area_struct *vma) {} diff --git a/mm/memory.c b/mm/memory.c index 8177c59ffd2d..5e1c124552a1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5220,6 +5220,52 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vm= a, unsigned long address, } EXPORT_SYMBOL_GPL(handle_mm_fault); =20 +#ifdef CONFIG_PER_VMA_LOCK +/* + * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed = to be + * stable and not isolated. If the VMA is not found or is being modified t= he + * function returns NULL. + */ +struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + unsigned long address) +{ + MA_STATE(mas, &mm->mm_mt, address, address); + struct vm_area_struct *vma; + + rcu_read_lock(); +retry: + vma =3D mas_walk(&mas); + if (!vma) + goto inval; + + /* Only anonymous vmas are supported for now */ + if (!vma_is_anonymous(vma)) + goto inval; + + if (!vma_start_read(vma)) + goto inval; + + /* Check since vm_start/vm_end might change before we lock the VMA */ + if (unlikely(address < vma->vm_start || address >=3D vma->vm_end)) { + vma_end_read(vma); + goto inval; + } + + /* Check if the VMA got isolated after we found it */ + if (vma->detached) { + vma_end_read(vma); + /* The area was replaced with another one */ + goto retry; + } + + rcu_read_unlock(); + return vma; +inval: + rcu_read_unlock(); + return NULL; +} +#endif /* CONFIG_PER_VMA_LOCK */ + #ifndef __PAGETABLE_P4D_FOLDED /* * Allocate p4d page table. --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 55CE7C61DA4 for ; Thu, 16 Feb 2023 05:20:52 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229791AbjBPFUv (ORCPT ); Thu, 16 Feb 2023 00:20:51 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46878 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229731AbjBPFTu (ORCPT ); Thu, 16 Feb 2023 00:19:50 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C3DE7457D4 for ; Wed, 15 Feb 2023 21:18:57 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-4cddba76f55so8311157b3.23 for ; Wed, 15 Feb 2023 21:18:57 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=76w8C0WtscQXhYR05yNYHvPFGtZWYYoigmdRBKNsAGs=; b=FCDmrKExzLuCArz312mMsT1GG2XBAJpY0IRk4Dudkq8tacxOtpStMHAB4mCqF7cHzr HxmYKbJzh0e/OlcI5tze/pJirqiUzOseaOUlDEtDH80qm7FP3CxiVmnvUlVVgBXp61ej ykGRdx137mYbxOiW4ntwVshjUhNYXqvxbpsd7tR4+U5K8xmf3z6sAC5BmIr5igjBFbE2 Lhh7YXKnyKDhial8w7C+wSOyLz2qDj3wAEjKVPIMU3N4BzLNr3q2F7Jzk8qKKMPeJp0i mhDW6Px8X9de0wrc9+taZBLP7xnnbG62Qe2RXE2u3O0X+3PidgLM2eknvp7ECPYQAyqI 26OQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=76w8C0WtscQXhYR05yNYHvPFGtZWYYoigmdRBKNsAGs=; b=Cl/2dg7px0VteYTm0pbQw/GnSDeS3R4D6VtyflogegSEOZoWEluwyTU/AlNNcDyfU1 qzLngBSYK2fAbFY7MXdonFLKX2iihrOywsQkp196b31AmMQs66bVyZjcqwYGiEKdV2As kI6qNDJKI8hp+4BFj3SHyy9J/bi+BrIIZ9YBAS9ro/c2Kn0CdOlRUj7QBrC+zMpwoepW 76GuwSpmusAEGjcieAX6cJENr51VyktlWN2Kk0V/VCrAJNEYfm/oIBNF4XKDI419JXZD W22h+sTVVExRJ1n9d8uR/y4l9KsxqbH8pwNyrijbcFQizA2cJnhWwpRO3sBcQRoTx6ef 9ENQ== X-Gm-Message-State: AO0yUKVaIfCPc3sKSXOFm2HkN/387vQOxApK9NEv/Wj7DBhMXSgdKfwr xckfzea6zM4CUEz9ld2zPHhxxJQYL8s= X-Google-Smtp-Source: AK7set/7Y/K8KWVB4K6J3b/XMgTvXO7u2Z/Kc+o++p3RcwHAcWZWf0oYrOVDXbq6GKc7ezqD+HuFayT02ok= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a81:9e0a:0:b0:532:a8a0:8d76 with SMTP id m10-20020a819e0a000000b00532a8a08d76mr235300ywj.85.1676524737340; Wed, 15 Feb 2023 21:18:57 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:41 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-27-surenb@google.com> Subject: [PATCH v3 26/35] mm: fall back to mmap_lock if vma->anon_vma is not yet set From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When vma->anon_vma is not set, page fault handler will set it by either reusing anon_vma of an adjacent VMA if VMAs are compatible or by allocating a new one. find_mergeable_anon_vma() walks VMA tree to find a compatible adjacent VMA and that requires not only the faulting VMA to be stable but also the tree structure and other VMAs inside that tree. Therefore locking just the faulting VMA is not enough for this search. Fall back to taking mmap_lock when vma->anon_vma is not set. This situation happens only on the first page fault and should not affect overall performance. Signed-off-by: Suren Baghdasaryan --- mm/memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 5e1c124552a1..13369ff15ec1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5242,6 +5242,10 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_= struct *mm, if (!vma_is_anonymous(vma)) goto inval; =20 + /* find_mergeable_anon_vma uses adjacent vmas which are not locked */ + if (!vma->anon_vma) + goto inval; + if (!vma_start_read(vma)) goto inval; =20 --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 45EC7C636CC for ; Thu, 16 Feb 2023 05:20:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229739AbjBPFUy (ORCPT ); Thu, 16 Feb 2023 00:20:54 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46044 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229734AbjBPFT5 (ORCPT ); Thu, 16 Feb 2023 00:19:57 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2FFAA474D1 for ; Wed, 15 Feb 2023 21:19:01 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id q127-20020a25d985000000b009362f0368aeso883604ybg.3 for ; Wed, 15 Feb 2023 21:19:01 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=g+2R9wHJik/ujG7pbEIR7UB7BFuMmi7T4GQjwfs+Slo=; b=GJ5BxW0Q1u88snEvipBftLmc3+Jf1QItAPirJyrJgB8Mp9ylRIIALZlXmJQ4wNiSwP o865zgbeTpwGE0/miDmLSBWepWhJN06egqwo+YGBO2p834t1tPcrBoNtCdfrXD1jQzaN e7lrVFe77phFTZAotKwLYsW/LOb59LyIPFjYvDx4mPrMSRvD2iWSSQr/TeKwF3SManJP 7zOmcrgCnGJ5t0wPXfGvFFU5Wepw7LHd2B3wtwv+KMRgdeCQNTeRgpwm3UNl2CvWi/nE 3FOidEUJKnpNbe60Or5Um47yum4t5Cm2ARYxCb7pet6vv6oqpLv+fwdHpYVdfug+gwv9 oixA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=g+2R9wHJik/ujG7pbEIR7UB7BFuMmi7T4GQjwfs+Slo=; b=Qi3tRaGOFoNfAfZOXYc5BFe0C7UUqBaV5lquw1ZJ5toVViOcgpfIVIkTr+7eGAzQKM MiP476TRD9/Gi/YCUSfYwC5297Yn0h5OJDxPp9Uu6Sa2gAyo595Fkw6l4ZY2v9WpYUtw BzfrK1e+xzxgFiCKBufoFG82ZcyF2zKF9SS4h6ocgzSmE1I9TdB7AzqJGyKuwTus04/7 sh00bqtKsu/xqYKnQhvg2M7+LxogCzjf7wPFcyblIK0UGh1rDKwZC0ApTfxehir00NJn 5hefN49CeTJLtsRz0zpSedzMfHjoXkFxJMlJW9NAnAaieKokpa0HtDRzd0zjTgq2M3AS +rXg== X-Gm-Message-State: AO0yUKUfxcUdXcQOCiH92eidQJURQsAkgLjGfjpIVT/UmZ63e+T8j9J8 xkTTs7ZOjRQSnz5SvGXFY5aAjXV5YG8= X-Google-Smtp-Source: AK7set/8zrN2bEgVNx1m2BtSP2crHaoN6LxODGu2trr1UY0Ec3uUEJoRM+mde4AC+sE3ZWg+Wi9w81iJ8kk= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a5b:144:0:b0:8ec:cc9b:7333 with SMTP id c4-20020a5b0144000000b008eccc9b7333mr463396ybp.205.1676524739514; Wed, 15 Feb 2023 21:18:59 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:42 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-28-surenb@google.com> Subject: [PATCH v3 27/35] mm: add FAULT_FLAG_VMA_LOCK flag From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan , Laurent Dufour Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a new flag to distinguish page faults handled under protection of per-vma lock. Signed-off-by: Suren Baghdasaryan Reviewed-by: Laurent Dufour --- include/linux/mm.h | 3 ++- include/linux/mm_types.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 36172bb38e6b..3c9167529417 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -478,7 +478,8 @@ static inline bool fault_flag_allow_retry_first(enum fa= ult_flag flags) { FAULT_FLAG_USER, "USER" }, \ { FAULT_FLAG_REMOTE, "REMOTE" }, \ { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \ - { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } + { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }, \ + { FAULT_FLAG_VMA_LOCK, "VMA_LOCK" } =20 /* * vm_fault is filled by the pagefault handler and passed to the vma's diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 939f4f5a1115..212e7f923a69 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1056,6 +1056,7 @@ enum fault_flag { FAULT_FLAG_INTERRUPTIBLE =3D 1 << 9, FAULT_FLAG_UNSHARE =3D 1 << 10, FAULT_FLAG_ORIG_PTE_VALID =3D 1 << 11, + FAULT_FLAG_VMA_LOCK =3D 1 << 12, }; =20 typedef unsigned int __bitwise zap_flags_t; --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 328F8C636CC for ; Thu, 16 Feb 2023 05:21:02 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229702AbjBPFVA (ORCPT ); Thu, 16 Feb 2023 00:21:00 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45832 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229685AbjBPFT6 (ORCPT ); Thu, 16 Feb 2023 00:19:58 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E063E47432 for ; Wed, 15 Feb 2023 21:19:02 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-4bdeb1bbeafso8706637b3.4 for ; Wed, 15 Feb 2023 21:19:02 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=+haucx/0aUjNKP4B5JYWnKqFhX48IZcct98M2E1/jbw=; b=aDErRd7kkY+Kp4AaU6gjM3F503YHpBTPjjocmaNoc4zXVCAuMI2RAQKjzcRAbLtJge uySjaOCoHcGS53W7o3RYdOQoeaAfeHvH08PuG1fh1bxXWOGZDVGDBlzIfGuJz8kR8m0P 4LCu9IW21ZFA5+XCq7hXP9c4LIZtK/mDOJpY3r8EhzjVEo7Ti1XlFspuAz3gf99Vo08U GQhyt4xy4/c9ROtLln2G8n+ixCeeiCqz1U0tO8VqF1k+R16fVXyhOw17lx8zHct64i1Z f89mO/K5qgtX3vlh4Aty7U18axNicxnQBp8nhqlTIla4iaOnphrb06qKzWIf+oeLlfcz N8gg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=+haucx/0aUjNKP4B5JYWnKqFhX48IZcct98M2E1/jbw=; b=vXvJ/jmmr8cilTQtDwSdBP8CsN/q9SuLvltAFG8UvI2R0R5hMj+Kq6MUK6SkLZ53Rt XcyyIW9gpU+L5xxQsTfhd77ZxzcOJ4JCdSiHVivWPLbF4A/R8zx9QlWWQlXf/d5VoF8a FgvXXwo66wc2uvZMqNgZO+Fxpqa1D6WmNPYbSqW9EoxmUWr/Jx+F050nEj4kIbB5eTMs LvxnN7MXQO1maObCExB10pge0LyvHy5kMnEqiJThAvyYqngbH83isJTwbCRMEnx4fk86 ya2dLL8+euGIVAtNNEy49vl+fp3ZPK2NEOkrp6XiegjNLcvJAnw3kAFjeNGwH4+e4lrw 4URA== X-Gm-Message-State: AO0yUKVGFbz20h6ipSOryVd1Dya+CyG7oDRmF5XfadjTFb1sSyrg8vvY n5ibRy+z0ce2YWQNEBbuQZIutRIB6BQ= X-Google-Smtp-Source: AK7set9hR8JRk82RlKZHeYCMxCYi1ok7+igo70HcdJvHzw/Nn50rv4GE5Ev7Sgo0D41w7o23DTD5b6roJhQ= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a5b:c41:0:b0:91d:ddda:665f with SMTP id d1-20020a5b0c41000000b0091dddda665fmr396601ybr.589.1676524741694; Wed, 15 Feb 2023 21:19:01 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:43 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-29-surenb@google.com> Subject: [PATCH v3 28/35] mm: prevent do_swap_page from handling page faults under VMA lock From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan , Laurent Dufour Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Due to the possibility of do_swap_page dropping mmap_lock, abort fault handling under VMA lock and retry holding mmap_lock. This can be handled more gracefully in the future. Signed-off-by: Suren Baghdasaryan Reviewed-by: Laurent Dufour --- mm/memory.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 13369ff15ec1..555612d153ad 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3688,6 +3688,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!pte_unmap_same(vmf)) goto out; =20 + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { + ret =3D VM_FAULT_RETRY; + goto out; + } + entry =3D pte_to_swp_entry(vmf->orig_pte); if (unlikely(non_swap_entry(entry))) { if (is_migration_entry(entry)) { --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 843FDC636CC for ; Thu, 16 Feb 2023 05:21:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229667AbjBPFVF (ORCPT ); Thu, 16 Feb 2023 00:21:05 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46300 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229744AbjBPFT7 (ORCPT ); Thu, 16 Feb 2023 00:19:59 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 3886A47435 for ; Wed, 15 Feb 2023 21:19:05 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-52f1641b79aso8473097b3.16 for ; Wed, 15 Feb 2023 21:19:05 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=2G5AxmmOVrsRnXlCrW6/hvh5FUxSjD+0mS6LSWzYfho=; b=sqrDU+BCO7AIuj2EKpC5JjJjchpfs4e7nICzmha43sQtLyfoiEZwLUOwsCD209H5Ui lu0l/EwT0lWrDRHYoglaYWRXNYPJ1g8O+GpgkaLO5UTCsF2sShD98xQNq7PDWtH66r6g ZsB+oua1JfA0KymwRm2/PgdgPfreEDDqnwQyHLX1ndmfndQgCQh8neVqDDo8QNluQIit psaYv9QythR+X2eyBbc1tAjHl/fZRS54lIuiB1Ux7odRjp5k2KlmRL+IIzajgpgWcCV1 agKdJl/mJ1QtI4UVDWAjmp+dEFlVMZYnML3DG7HfRey4EeaADk1hZvmemdIk3Yb/sCoK 4MXQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=2G5AxmmOVrsRnXlCrW6/hvh5FUxSjD+0mS6LSWzYfho=; b=qcvcJ7dngs44avmgN2LjXdG3axYwu76OjwEDxZMCxitMtZmyibKsvEieuLyIaDIgMT Ux9wBggc/OxN5ps3xj4qIVEzv1iLEt8Qj2FkAkXpspsWYuaSL6zSiuSy1ab/2sS0RNXb KM8f6tyGsD//Z+eT5vtsJJZHDI95ML/IT6riKW13ChKKyTuOOIWwCg2EBjdrN5fsf7X7 KFPkdUlkTBooBc9ux+B4zvNL3AttzdcrIlMsXsBTk7UZsNHeES0g3T5g5Q4W+eThqu4U zFCmeMyvQazWW1qHwvKxGempdmVRABGpRMde9dpQKLqTV8u2UVG+zyhPM5SRK/rgR1R9 esGw== X-Gm-Message-State: AO0yUKXlruymgF53lwLGKjQnHxo2z7Ccm+A2u2ihQPj9RIP0GdTT2KRm 8gFlMHt/odByyC+J98YaIDEquQxG4zw= X-Google-Smtp-Source: AK7set/QuR9OxnmNolAau/sd0Kj8g/BkdB7enEU88ehNvfonJ32Y6Yzdt5dUleuzBy4yluRtxlQTgtsiJAU= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a05:6902:13c6:b0:8da:3163:224 with SMTP id y6-20020a05690213c600b008da31630224mr7ybu.0.1676524744013; Wed, 15 Feb 2023 21:19:04 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:44 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-30-surenb@google.com> Subject: [PATCH v3 29/35] mm: prevent userfaults to be handled under per-vma lock From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Due to the possibility of handle_userfault dropping mmap_lock, avoid fault handling under VMA lock and retry holding mmap_lock. This can be handled more gracefully in the future. Signed-off-by: Suren Baghdasaryan Suggested-by: Peter Xu --- mm/memory.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 555612d153ad..751aebc1b29f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5254,6 +5254,15 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_= struct *mm, if (!vma_start_read(vma)) goto inval; =20 + /* + * Due to the possibility of userfault handler dropping mmap_lock, avoid + * it for now and fall back to page fault handling under mmap_lock. + */ + if (userfaultfd_armed(vma)) { + vma_end_read(vma); + goto inval; + } + /* Check since vm_start/vm_end might change before we lock the VMA */ if (unlikely(address < vma->vm_start || address >=3D vma->vm_end)) { vma_end_read(vma); --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 9D952C61DA4 for ; Thu, 16 Feb 2023 05:21:11 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229873AbjBPFVJ (ORCPT ); Thu, 16 Feb 2023 00:21:09 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46610 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229810AbjBPFUO (ORCPT ); Thu, 16 Feb 2023 00:20:14 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C6B0A474DA for ; Wed, 15 Feb 2023 21:19:08 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-4fa63c84621so8412227b3.20 for ; Wed, 15 Feb 2023 21:19:08 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=nAnyYB3Nw50vBlFaI5emapvq5zyyV08CcfGYNR6eiBY=; b=a3wN6yhQzn6ykzBbjResLcztZyUFHxuHfZtRwk9INETFTCK3R+0qznDveEPm+o/T6G wJ3wEiieeEUcjgJFPl0Q1WJhoSjHld23FZEQoY2Aq8Bhu2X6QB+bcNMvcqIxTt4gpYGo 62C0Ezu+RzKHJNHqe8a2WWwzq3BWBtjKwlH9vropbhb+eXqNxVp6cua3PyUJuPb2PSda pEBwm4EDV55dFWt5TRT7+9jNt1+V85COwPZnyzclXtLSHEwvb2nMc9rLXuPiU2TfsCi1 1h3mje1L/1Q/DM34hGjfe9SSEWf4rBhdCLw8Ngy6TcviKT73/jmv3Lf0iRWhfdK1hSLg WjQA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=nAnyYB3Nw50vBlFaI5emapvq5zyyV08CcfGYNR6eiBY=; b=uYUQxAKDyWERMQyQmXI5wERwaB0DatQd0b7hpuNZDZSUUU/N+8V1LX42K1/2HCt/2m hklzVtBxRmaVEByojFJXUDiSpZVyoNLNfghPj3K1pXv8suL0m3RsObeLyE8RuG9Gm1nl JHluN9PadzpgHw7btQNOEzcHevZ3lDNlT5u9fV6b+TzCQj6vOqWxKqhK431FO9MQuGhb pu7qWFju0v4zOkujUNAe2N+/zUFqFhrrdBVekQUNOZuvBDDctrMuS24i1thIOV3riaw6 MiiB2vZkzipPoke85ipDK0eEWDIgetTrPaZ8IZMlfdAGp+I/FiaOFwGfNFiN8vVMl01h Yu3w== X-Gm-Message-State: AO0yUKXCCqqiQAPrEKLw3HPyLmpvJLvLj1myWeS7UoQmq/OXY9uttLmW M1ZOod600BFtLqzimwhhf52pS7NqJfk= X-Google-Smtp-Source: AK7set9XJXC8LdbYcGXxryYVlcRlcBZQcF1FMhZd/N4oCcpN8tIB1OMR+nLJJI9Pn7NEIXJu3Q5fjvT0dR0= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a05:6902:18cc:b0:8a3:d147:280b with SMTP id ck12-20020a05690218cc00b008a3d147280bmr5ybb.3.1676524746144; Wed, 15 Feb 2023 21:19:06 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:45 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-31-surenb@google.com> Subject: [PATCH v3 30/35] mm: introduce per-VMA lock statistics From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a new CONFIG_PER_VMA_LOCK_STATS config option to dump extra statistics about handling page fault under VMA lock. Signed-off-by: Suren Baghdasaryan --- include/linux/vm_event_item.h | 6 ++++++ include/linux/vmstat.h | 6 ++++++ mm/Kconfig.debug | 6 ++++++ mm/memory.c | 2 ++ mm/vmstat.c | 6 ++++++ 5 files changed, 26 insertions(+) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 7f5d1caf5890..8abfa1240040 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -149,6 +149,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, DIRECT_MAP_LEVEL3_SPLIT, +#endif +#ifdef CONFIG_PER_VMA_LOCK_STATS + VMA_LOCK_SUCCESS, + VMA_LOCK_ABORT, + VMA_LOCK_RETRY, + VMA_LOCK_MISS, #endif NR_VM_EVENT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 19cf5b6892ce..fed855bae6d8 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -125,6 +125,12 @@ static inline void vm_events_fold_cpu(int cpu) #define count_vm_tlb_events(x, y) do { (void)(y); } while (0) #endif =20 +#ifdef CONFIG_PER_VMA_LOCK_STATS +#define count_vm_vma_lock_event(x) count_vm_event(x) +#else +#define count_vm_vma_lock_event(x) do {} while (0) +#endif + #define __count_zid_vm_events(item, zid, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta) =20 diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index c3547a373c9c..4965a7333a3f 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -279,3 +279,9 @@ config DEBUG_KMEMLEAK_AUTO_SCAN =20 If unsure, say Y. =20 +config PER_VMA_LOCK_STATS + bool "Statistics for per-vma locks" + depends on PER_VMA_LOCK + default y + help + Statistics for per-vma locks. diff --git a/mm/memory.c b/mm/memory.c index 751aebc1b29f..94194a45ffa7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5272,6 +5272,7 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_s= truct *mm, /* Check if the VMA got isolated after we found it */ if (vma->detached) { vma_end_read(vma); + count_vm_vma_lock_event(VMA_LOCK_MISS); /* The area was replaced with another one */ goto retry; } @@ -5280,6 +5281,7 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_s= truct *mm, return vma; inval: rcu_read_unlock(); + count_vm_vma_lock_event(VMA_LOCK_ABORT); return NULL; } #endif /* CONFIG_PER_VMA_LOCK */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 1ea6a5ce1c41..4f1089a1860e 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1399,6 +1399,12 @@ const char * const vmstat_text[] =3D { "direct_map_level2_splits", "direct_map_level3_splits", #endif +#ifdef CONFIG_PER_VMA_LOCK_STATS + "vma_lock_success", + "vma_lock_abort", + "vma_lock_retry", + "vma_lock_miss", +#endif #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ }; #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */ --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 88D22C61DA4 for ; Thu, 16 Feb 2023 05:21:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229880AbjBPFVR (ORCPT ); Thu, 16 Feb 2023 00:21:17 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45790 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229820AbjBPFUW (ORCPT ); Thu, 16 Feb 2023 00:20:22 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 8594B43452 for ; Wed, 15 Feb 2023 21:19:11 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-52ee93d7863so8376357b3.18 for ; Wed, 15 Feb 2023 21:19:11 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=IMepWjMdRmbly/mfPnmeELsKhzAKjn/e+MmdnnCshxo=; b=VYBN/Sx6H1zkhlZQfZvD0Liynh1SzBUBQ9VOXzPL7TQljh9ke7q1U13nsJvyL8jhTT VJdOiIwIjVau+ArgVKuyZBGkikb4wPVpg6tKpQ0o6b7A2xVbgU2vETmFbJMmm/kSYK3W jjQeULRVDfrD7eqjNl25w/HYQSurUi+SuUv5YnVVfpqkT7+a9RL2gs3g5vQZ1Iey2WKK za8Zu9gxbyi43cwHcYzE3Uj7rIHDEJE8sD5soBil1CQae5YChuN7lRgwr3AT9vKkAkhX nNMop/PJ3OYQ+4sMUjA9lss931diz+voYeLd0iyVQtxE8G6e+SWaxHgo0zdZNYDoa0CR el+g== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=IMepWjMdRmbly/mfPnmeELsKhzAKjn/e+MmdnnCshxo=; b=lXMIWEVuFCi8rc3d7zJAloqNOzmU2mJ/Z1sdb5HEuMj7c2+CDqwd9KMRdn9WNfrK1z rmNmEHTTHVNVP+xqUnVeBji0XiVrCLEIztGGmpXEUK8LBu4RMyfb2A+NLQYje9rM7Tts 6TkaxHVbVRIfQKyqxNUrjrq7GLv0cbwPEyTpdBe4novE2CV9i2IuIDOVIANsMNzMcV+r 9++Sg2ROja3hFt/rxyLIN3jkoAmQfks8NasOQwiPpDZ2qKBx9r5R93Dd3N0Oo/i2DTfU V7gSbCg8k77qsEQw1b1d5ELg+T9jvJPI2iQFFiuJiZMYY7MonZrFINiCXTWt8X5G89sU 6DqA== X-Gm-Message-State: AO0yUKVq/qeBcNOYeZvMFlGgHmxWiV0tBbJFxImHoJSfkm1ds3B+NYzd Hr7VUoTvDhVNEy29WMiliNRxl2a058Q= X-Google-Smtp-Source: AK7set9HCvy2lOclXRQNI1sLIIJkNYQ8CImwbHHTA3gwi641v9vkQcIdLDEBqeWHm5koRinED1uR5T68Tno= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a81:4516:0:b0:52e:e718:9641 with SMTP id s22-20020a814516000000b0052ee7189641mr16ywa.9.1676524748625; Wed, 15 Feb 2023 21:19:08 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:46 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-32-surenb@google.com> Subject: [PATCH v3 31/35] x86/mm: try VMA lock-based page fault handling first From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Attempt VMA lock-based page fault handling first, and fall back to the existing mmap_lock-based handling if that fails. Signed-off-by: Suren Baghdasaryan --- arch/x86/Kconfig | 1 + arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3604074a878b..3647f7bdb110 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86_64 # Options that are inherently 64-bit kernel only: select ARCH_HAS_GIGANTIC_PAGE select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 + select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a498ae1fbe66..e4399983c50c 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -19,6 +19,7 @@ #include /* faulthandler_disabled() */ #include /* efi_crash_gracefully_on_page_fault()*/ #include +#include /* find_and_lock_vma() */ =20 #include /* boot_cpu_has, ... */ #include /* dotraplinkage, ... */ @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs, } #endif =20 +#ifdef CONFIG_PER_VMA_LOCK + if (!(flags & FAULT_FLAG_USER)) + goto lock_mmap; + + vma =3D lock_vma_under_rcu(mm, address); + if (!vma) + goto lock_mmap; + + if (unlikely(access_error(error_code, vma))) { + vma_end_read(vma); + goto lock_mmap; + } + fault =3D handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs= ); + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + kernelmode_fixup_or_oops(regs, error_code, address, + SIGBUS, BUS_ADRERR, + ARCH_DEFAULT_PKEY); + return; + } +lock_mmap: +#endif /* CONFIG_PER_VMA_LOCK */ + /* * Kernel-mode access to the user address space should only occur * on well-defined single instructions listed in the exception @@ -1433,6 +1466,9 @@ void do_user_addr_fault(struct pt_regs *regs, } =20 mmap_read_unlock(mm); +#ifdef CONFIG_PER_VMA_LOCK +done: +#endif if (likely(!(fault & VM_FAULT_ERROR))) return; =20 --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 14E08C636D7 for ; Thu, 16 Feb 2023 05:21:22 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229580AbjBPFVU (ORCPT ); Thu, 16 Feb 2023 00:21:20 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46710 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229828AbjBPFUX (ORCPT ); Thu, 16 Feb 2023 00:20:23 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E2413474E7 for ; Wed, 15 Feb 2023 21:19:13 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-5005ef73cf3so8815987b3.2 for ; Wed, 15 Feb 2023 21:19:13 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=REPTBg+i2LhaxaiXpF8lrF3nvI6zBhAH5JQxrJ1l/Gg=; b=aft7yDPM8GMTHB06MFPSYtKh6oWhZ8a8LMFo6PcEI9pnNLntVOlXFNQs/1Yvf52/5v q7Mb9j3IbqGydOQagHUnsSeX/sfiJUssC8Nuv1RApw1dHW5T8YzKas/LnJ1orI0tqnNj 0nSP5pSWAhACQCuzYc7axzYrfhmKcyRBbpYSaW4sNEVU5J76PVU7lF/u3PAFDwnMoS0w 449nI+g12YKzlYuLkXvI0hBRvnRJV+xrScZAqKlTKhgGwG9sA/5/r9CeoVq4yrFt/yH+ v8L5/t9gLYQIZy7UnUIKS96KONHOM4/bkOuXUpcfaqGPz0nskdK5CUeyDODt2k3AI1C9 6jaA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=REPTBg+i2LhaxaiXpF8lrF3nvI6zBhAH5JQxrJ1l/Gg=; b=Tj/vMUswQumq0ZHhxk8SiSna+fu3asJHNsf84EX/CF5d9P9yBy/OdYNBsXP5waug2R sHH0fwbEUWHjhS/2OGiHq5l5k2YsyihD8qTbPmsTPy3WLwiCCxOcBb/R8N3ijIZ8i5T6 p78nhsxAvCPU9O+jOiNDPuysgY3erUYTwknG56DBAWQ+cPEqbGJorkNN43k/m60tYli2 qUxIGcFkKixS6Lpr8Oyx9Nk7cluZgsvAr0ebEOWRaaksq35ao6gzVfYgr6DGYrJxBxx7 +lkV4ykQBHJz3dEEwRvPzKOnx3W65iFvIHuf02pNbXEggCFRorVVJKY02XfB7nbwH+eI hxkA== X-Gm-Message-State: AO0yUKVOWaRwqLDftdiBR2RGN2ay2hhw+7D7Qz+Xv2Or063mhy5ZcBvS ZNq5wnMS8Z+ZzHiSC5l2zcnx/U3HnKI= X-Google-Smtp-Source: AK7set+v0FR2W9n0VmO9fC7d0HyR1AW4QGVl+/6/uhiM7Ww0w+Lt/AynlEHt7RfffNYQztU45jTBm0A6vi0= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a5b:d49:0:b0:90e:37e9:1e01 with SMTP id f9-20020a5b0d49000000b0090e37e91e01mr1ybr.1.1676524751194; Wed, 15 Feb 2023 21:19:11 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:47 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-33-surenb@google.com> Subject: [PATCH v3 32/35] arm64/mm: try VMA lock-based page fault handling first From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Attempt VMA lock-based page fault handling first, and fall back to the existing mmap_lock-based handling if that fails. Signed-off-by: Suren Baghdasaryan --- arch/arm64/Kconfig | 1 + arch/arm64/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c5ccca26a408..9f2c0e352da3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -95,6 +95,7 @@ config ARM64 select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_SUPPORTS_PAGE_TABLE_CHECK + select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f4cb0f85ccf4..9e0db5c387e3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -535,6 +535,9 @@ static int __kprobes do_page_fault(unsigned long far, u= nsigned long esr, unsigned long vm_flags; unsigned int mm_flags =3D FAULT_FLAG_DEFAULT; unsigned long addr =3D untagged_addr(far); +#ifdef CONFIG_PER_VMA_LOCK + struct vm_area_struct *vma; +#endif =20 if (kprobe_page_fault(regs, esr)) return 0; @@ -585,6 +588,36 @@ static int __kprobes do_page_fault(unsigned long far, = unsigned long esr, =20 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); =20 +#ifdef CONFIG_PER_VMA_LOCK + if (!(mm_flags & FAULT_FLAG_USER)) + goto lock_mmap; + + vma =3D lock_vma_under_rcu(mm, addr); + if (!vma) + goto lock_mmap; + + if (!(vma->vm_flags & vm_flags)) { + vma_end_read(vma); + goto lock_mmap; + } + fault =3D handle_mm_fault(vma, addr & PAGE_MASK, + mm_flags | FAULT_FLAG_VMA_LOCK, regs); + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + goto no_context; + return 0; + } +lock_mmap: +#endif /* CONFIG_PER_VMA_LOCK */ /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -628,6 +661,9 @@ static int __kprobes do_page_fault(unsigned long far, u= nsigned long esr, } mmap_read_unlock(mm); =20 +#ifdef CONFIG_PER_VMA_LOCK +done: +#endif /* * Handle the "normal" (no error) case first. */ --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 29D4CC61DA4 for ; Thu, 16 Feb 2023 05:21:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229728AbjBPFVa (ORCPT ); Thu, 16 Feb 2023 00:21:30 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46828 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229767AbjBPFUf (ORCPT ); Thu, 16 Feb 2023 00:20:35 -0500 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 718B642BC0 for ; Wed, 15 Feb 2023 21:19:18 -0800 (PST) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-5337e2cbc58so8660367b3.10 for ; Wed, 15 Feb 2023 21:19:18 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=UQ3ySmwJxDlPKOGcVlpRpfqxPlYDfL6UGC9Ax4HzzOw=; b=KGDJpcNA4oBtC5xQjxkPPkTW0kNBhMrSybgGLHrbBGSwSGEq+rySxMS3rk0dGM+Qo/ JpvUuwvGZIqjAVk+FVRsZ45D9AgvgQzyseylKUSLhPKRWFnmJExh3o/n5uArNAg9pZmc szoMYmExx5uq8bOtsbN9zvU5WJmapQpCTW9YfX5yCKinl4hK2fSO0QEupE6sIVAF6mis 0qvHvTjkF2AHE/zcyOYTuhnORntosRwKL2siaVlbrxc1aSWTq08YWLJWNhdZghaVxqan vIZPE33VZDXNYNH6tC5BE6Aqx0uXP/yBJaWCtfFr0Yvy7z7UrJ4q+Ys5HfXN3+WVCYvy Y1vg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=UQ3ySmwJxDlPKOGcVlpRpfqxPlYDfL6UGC9Ax4HzzOw=; b=IlXF2F9OTqesI8pEhb1GvpT2QzL0mUv0EuG6asHPGcNWDlW55ZAk8jtl6h7EyAeBar f36HBv5gRS9mWypoWL7w+uBXfq6LjztmBXDdO1dXuzv5Ud+QxwedSdfqmyJPk+VJJlKP KBUnPQD10Rp8AUBH30r3S5VZ5Bnpe0/xhKgkHLFQU7j0EezIgmnIRkbXYGxjtOc8q8PC L7R9px4no80ymgFirtrIUJ3A4BcFx0L+TXCWKZl6wIfKTtemUmCppX6dgFfG4n1Xuu23 y0A5mAfzFQU2KHUShz+tsU5s8ZV6HTTBF5X4paDdvCSJ1WcRSpMqtvoBMY1KLeB+6ihS 8yoA== X-Gm-Message-State: AO0yUKUNDb5BPAO0acVjbBmFIMrxDt5AOvYgOkttecgIjRJ5039gtUZw lUjyGNHPfnY5CGCYsTDNTOqIcYb4Jtk= X-Google-Smtp-Source: AK7set8KqluIZbArnLHWo7hLv5kb+HlUQLsv3CceFcKXduycZudkxodptdsBc1Ay4P83RAwvWnVWegqhxxw= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a05:6902:114f:b0:855:fdcb:4467 with SMTP id p15-20020a056902114f00b00855fdcb4467mr33ybu.0.1676524754468; Wed, 15 Feb 2023 21:19:14 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:48 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-34-surenb@google.com> Subject: [PATCH v3 33/35] powerc/mm: try VMA lock-based page fault handling first From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Laurent Dufour Attempt VMA lock-based page fault handling first, and fall back to the existing mmap_lock-based handling if that fails. Copied from "x86/mm: try VMA lock-based page fault handling first" Signed-off-by: Laurent Dufour Signed-off-by: Suren Baghdasaryan --- arch/powerpc/mm/fault.c | 41 ++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/Kconfig | 1 + arch/powerpc/platforms/pseries/Kconfig | 1 + 3 files changed, 43 insertions(+) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 2bef19cc1b98..c7ae86b04b8a 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -469,6 +469,44 @@ static int ___do_page_fault(struct pt_regs *regs, unsi= gned long address, if (is_exec) flags |=3D FAULT_FLAG_INSTRUCTION; =20 +#ifdef CONFIG_PER_VMA_LOCK + if (!(flags & FAULT_FLAG_USER)) + goto lock_mmap; + + vma =3D lock_vma_under_rcu(mm, address); + if (!vma) + goto lock_mmap; + + if (unlikely(access_pkey_error(is_write, is_exec, + (error_code & DSISR_KEYFAULT), vma))) { + int rc =3D bad_access_pkey(regs, address, vma); + + vma_end_read(vma); + return rc; + } + + if (unlikely(access_error(is_write, is_exec, vma))) { + int rc =3D bad_access(regs, address); + + vma_end_read(vma); + return rc; + } + + fault =3D handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs= ); + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + + if (fault_signal_pending(fault, regs)) + return user_mode(regs) ? 0 : SIGBUS; + +lock_mmap: +#endif /* CONFIG_PER_VMA_LOCK */ + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -545,6 +583,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsig= ned long address, =20 mmap_read_unlock(current->mm); =20 +#ifdef CONFIG_PER_VMA_LOCK +done: +#endif if (unlikely(fault & VM_FAULT_ERROR)) return mm_fault_error(regs, address, fault); =20 diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platform= s/powernv/Kconfig index ae248a161b43..70a46acc70d6 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -16,6 +16,7 @@ config PPC_POWERNV select PPC_DOORBELL select MMU_NOTIFIER select FORCE_SMP + select ARCH_SUPPORTS_PER_VMA_LOCK default y =20 config OPAL_PRD diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platform= s/pseries/Kconfig index a3b4d99567cb..e036a04ff1ca 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -21,6 +21,7 @@ config PPC_PSERIES select HOTPLUG_CPU select FORCE_SMP select SWIOTLB + select ARCH_SUPPORTS_PER_VMA_LOCK default y =20 config PARAVIRT --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 43CEFC636CC for ; Thu, 16 Feb 2023 05:21:34 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229901AbjBPFVd (ORCPT ); Thu, 16 Feb 2023 00:21:33 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46878 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229717AbjBPFUs (ORCPT ); Thu, 16 Feb 2023 00:20:48 -0500 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 78AF1460A3 for ; Wed, 15 Feb 2023 21:19:19 -0800 (PST) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-52ec987d600so8366607b3.21 for ; Wed, 15 Feb 2023 21:19:19 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=gCR40Nt3TWmXm3CiMW8diiOUlGd9n6tZbm3lsLizpE8=; b=JAL2Aq1jm/4r1JqeZYEWBrue/W3MPTrJmtqCzmWLkH/WIKCq/WNMS7CZcuyalyxOey IosycXB+MxcwT4QBmmHaciSLx6z1YK/qs/xuZHAZ6O8LZPapUolphd/QYky6xf7HXG4m TIajJoSTXZu0Yk9TmYuKl91OepBHI9VMsFb9wtZhCPHchjEtdJSeWxPPYnIrHgMp6AHH zReb74rJ9HRrecOXakA/GD9/YkepqNwyoeE0Te0FwV1uNkov7IGEqryUYtJWv/bTKuMJ k46shKzXYlPWW+imaaqXvyzMwcruK2EkfBqoUiODxJ9SCK0t0Bhmm1Z4Ww2xAI1rN1gq fZJA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=gCR40Nt3TWmXm3CiMW8diiOUlGd9n6tZbm3lsLizpE8=; b=ne2K9KfM1ct5nTmkX7ZN4nf31K0RWOY4fgfp3HjE1tLCv5IRVCzXsWF9smSE+pmDs7 cu49UBbmaUYU8wp6VqKk3mztEfx+B+cgIHtXax+NjwmCul2lnt2XvGlVycCd1fuygooC 06E9MJ0esIG+qcz6SjeyyNKzGt64hAJzEMlxnjlnIgXfP6gzvFqC+fTFSuzDxQhU152m x+NJbYnFYSFWRrYBxXZCLH+bymZShB/E0BA1/++vZMGsmlIKy7hgfb6kBGDw6tMwHsws wm9vQlYqxFfRiUG9EQiCwyFQiT//Kdqh/R/hm6QtZ1u/KAGPVavZJmDWAcTIdYdxEKvm Ns5g== X-Gm-Message-State: AO0yUKUq/dtOs4gzVO6zXEXxAZYPc8QqHZOSck2JtSzb0qpbZG41Lnq0 RHDB7C51Ux3ZvLUhLhtzmlVBfSklfzg= X-Google-Smtp-Source: AK7set+zfCXRRrabneP7jbhvgAuIkh3ByJMNAQijkS9aLyX/yWFovh9nbJLYkoOZH5rfO4SiFvvO8qqtNyU= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a5b:609:0:b0:80b:adf8:4f9b with SMTP id d9-20020a5b0609000000b0080badf84f9bmr575565ybq.33.1676524756940; Wed, 15 Feb 2023 21:19:16 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:49 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-35-surenb@google.com> Subject: [PATCH v3 34/35] mm/mmap: free vm_area_struct without call_rcu in exit_mmap From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" call_rcu() can take a long time when callback offloading is enabled. Its use in the vm_area_free can cause regressions in the exit path when multiple VMAs are being freed. Because exit_mmap() is called only after the last mm user drops its refcount, the page fault handlers can't be racing with it. Any other possible user like oom-reaper or process_mrelease are already synchronized using mmap_lock. Therefore exit_mmap() can free VMAs directly, without the use of call_rcu(). Expose __vm_area_free() and use it from exit_mmap() to avoid possible call_rcu() floods and performance regressions caused by it. Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 2 ++ kernel/fork.c | 2 +- mm/mmap.c | 11 +++++++---- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 3c9167529417..cedef02dfd2b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -256,6 +256,8 @@ void setup_initial_init_mm(void *start_code, void *end_= code, struct vm_area_struct *vm_area_alloc(struct mm_struct *); struct vm_area_struct *vm_area_dup(struct vm_area_struct *); void vm_area_free(struct vm_area_struct *); +/* Use only if VMA has no other users */ +void __vm_area_free(struct vm_area_struct *vma); =20 #ifndef CONFIG_MMU extern struct rb_root nommu_region_tree; diff --git a/kernel/fork.c b/kernel/fork.c index a08cc0e2bfde..d0999de82f94 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -480,7 +480,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struc= t *orig) return new; } =20 -static void __vm_area_free(struct vm_area_struct *vma) +void __vm_area_free(struct vm_area_struct *vma) { free_anon_vma_name(vma); kmem_cache_free(vm_area_cachep, vma); diff --git a/mm/mmap.c b/mm/mmap.c index adf40177e68f..d847be615720 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -133,7 +133,7 @@ void unlink_file_vma(struct vm_area_struct *vma) /* * Close a vm structure and free it. */ -static void remove_vma(struct vm_area_struct *vma) +static void remove_vma(struct vm_area_struct *vma, bool unreachable) { might_sleep(); if (vma->vm_ops && vma->vm_ops->close) @@ -141,7 +141,10 @@ static void remove_vma(struct vm_area_struct *vma) if (vma->vm_file) fput(vma->vm_file); mpol_put(vma_policy(vma)); - vm_area_free(vma); + if (unreachable) + __vm_area_free(vma); + else + vm_area_free(vma); } =20 static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *v= mi, @@ -2135,7 +2138,7 @@ static inline void remove_mt(struct mm_struct *mm, st= ruct ma_state *mas) if (vma->vm_flags & VM_ACCOUNT) nr_accounted +=3D nrpages; vm_stat_account(mm, vma->vm_flags, -nrpages); - remove_vma(vma); + remove_vma(vma, false); } vm_unacct_memory(nr_accounted); validate_mm(mm); @@ -3085,7 +3088,7 @@ void exit_mmap(struct mm_struct *mm) do { if (vma->vm_flags & VM_ACCOUNT) nr_accounted +=3D vma_pages(vma); - remove_vma(vma); + remove_vma(vma, true); count++; cond_resched(); } while ((vma =3D mas_find(&mas, ULONG_MAX)) !=3D NULL); --=20 2.39.1 From nobody Thu Sep 11 20:31:25 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 695F4C61DA4 for ; Thu, 16 Feb 2023 05:21:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229790AbjBPFVf (ORCPT ); Thu, 16 Feb 2023 00:21:35 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:46900 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229776AbjBPFUt (ORCPT ); Thu, 16 Feb 2023 00:20:49 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2AFB643469 for ; Wed, 15 Feb 2023 21:19:21 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id k9-20020a25bec9000000b00944353b6a81so864551ybm.7 for ; Wed, 15 Feb 2023 21:19:21 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=/bIj4YrLTVPG3QwseSVPiqG0bxdZfORy7sIu/6u8O1k=; b=kcrkMPk51QPn4U+Qy9PCMBVvz0+hngRjWyUVg3mDY5eCPo5kdNdGQXXAF5nSKjvtxI ovQfwzu/2jGBzDo4t58Z0TFwD6xaqMMee+NhMptwfny7jSZBnt/+oKNA3aeQgGljd8sS akXety944Mg3EUnDXa5SJO4Zw7aGeTPZtwigMyHbadec4ytdul9kOPOFJ5Xa+9QmQgoM XeWGGTE6ollJyfe3sBlUvV2EFNH4AWaioDHTISUd5H/KMkPUAsjYGzb18bayjRFOkrs2 88EaIEp+Df0ZGeekcwiDAUNprBAPetgMpUou+tgl+Oy9Z03QrXgN/piqimqVkufqqcTw xlTw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=/bIj4YrLTVPG3QwseSVPiqG0bxdZfORy7sIu/6u8O1k=; b=DQhhhcQxTqFfKwjtkbtZ+npx3UhLA0Tn0hgwW5AgAjcXAzMwobubUJr919IreKbDwE 9AIQddKGd7MLyWfWru1d+xkQblxK2nvp+gdOelQoxm0oD3CaR1rqSti9JJB0Fy1S+AB/ T8pel0u3zVv1HkUWtNLcW+pzpPyt2lRTpEIUDKAhvIRVuKda5FnSoC09mxRXLa/1oDJV w+p7+E1XAxwVz+Oic5JHmoRIwDuH7eNR1DtMmiLyP0PMxa6qSDUs9cKhwLI/bT4Yr/b2 lci9JcY8154OeRbDe+fPwN537do8c2dKCGcAg3wcUMu2uPTn4L/P6Xw4/dOuVw1/DZ8m Es/A== X-Gm-Message-State: AO0yUKU3i4TBtYYC0OvXrl4HbxsBDSYjCsTkBBhNR0isRxQcYrz0OHMP P4KNVfwRfIbuU8f9e596BLNciVDl94Y= X-Google-Smtp-Source: AK7set8myT7mAaWBlS7bx8SxyKn+Tp8Tz7m7loe5CFurrliPBtK4ZSRcKwf3y/QxM2DkFEVl7Wxm2+r0UC8= X-Received: from surenb-desktop.mtv.corp.google.com ([2620:15c:211:200:f781:d5ed:1806:6ebb]) (user=surenb job=sendgmr) by 2002:a0d:ea05:0:b0:52e:fbcd:e1b4 with SMTP id t5-20020a0dea05000000b0052efbcde1b4mr590898ywe.0.1676524759267; Wed, 15 Feb 2023 21:19:19 -0800 (PST) Date: Wed, 15 Feb 2023 21:17:50 -0800 In-Reply-To: <20230216051750.3125598-1-surenb@google.com> Mime-Version: 1.0 References: <20230216051750.3125598-1-surenb@google.com> X-Mailer: git-send-email 2.39.1.581.gbfd45094c4-goog Message-ID: <20230216051750.3125598-36-surenb@google.com> Subject: [PATCH v3 35/35] mm: separate vma->lock from vm_area_struct From: Suren Baghdasaryan To: akpm@linux-foundation.org Cc: michel@lespinasse.org, jglisse@google.com, mhocko@suse.com, vbabka@suse.cz, hannes@cmpxchg.org, mgorman@techsingularity.net, dave@stgolabs.net, willy@infradead.org, liam.howlett@oracle.com, peterz@infradead.org, ldufour@linux.ibm.com, paulmck@kernel.org, mingo@redhat.com, will@kernel.org, luto@kernel.org, songliubraving@fb.com, peterx@redhat.com, david@redhat.com, dhowells@redhat.com, hughd@google.com, bigeasy@linutronix.de, kent.overstreet@linux.dev, punit.agrawal@bytedance.com, lstoakes@gmail.com, peterjung1337@gmail.com, rientjes@google.com, chriscli@google.com, axelrasmussen@google.com, joelaf@google.com, minchan@google.com, rppt@kernel.org, jannh@google.com, shakeelb@google.com, tatashin@google.com, edumazet@google.com, gthelen@google.com, gurua@google.com, arjunroy@google.com, soheil@google.com, leewalsh@google.com, posk@google.com, michalechner92@googlemail.com, linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org, linuxppc-dev@lists.ozlabs.org, x86@kernel.org, linux-kernel@vger.kernel.org, kernel-team@android.com, Suren Baghdasaryan Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" vma->lock being part of the vm_area_struct causes performance regression during page faults because during contention its count and owner fields are constantly updated and having other parts of vm_area_struct used during page fault handling next to them causes constant cache line bouncing. Fix that by moving the lock outside of the vm_area_struct. All attempts to keep vma->lock inside vm_area_struct in a separate cache line still produce performance regression especially on NUMA machines. Smallest regression was achieved when lock is placed in the fourth cache line but that bloats vm_area_struct to 256 bytes. Considering performance and memory impact, separate lock looks like the best option. It increases memory footprint of each VMA but that can be optimized later if the new size causes issues. Note that after this change vma_init() does not allocate or initialize vma->lock anymore. A number of drivers allocate a pseudo VMA on the stack but they never use the VMA's lock, therefore it does not need to be allocated. The future drivers which might need the VMA lock should use vm_area_alloc()/vm_area_free() to allocate the VMA. Signed-off-by: Suren Baghdasaryan --- include/linux/mm.h | 23 ++++++------- include/linux/mm_types.h | 6 +++- kernel/fork.c | 73 ++++++++++++++++++++++++++++++++-------- 3 files changed, 74 insertions(+), 28 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index cedef02dfd2b..96b18ef3bfa3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -627,12 +627,6 @@ struct vm_operations_struct { }; =20 #ifdef CONFIG_PER_VMA_LOCK -static inline void vma_init_lock(struct vm_area_struct *vma) -{ - init_rwsem(&vma->lock); - vma->vm_lock_seq =3D -1; -} - /* * Try to read-lock a vma. The function is allowed to occasionally yield f= alse * locked result to avoid performance overhead, in which case we fall back= to @@ -644,17 +638,17 @@ static inline bool vma_start_read(struct vm_area_stru= ct *vma) if (vma->vm_lock_seq =3D=3D READ_ONCE(vma->vm_mm->mm_lock_seq)) return false; =20 - if (unlikely(down_read_trylock(&vma->lock) =3D=3D 0)) + if (unlikely(down_read_trylock(&vma->vm_lock->lock) =3D=3D 0)) return false; =20 /* * Overflow might produce false locked result. * False unlocked result is impossible because we modify and check - * vma->vm_lock_seq under vma->lock protection and mm->mm_lock_seq + * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq * modification invalidates all existing locks. */ if (unlikely(vma->vm_lock_seq =3D=3D READ_ONCE(vma->vm_mm->mm_lock_seq)))= { - up_read(&vma->lock); + up_read(&vma->vm_lock->lock); return false; } return true; @@ -663,7 +657,7 @@ static inline bool vma_start_read(struct vm_area_struct= *vma) static inline void vma_end_read(struct vm_area_struct *vma) { rcu_read_lock(); /* keeps vma alive till the end of up_read */ - up_read(&vma->lock); + up_read(&vma->vm_lock->lock); rcu_read_unlock(); } =20 @@ -681,9 +675,9 @@ static inline void vma_start_write(struct vm_area_struc= t *vma) if (vma->vm_lock_seq =3D=3D mm_lock_seq) return; =20 - down_write(&vma->lock); + down_write(&vma->vm_lock->lock); vma->vm_lock_seq =3D mm_lock_seq; - up_write(&vma->lock); + up_write(&vma->vm_lock->lock); } =20 static inline void vma_assert_write_locked(struct vm_area_struct *vma) @@ -720,6 +714,10 @@ static inline void vma_mark_detached(struct vm_area_st= ruct *vma, =20 #endif /* CONFIG_PER_VMA_LOCK */ =20 +/* + * WARNING: vma_init does not initialize vma->vm_lock. + * Use vm_area_alloc()/vm_area_free() if vma needs locking. + */ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *= mm) { static const struct vm_operations_struct dummy_vm_ops =3D {}; @@ -729,7 +727,6 @@ static inline void vma_init(struct vm_area_struct *vma,= struct mm_struct *mm) vma->vm_ops =3D &dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); vma_mark_detached(vma, false); - vma_init_lock(vma); } =20 /* Use when VMA is not part of the VMA tree and needs no locking */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 212e7f923a69..30d4f867ae56 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -471,6 +471,10 @@ struct anon_vma_name { char name[]; }; =20 +struct vma_lock { + struct rw_semaphore lock; +}; + /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -510,7 +514,7 @@ struct vm_area_struct { =20 #ifdef CONFIG_PER_VMA_LOCK int vm_lock_seq; - struct rw_semaphore lock; + struct vma_lock *vm_lock; =20 /* Flag to indicate areas detached from the mm->mm_mt tree */ bool detached; diff --git a/kernel/fork.c b/kernel/fork.c index d0999de82f94..a152804faa14 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -451,13 +451,49 @@ static struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; =20 +#ifdef CONFIG_PER_VMA_LOCK + +/* SLAB cache for vm_area_struct.lock */ +static struct kmem_cache *vma_lock_cachep; + +static bool vma_lock_alloc(struct vm_area_struct *vma) +{ + vma->vm_lock =3D kmem_cache_alloc(vma_lock_cachep, GFP_KERNEL); + if (!vma->vm_lock) + return false; + + init_rwsem(&vma->vm_lock->lock); + vma->vm_lock_seq =3D -1; + + return true; +} + +static inline void vma_lock_free(struct vm_area_struct *vma) +{ + kmem_cache_free(vma_lock_cachep, vma->vm_lock); +} + +#else /* CONFIG_PER_VMA_LOCK */ + +static inline bool vma_lock_alloc(struct vm_area_struct *vma) { return tru= e; } +static inline void vma_lock_free(struct vm_area_struct *vma) {} + +#endif /* CONFIG_PER_VMA_LOCK */ + struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) { struct vm_area_struct *vma; =20 vma =3D kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); - if (vma) - vma_init(vma, mm); + if (!vma) + return NULL; + + vma_init(vma, mm); + if (!vma_lock_alloc(vma)) { + kmem_cache_free(vm_area_cachep, vma); + return NULL; + } + return vma; } =20 @@ -465,24 +501,30 @@ struct vm_area_struct *vm_area_dup(struct vm_area_str= uct *orig) { struct vm_area_struct *new =3D kmem_cache_alloc(vm_area_cachep, GFP_KERNE= L); =20 - if (new) { - ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); - ASSERT_EXCLUSIVE_WRITER(orig->vm_file); - /* - * orig->shared.rb may be modified concurrently, but the clone - * will be reinitialized. - */ - data_race(memcpy(new, orig, sizeof(*new))); - INIT_LIST_HEAD(&new->anon_vma_chain); - vma_init_lock(new); - dup_anon_vma_name(orig, new); + if (!new) + return NULL; + + ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); + ASSERT_EXCLUSIVE_WRITER(orig->vm_file); + /* + * orig->shared.rb may be modified concurrently, but the clone + * will be reinitialized. + */ + data_race(memcpy(new, orig, sizeof(*new))); + if (!vma_lock_alloc(new)) { + kmem_cache_free(vm_area_cachep, new); + return NULL; } + INIT_LIST_HEAD(&new->anon_vma_chain); + dup_anon_vma_name(orig, new); + return new; } =20 void __vm_area_free(struct vm_area_struct *vma) { free_anon_vma_name(vma); + vma_lock_free(vma); kmem_cache_free(vm_area_cachep, vma); } =20 @@ -493,7 +535,7 @@ static void vm_area_free_rcu_cb(struct rcu_head *head) vm_rcu); =20 /* The vma should not be locked while being destroyed. */ - VM_BUG_ON_VMA(rwsem_is_locked(&vma->lock), vma); + VM_BUG_ON_VMA(rwsem_is_locked(&vma->vm_lock->lock), vma); __vm_area_free(vma); } #endif @@ -3089,6 +3131,9 @@ void __init proc_caches_init(void) NULL); =20 vm_area_cachep =3D KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); +#ifdef CONFIG_PER_VMA_LOCK + vma_lock_cachep =3D KMEM_CACHE(vma_lock, SLAB_PANIC|SLAB_ACCOUNT); +#endif mmap_init(); nsproxy_cache_init(); } --=20 2.39.1