From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 5B7A5C43334 for ; Wed, 13 Jul 2022 07:07:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233927AbiGMHHU (ORCPT ); Wed, 13 Jul 2022 03:07:20 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34344 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231444AbiGMHHR (ORCPT ); Wed, 13 Jul 2022 03:07:17 -0400 Received: from mail-pj1-x102b.google.com (mail-pj1-x102b.google.com [IPv6:2607:f8b0:4864:20::102b]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 373B8BBD3B for ; Wed, 13 Jul 2022 00:07:16 -0700 (PDT) Received: by mail-pj1-x102b.google.com with SMTP id j1-20020a17090aeb0100b001ef777a7befso3255260pjz.0 for ; Wed, 13 Jul 2022 00:07:16 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=nIVi6szaPftFCT7jYhps4oIfP07eIgzoCYWVaDAqgoQ=; b=BFD/C46Q87ejWKOc9b/qlPNL9CZxExulinW0M9tXzfP5kj9IrthSOh9/uxRdhaPOfj Q8Q73ICx6+yBN7pFHe/q+Nsp/adJQZgF+pSXS0JSmaEkEFtA/UyQvgHQDGyGijwMjdwd sKfRXfLSzLL25IA8/EfuASHG6SlcCa/3CiOaZCWEL2fItVZFiOGE1zPY4ENgoONQ6V0i DIQp75cLnrCpP/ZSOWcDWTU2Sq5+3Vb9lWeCBH6VPyQy/JdZlvx1GOTlpm9OUivcCAhY g32pjLicdwrImEtEVimMLKCjBLcyNjxp0HWZBjcrYDieOtpLwW8Ly47sk2dEoOD6Y8En jNTw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=nIVi6szaPftFCT7jYhps4oIfP07eIgzoCYWVaDAqgoQ=; b=wWAVg2bZaWuiNa2iCRNCu1ZlinIl/LO9hbeTY/s/xSARF4MrQY4ZAtWc2tumXd0oAj xhNehwrFtsf8YmUhAJmb4L0HnpDbS+RcNTYjH2P634d6wIdH/b5RQ1MVGzWdazrRwoSb 
pdH2vjT704updSdlT2kik5cibnEMuVMJXFEPSRIIxbJgXiLInr8z1pTJmnDNaTEN/zrR oHt4OGWjYHi7404/QD2QZFA4fytgekXZ+99SzwNUs4sF4SI4aEPt0xSGV9FS705NCYqu 2mI0f8oAp65ID5fDX4la80B5ZPYAza/7q43RJyQWDU8itgezdEGh+SunmUFr1n9SRlqi ULDQ== X-Gm-Message-State: AJIora8Vx71g6Gr5So+eCvQU7L5TH6aDvdc6AYTJ57ZjHaA3eD8zhRqc I34kz0Kv2nj3R7k0xJclHJGhbsVNfrQ= X-Google-Smtp-Source: AGRyM1sy3hCOh9JjKUCk6MzP07UHRkxvstO04sI7CRb3TdEYRMhUsRjqRLh0WqE97QHODrhezngF4A== X-Received: by 2002:a17:902:c64b:b0:16b:d51a:dc24 with SMTP id s11-20020a170902c64b00b0016bd51adc24mr2096645pls.48.1657696035751; Wed, 13 Jul 2022 00:07:15 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. [193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.12 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:15 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . org" Subject: [PATCH v2 01/12] locking/qspinlock: remove pv_node abstraction Date: Wed, 13 Jul 2022 17:06:53 +1000 Message-Id: <20220713070704.308394-2-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" There isn't much point trying to separate struct qnode from struct pv_node when struct qnode has to know about pv_node anyway. 
Acked-by: Boqun Feng Signed-off-by: Nicholas Piggin --- kernel/locking/qspinlock.c | 3 ++- kernel/locking/qspinlock_paravirt.h | 34 +++++++++++------------------ 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 65a9a10caa6f..a0fc21d99199 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -82,7 +82,8 @@ struct qnode { struct mcs_spinlock mcs; #ifdef CONFIG_PARAVIRT_SPINLOCKS - long reserved[2]; + int cpu; + u8 state; #endif }; =20 diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock= _paravirt.h index e84d21aa0722..4efe00e6b441 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -47,12 +47,6 @@ enum vcpu_state { vcpu_hashed, /* =3D pv_hash'ed + vcpu_halted */ }; =20 -struct pv_node { - struct mcs_spinlock mcs; - int cpu; - u8 state; -}; - /* * Hybrid PV queued/unfair lock * @@ -170,7 +164,7 @@ static __always_inline int trylock_clear_pending(struct= qspinlock *lock) */ struct pv_hash_entry { struct qspinlock *lock; - struct pv_node *node; + struct qnode *node; }; =20 #define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry)) @@ -209,7 +203,7 @@ void __init __pv_init_lock_hash(void) offset < (1 << pv_lock_hash_bits); \ offset++, he =3D &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash= _bits) - 1)]) =20 -static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *= node) +static struct qspinlock **pv_hash(struct qspinlock *lock, struct qnode *no= de) { unsigned long offset, hash =3D hash_ptr(lock, pv_lock_hash_bits); struct pv_hash_entry *he; @@ -236,11 +230,11 @@ static struct qspinlock **pv_hash(struct qspinlock *l= ock, struct pv_node *node) BUG(); } =20 -static struct pv_node *pv_unhash(struct qspinlock *lock) +static struct qnode *pv_unhash(struct qspinlock *lock) { unsigned long offset, hash =3D hash_ptr(lock, pv_lock_hash_bits); struct pv_hash_entry *he; - struct 
pv_node *node; + struct qnode *node; =20 for_each_hash_entry(he, offset, hash) { if (READ_ONCE(he->lock) =3D=3D lock) { @@ -264,7 +258,7 @@ static struct pv_node *pv_unhash(struct qspinlock *lock) * in a running state. */ static inline bool -pv_wait_early(struct pv_node *prev, int loop) +pv_wait_early(struct qnode *prev, int loop) { if ((loop & PV_PREV_CHECK_MASK) !=3D 0) return false; @@ -277,9 +271,7 @@ pv_wait_early(struct pv_node *prev, int loop) */ static void pv_init_node(struct mcs_spinlock *node) { - struct pv_node *pn =3D (struct pv_node *)node; - - BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode)); + struct qnode *pn =3D (struct qnode *)node; =20 pn->cpu =3D smp_processor_id(); pn->state =3D vcpu_running; @@ -292,8 +284,8 @@ static void pv_init_node(struct mcs_spinlock *node) */ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *p= rev) { - struct pv_node *pn =3D (struct pv_node *)node; - struct pv_node *pp =3D (struct pv_node *)prev; + struct qnode *pn =3D (struct qnode *)node; + struct qnode *pp =3D (struct qnode *)prev; int loop; bool wait_early; =20 @@ -359,7 +351,7 @@ static void pv_wait_node(struct mcs_spinlock *node, str= uct mcs_spinlock *prev) */ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { - struct pv_node *pn =3D (struct pv_node *)node; + struct qnode *pn =3D (struct qnode *)node; =20 /* * If the vCPU is indeed halted, advance its state to match that of @@ -402,7 +394,7 @@ static void pv_kick_node(struct qspinlock *lock, struct= mcs_spinlock *node) static u32 pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) { - struct pv_node *pn =3D (struct pv_node *)node; + struct qnode *pn =3D (struct qnode *)node; struct qspinlock **lp =3D NULL; int waitcnt =3D 0; int loop; @@ -492,7 +484,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs= _spinlock *node) __visible void __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) { - struct pv_node 
*node; + struct qnode *node; =20 if (unlikely(locked !=3D _Q_SLOW_VAL)) { WARN(!debug_locks_silent, @@ -517,14 +509,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lo= ck, u8 locked) node =3D pv_unhash(lock); =20 /* - * Now that we have a reference to the (likely) blocked pv_node, + * Now that we have a reference to the (likely) blocked qnode, * release the lock. */ smp_store_release(&lock->locked, 0); =20 /* * At this point the memory pointed at by lock can be freed/reused, - * however we can still use the pv_node to kick the CPU. + * however we can still use the qnode to kick the CPU. * The other vCPU may not really be halted, but kicking an active * vCPU is harmless other than the additional latency in completing * the unlock. --=20 2.35.1 From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A3D55C43334 for ; Wed, 13 Jul 2022 07:07:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234534AbiGMHHa (ORCPT ); Wed, 13 Jul 2022 03:07:30 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34488 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234110AbiGMHHW (ORCPT ); Wed, 13 Jul 2022 03:07:22 -0400 Received: from mail-pg1-x530.google.com (mail-pg1-x530.google.com [IPv6:2607:f8b0:4864:20::530]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 94BF7E1936 for ; Wed, 13 Jul 2022 00:07:20 -0700 (PDT) Received: by mail-pg1-x530.google.com with SMTP id o18so9657319pgu.9 for ; Wed, 13 Jul 2022 00:07:20 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=+NCyqPa70fv3Z42VUp89XGP6TUd4gmTjk1XFf+/Ifcs=; 
b=QdUAd5JfSGrvjLVhQmK2BTaamc7nXjK/hKPcTTs/qin7R2KALMmV/fJ7NiDlPZEV0P vnctDFYTA3YtOkQ3U5j4QB63N6MXmzO6ElJvUmPhL3aa0eQggs+KcpDYftP5YTkLuEUV GS1kAOmVvce2kaZj1ruyfFsP6PXqBWJlFNKEWh+735hqaF5dQMycEzKGzS7YanETmVxS zA7Lnhn1sxZOj4bMHVfruZ5ALlK2O4wiS2TcrN9liMQYNdRshgfvfRCCC+JbJdScgojJ gKaNmJvEnkvk360wKkyfkRG1oq1t+/68yIw32krh8fASZIZaoMJhro43Ov8q2MDECxLP u+6Q== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=+NCyqPa70fv3Z42VUp89XGP6TUd4gmTjk1XFf+/Ifcs=; b=JVmmWR+Vjk7NuLSgk3xHnVicR4gYHhyKUptnzbUpMmDT1DnJH8vv/rXSMoVGKIM/E+ xQWP1HNKJpW0W5LYeqz6QFaVxpXLaGR0ADm2wncsKWAipkkboZcngEBStXhrs0eYTNrl K8Oe6TZEtgW2Xtvzcc8wmuq5GWrIfTRasz42UnqsYZjeSUX1+10c2NEIuul1wKnJdPRz uXMoZoQqDOtXBddeDAiJaGa42+26R3j6ibdFaRH2TGN9gKXOCaAWXcctkeXpDzGqIRJN zcSbGyyNGew/aubelnJ1tCrnRpOgEZmcA+U5LMryUfn/g9OP5flYkpg4pLap1W2swf1a yLwA== X-Gm-Message-State: AJIora8a6bOPy6TY3braTL5tJy7z+PYo4thghOLp0AeaJnXQQUcSDCMp 1KCkOODaK8mNbmf10hkKyOgnmS5Kung= X-Google-Smtp-Source: AGRyM1soFb7ZrpjIwg1Zrw0ubdV34FFonDw/28eLQnwXn/qBwN1AkVb6mqtwn4pKMXVir2EHdjVwDQ== X-Received: by 2002:a05:6a00:1485:b0:52a:ca0b:5383 with SMTP id v5-20020a056a00148500b0052aca0b5383mr1795737pfu.0.1657696039649; Wed, 13 Jul 2022 00:07:19 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. [193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.16 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:19 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . 
org" Subject: [PATCH v2 02/12] locking/qspinlock: inline mcs_spinlock functions into qspinlock Date: Wed, 13 Jul 2022 17:06:54 +1000 Message-Id: <20220713070704.308394-3-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" qspinlock uses mcs_spinlock for the struct type (.next, .locked, and the misplaced .count), and arch_mcs_spin_{un}lock_contended(). These can be trivially inlined into qspinlock, the only arch that overrides them is arm, and it does not implement qspinlock. The now-unused mcs_spinlock code is removed. Signed-off-by: Nicholas Piggin --- arch/alpha/include/asm/Kbuild | 1 - arch/arc/include/asm/Kbuild | 1 - arch/arm/include/asm/mcs_spinlock.h | 24 ------ arch/arm64/include/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 - arch/ia64/include/asm/Kbuild | 1 - arch/m68k/include/asm/Kbuild | 1 - arch/microblaze/include/asm/Kbuild | 1 - arch/mips/include/asm/Kbuild | 1 - arch/nios2/include/asm/Kbuild | 1 - arch/parisc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/Kbuild | 1 - arch/s390/include/asm/Kbuild | 1 - arch/sh/include/asm/Kbuild | 1 - arch/sparc/include/asm/Kbuild | 1 - arch/um/include/asm/Kbuild | 1 - arch/x86/include/asm/Kbuild | 1 - arch/xtensa/include/asm/Kbuild | 1 - include/asm-generic/mcs_spinlock.h | 13 --- kernel/locking/mcs_spinlock.h | 121 ---------------------------- kernel/locking/qspinlock.c | 38 ++++----- kernel/locking/qspinlock_paravirt.h | 53 ++++++------ 22 files changed, 43 insertions(+), 223 deletions(-) delete mode 100644 arch/arm/include/asm/mcs_spinlock.h delete mode 100644 include/asm-generic/mcs_spinlock.h delete mode 100644 kernel/locking/mcs_spinlock.h diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild 
index 42911c8340c7..d21cf7b3173a 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -3,4 +3,3 @@ generated-y +=3D syscall_table.h generic-y +=3D export.h generic-y +=3D kvm_para.h -generic-y +=3D mcs_spinlock.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 3c1afa524b9c..5ae4337a9301 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 generic-y +=3D extable.h generic-y +=3D kvm_para.h -generic-y +=3D mcs_spinlock.h generic-y +=3D parport.h generic-y +=3D user.h diff --git a/arch/arm/include/asm/mcs_spinlock.h b/arch/arm/include/asm/mcs= _spinlock.h deleted file mode 100644 index 529d2cf4d06f..000000000000 --- a/arch/arm/include/asm/mcs_spinlock.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MCS_LOCK_H -#define __ASM_MCS_LOCK_H - -#ifdef CONFIG_SMP -#include - -/* MCS spin-locking. */ -#define arch_mcs_spin_lock_contended(lock) \ -do { \ - /* Ensure prior stores are observed before we enter wfe. 
*/ \ - smp_mb(); \ - while (!(smp_load_acquire(lock))) \ - wfe(); \ -} while (0) \ - -#define arch_mcs_spin_unlock_contended(lock) \ -do { \ - smp_store_release(lock, 1); \ - dsb_sev(); \ -} while (0) - -#endif /* CONFIG_SMP */ -#endif /* __ASM_MCS_LOCK_H */ diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 5c8ee5a541d2..57e9ad366d25 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 generic-y +=3D early_ioremap.h -generic-y +=3D mcs_spinlock.h generic-y +=3D qrwlock.h generic-y +=3D qspinlock.h generic-y +=3D parport.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbu= ild index 3ece3c93fe08..37bbf99f66d4 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -2,4 +2,3 @@ generic-y +=3D extable.h generic-y +=3D iomap.h generic-y +=3D kvm_para.h -generic-y +=3D mcs_spinlock.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index f994c1daf9d4..a0198c12e339 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 generated-y +=3D syscall_table.h generic-y +=3D kvm_para.h -generic-y +=3D mcs_spinlock.h generic-y +=3D vtime.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 1b720299deb1..8dbef73ce01d 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -3,5 +3,4 @@ generated-y +=3D syscall_table.h generic-y +=3D export.h generic-y +=3D extable.h generic-y +=3D kvm_para.h -generic-y +=3D mcs_spinlock.h generic-y +=3D spinlock.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/a= sm/Kbuild index a055f5dbe00a..7615a27e0851 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -3,7 +3,6 @@ generated-y +=3D syscall_table.h generic-y +=3D cmpxchg.h generic-y +=3D extable.h generic-y +=3D kvm_para.h 
-generic-y +=3D mcs_spinlock.h generic-y +=3D parport.h generic-y +=3D syscalls.h generic-y +=3D tlb.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index dee172716581..65cedca08771 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -9,7 +9,6 @@ generated-y +=3D unistd_nr_o32.h =20 generic-y +=3D export.h generic-y +=3D kvm_para.h -generic-y +=3D mcs_spinlock.h generic-y +=3D parport.h generic-y +=3D qrwlock.h generic-y +=3D qspinlock.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 7fe7437555fb..5718eee9665c 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -2,6 +2,5 @@ generic-y +=3D cmpxchg.h generic-y +=3D extable.h generic-y +=3D kvm_para.h -generic-y +=3D mcs_spinlock.h generic-y +=3D spinlock.h generic-y +=3D user.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index e6e7f74c8ac9..1f0c28d74c88 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -2,5 +2,4 @@ generated-y +=3D syscall_table_32.h generated-y +=3D syscall_table_64.h generic-y +=3D kvm_para.h -generic-y +=3D mcs_spinlock.h generic-y +=3D user.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbu= ild index bcf95ce0964f..813a8c3405ad 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -4,7 +4,6 @@ generated-y +=3D syscall_table_64.h generated-y +=3D syscall_table_spu.h generic-y +=3D export.h generic-y +=3D kvm_types.h -generic-y +=3D mcs_spinlock.h generic-y +=3D qrwlock.h generic-y +=3D vtime.h generic-y +=3D early_ioremap.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 1a18d7b82f86..8b036a4ee2ca 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -7,4 +7,3 @@ generated-y +=3D unistd_nr.h generic-y +=3D asm-offsets.h generic-y +=3D export.h generic-y +=3D kvm_types.h -generic-y +=3D mcs_spinlock.h 
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index fc44d9c88b41..3192f19bcf85 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 generated-y +=3D syscall_table.h generic-y +=3D kvm_para.h -generic-y +=3D mcs_spinlock.h generic-y +=3D parport.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 0b9d98ced34a..f0b913f7ba05 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -3,4 +3,3 @@ generated-y +=3D syscall_table_32.h generated-y +=3D syscall_table_64.h generic-y +=3D export.h generic-y +=3D kvm_para.h -generic-y +=3D mcs_spinlock.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index b2d834a29f3a..04080c0c1aec 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -14,7 +14,6 @@ generic-y +=3D hw_irq.h generic-y +=3D irq_regs.h generic-y +=3D irq_work.h generic-y +=3D kdebug.h -generic-y +=3D mcs_spinlock.h generic-y +=3D mmiowb.h generic-y +=3D module.lds.h generic-y +=3D param.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 1e51650b79d7..beb7683f7b8f 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -10,4 +10,3 @@ generated-y +=3D xen-hypercalls.h =20 generic-y +=3D early_ioremap.h generic-y +=3D export.h -generic-y +=3D mcs_spinlock.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index fa07c686cbcc..29ae65cb68c2 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -2,7 +2,6 @@ generated-y +=3D syscall_table.h generic-y +=3D extable.h generic-y +=3D kvm_para.h -generic-y +=3D mcs_spinlock.h generic-y +=3D param.h generic-y +=3D parport.h generic-y +=3D qrwlock.h diff --git a/include/asm-generic/mcs_spinlock.h b/include/asm-generic/mcs_s= pinlock.h deleted file mode 100644 index 10cd4ffc6ba2..000000000000 --- a/include/asm-generic/mcs_spinlock.h 
+++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __ASM_MCS_SPINLOCK_H -#define __ASM_MCS_SPINLOCK_H - -/* - * Architectures can define their own: - * - * arch_mcs_spin_lock_contended(l) - * arch_mcs_spin_unlock_contended(l) - * - * See kernel/locking/mcs_spinlock.c. - */ - -#endif /* __ASM_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h deleted file mode 100644 index 85251d8771d9..000000000000 --- a/kernel/locking/mcs_spinlock.h +++ /dev/null @@ -1,121 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * MCS lock defines - * - * This file contains the main data structure and API definitions of MCS l= ock. - * - * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lo= ck - * with the desirable properties of being fair, and with each cpu trying - * to acquire the lock spinning on a local variable. - * It avoids expensive cache bounces that common test-and-set spin-lock - * implementations incur. - */ -#ifndef __LINUX_MCS_SPINLOCK_H -#define __LINUX_MCS_SPINLOCK_H - -#include - -struct mcs_spinlock { - struct mcs_spinlock *next; - int locked; /* 1 if lock acquired */ - int count; /* nesting count, see qspinlock.c */ -}; - -#ifndef arch_mcs_spin_lock_contended -/* - * Using smp_cond_load_acquire() provides the acquire semantics - * required so that subsequent operations happen after the - * lock is acquired. Additionally, some architectures such as - * ARM64 would like to do spin-waiting instead of purely - * spinning, and smp_cond_load_acquire() provides that behavior. - */ -#define arch_mcs_spin_lock_contended(l) \ -do { \ - smp_cond_load_acquire(l, VAL); \ -} while (0) -#endif - -#ifndef arch_mcs_spin_unlock_contended -/* - * smp_store_release() provides a memory barrier to ensure all - * operations in the critical section has been completed before - * unlocking. 
- */ -#define arch_mcs_spin_unlock_contended(l) \ - smp_store_release((l), 1) -#endif - -/* - * Note: the smp_load_acquire/smp_store_release pair is not - * sufficient to form a full memory barrier across - * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. - * For applications that need a full barrier across multiple cpus - * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be - * used after mcs_lock. - */ - -/* - * In order to acquire the lock, the caller should declare a local node and - * pass a reference of the node to this function in addition to the lock. - * If the lock has already been acquired, then this will proceed to spin - * on this node->locked until the previous lock holder sets the node->lock= ed - * in mcs_spin_unlock(). - */ -static inline -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *prev; - - /* Init node */ - node->locked =3D 0; - node->next =3D NULL; - - /* - * We rely on the full barrier with global transitivity implied by the - * below xchg() to order the initialization stores above against any - * observation of @node. And to provide the ACQUIRE ordering associated - * with a LOCK primitive. - */ - prev =3D xchg(lock, node); - if (likely(prev =3D=3D NULL)) { - /* - * Lock acquired, don't need to set node->locked to 1. Threads - * only spin on its own node->locked value for lock acquisition. - * However, since this thread can immediately acquire the lock - * and does not proceed to spin on its own node->locked, this - * value won't be used. If a debug mode is needed to - * audit lock status, then set node->locked value here. - */ - return; - } - WRITE_ONCE(prev->next, node); - - /* Wait until the lock holder passes the lock down. */ - arch_mcs_spin_lock_contended(&node->locked); -} - -/* - * Releases the lock. The caller should pass in the corresponding node that - * was used to acquire the lock. 
- */ -static inline -void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *next =3D READ_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (likely(cmpxchg_release(lock, node, NULL) =3D=3D node)) - return; - /* Wait until the next pointer is set */ - while (!(next =3D READ_ONCE(node->next))) - cpu_relax(); - } - - /* Pass lock to next waiter. */ - arch_mcs_spin_unlock_contended(&next->locked); -} - -#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index a0fc21d99199..32f401e966ab 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -66,11 +66,10 @@ * */ =20 -#include "mcs_spinlock.h" #define MAX_NODES 4 =20 /* - * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in + * On 64-bit architectures, the qnode structure will be 16 bytes in * size and four of them will fit nicely in one 64-byte cacheline. For * pvqspinlock, however, we need more space for extra data. To accommodate * that, we insert two more long words to pad it up to 32 bytes. IOW, only @@ -80,7 +79,9 @@ * qspinlocks. 
*/ struct qnode { - struct mcs_spinlock mcs; + struct qnode *next; + int locked; /* 1 if lock acquired */ + int count; /* nesting count */ #ifdef CONFIG_PARAVIRT_SPINLOCKS int cpu; u8 state; @@ -124,18 +125,18 @@ static inline __pure u32 encode_tail(int cpu, int idx) return tail; } =20 -static inline __pure struct mcs_spinlock *decode_tail(u32 tail) +static inline __pure struct qnode *decode_tail(u32 tail) { int cpu =3D (tail >> _Q_TAIL_CPU_OFFSET) - 1; int idx =3D (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; =20 - return per_cpu_ptr(&qnodes[idx].mcs, cpu); + return per_cpu_ptr(&qnodes[idx], cpu); } =20 static inline __pure -struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx) +struct qnode *grab_qnode(struct qnode *base, int idx) { - return &((struct qnode *)base + idx)->mcs; + return &base[idx]; } =20 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) @@ -271,13 +272,13 @@ static __always_inline void set_locked(struct qspinlo= ck *lock) * all the PV callbacks. 
*/ =20 -static __always_inline void __pv_init_node(struct mcs_spinlock *node) { } -static __always_inline void __pv_wait_node(struct mcs_spinlock *node, - struct mcs_spinlock *prev) { } +static __always_inline void __pv_init_node(struct qnode *node) { } +static __always_inline void __pv_wait_node(struct qnode *node, + struct qnode *prev) { } static __always_inline void __pv_kick_node(struct qspinlock *lock, - struct mcs_spinlock *node) { } + struct qnode *node) { } static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, - struct mcs_spinlock *node) + struct qnode *node) { return 0; } =20 #define pv_enabled() false @@ -316,7 +317,7 @@ static __always_inline u32 __pv_wait_head_or_lock(stru= ct qspinlock *lock, */ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { - struct mcs_spinlock *prev, *next, *node; + struct qnode *prev, *next, *node; u32 old, tail; int idx; =20 @@ -399,7 +400,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, = u32 val) queue: lockevent_inc(lock_slowpath); pv_queue: - node =3D this_cpu_ptr(&qnodes[0].mcs); + node =3D this_cpu_ptr(&qnodes[0]); idx =3D node->count++; tail =3D encode_tail(smp_processor_id(), idx); =20 @@ -421,7 +422,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, = u32 val) goto release; } =20 - node =3D grab_mcs_node(node, idx); + node =3D grab_qnode(node, idx); =20 /* * Keep counts of non-zero index values: @@ -475,7 +476,8 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, = u32 val) WRITE_ONCE(prev->next, node); =20 pv_wait_node(node, prev); - arch_mcs_spin_lock_contended(&node->locked); + /* Wait for mcs node lock to be released */ + smp_cond_load_acquire(&node->locked, VAL); =20 /* * While waiting for the MCS lock, the next pointer may have @@ -554,7 +556,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, = u32 val) if (!next) next =3D smp_cond_load_relaxed(&node->next, (VAL)); =20 - arch_mcs_spin_unlock_contended(&next->locked); + 
smp_store_release(&next->locked, 1); /* unlock the mcs node lock */ pv_kick_node(lock, next); =20 release: @@ -563,7 +565,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, = u32 val) /* * release the node */ - __this_cpu_dec(qnodes[0].mcs.count); + __this_cpu_dec(qnodes[0].count); } EXPORT_SYMBOL(queued_spin_lock_slowpath); =20 diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock= _paravirt.h index 4efe00e6b441..cce3d3dde216 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -267,14 +267,12 @@ pv_wait_early(struct qnode *prev, int loop) } =20 /* - * Initialize the PV part of the mcs_spinlock node. + * Initialize the PV part of the qnode. */ -static void pv_init_node(struct mcs_spinlock *node) +static void pv_init_node(struct qnode *node) { - struct qnode *pn =3D (struct qnode *)node; - - pn->cpu =3D smp_processor_id(); - pn->state =3D vcpu_running; + node->cpu =3D smp_processor_id(); + node->state =3D vcpu_running; } =20 /* @@ -282,10 +280,8 @@ static void pv_init_node(struct mcs_spinlock *node) * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its * behalf. 
*/ -static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *p= rev) +static void pv_wait_node(struct qnode *node, struct qnode *prev) { - struct qnode *pn =3D (struct qnode *)node; - struct qnode *pp =3D (struct qnode *)prev; int loop; bool wait_early; =20 @@ -293,7 +289,7 @@ static void pv_wait_node(struct mcs_spinlock *node, str= uct mcs_spinlock *prev) for (wait_early =3D false, loop =3D SPIN_THRESHOLD; loop; loop--) { if (READ_ONCE(node->locked)) return; - if (pv_wait_early(pp, loop)) { + if (pv_wait_early(prev, loop)) { wait_early =3D true; break; } @@ -301,20 +297,20 @@ static void pv_wait_node(struct mcs_spinlock *node, s= truct mcs_spinlock *prev) } =20 /* - * Order pn->state vs pn->locked thusly: + * Order node->state vs node->locked thusly: * - * [S] pn->state =3D vcpu_halted [S] next->locked =3D 1 - * MB MB - * [L] pn->locked [RmW] pn->state =3D vcpu_hashed + * [S] node->state =3D vcpu_halted [S] next->locked =3D 1 + * MB MB + * [L] node->locked [RmW] node->state =3D vcpu_hashed * * Matches the cmpxchg() from pv_kick_node(). */ - smp_store_mb(pn->state, vcpu_halted); + smp_store_mb(node->state, vcpu_halted); =20 if (!READ_ONCE(node->locked)) { lockevent_inc(pv_wait_node); lockevent_cond_inc(pv_wait_early, wait_early); - pv_wait(&pn->state, vcpu_halted); + pv_wait(&node->state, vcpu_halted); } =20 /* @@ -322,7 +318,7 @@ static void pv_wait_node(struct mcs_spinlock *node, str= uct mcs_spinlock *prev) * value so that pv_wait_head_or_lock() knows to not also try * to hash this lock. */ - cmpxchg(&pn->state, vcpu_halted, vcpu_running); + cmpxchg(&node->state, vcpu_halted, vcpu_running); =20 /* * If the locked flag is still not set after wakeup, it is a @@ -349,10 +345,8 @@ static void pv_wait_node(struct mcs_spinlock *node, st= ruct mcs_spinlock *prev) * such that they're waiting in pv_wait_head_or_lock(), this avoids a * wake/sleep cycle. 
*/ -static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) +static void pv_kick_node(struct qspinlock *lock, struct qnode *node) { - struct qnode *pn =3D (struct qnode *)node; - /* * If the vCPU is indeed halted, advance its state to match that of * pv_wait_node(). If OTOH this fails, the vCPU was running and will @@ -361,15 +355,15 @@ static void pv_kick_node(struct qspinlock *lock, stru= ct mcs_spinlock *node) * Matches with smp_store_mb() and cmpxchg() in pv_wait_node() * * The write to next->locked in arch_mcs_spin_unlock_contended() - * must be ordered before the read of pn->state in the cmpxchg() + * must be ordered before the read of node->state in the cmpxchg() * below for the code to work correctly. To guarantee full ordering * irrespective of the success or failure of the cmpxchg(), * a relaxed version with explicit barrier is used. The control - * dependency will order the reading of pn->state before any + * dependency will order the reading of node->state before any * subsequent writes. */ smp_mb__before_atomic(); - if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed) + if (cmpxchg_relaxed(&node->state, vcpu_halted, vcpu_hashed) !=3D vcpu_halted) return; =20 @@ -381,7 +375,7 @@ static void pv_kick_node(struct qspinlock *lock, struct= mcs_spinlock *node) * needed. */ WRITE_ONCE(lock->locked, _Q_SLOW_VAL); - (void)pv_hash(lock, pn); + (void)pv_hash(lock, node); } =20 /* @@ -392,9 +386,8 @@ static void pv_kick_node(struct qspinlock *lock, struct= mcs_spinlock *node) * The current value of the lock will be returned for additional processin= g. 
*/ static u32 -pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) +pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node) { - struct qnode *pn =3D (struct qnode *)node; struct qspinlock **lp =3D NULL; int waitcnt =3D 0; int loop; @@ -403,7 +396,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs= _spinlock *node) * If pv_kick_node() already advanced our state, we don't need to * insert ourselves into the hash table anymore. */ - if (READ_ONCE(pn->state) =3D=3D vcpu_hashed) + if (READ_ONCE(node->state) =3D=3D vcpu_hashed) lp =3D (struct qspinlock **)1; =20 /* @@ -416,7 +409,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs= _spinlock *node) * Set correct vCPU state to be used by queue node wait-early * mechanism. */ - WRITE_ONCE(pn->state, vcpu_running); + WRITE_ONCE(node->state, vcpu_running); =20 /* * Set the pending bit in the active lock spinning loop to @@ -432,7 +425,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs= _spinlock *node) =20 =20 if (!lp) { /* ONCE */ - lp =3D pv_hash(lock, pn); + lp =3D pv_hash(lock, node); =20 /* * We must hash before setting _Q_SLOW_VAL, such that @@ -456,7 +449,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs= _spinlock *node) goto gotlock; } } - WRITE_ONCE(pn->state, vcpu_hashed); + WRITE_ONCE(node->state, vcpu_hashed); lockevent_inc(pv_wait_head); lockevent_cond_inc(pv_wait_again, waitcnt); pv_wait(&lock->locked, _Q_SLOW_VAL); --=20 2.35.1 From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 21ADEC433EF for ; Wed, 13 Jul 2022 07:07:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234540AbiGMHHi (ORCPT ); Wed, 13 Jul 2022 03:07:38 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34614 "EHLO 
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234489AbiGMHH0 (ORCPT ); Wed, 13 Jul 2022 03:07:26 -0400 Received: from mail-pg1-x535.google.com (mail-pg1-x535.google.com [IPv6:2607:f8b0:4864:20::535]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 426A3E2A0B for ; Wed, 13 Jul 2022 00:07:24 -0700 (PDT) Received: by mail-pg1-x535.google.com with SMTP id f65so413133pgc.12 for ; Wed, 13 Jul 2022 00:07:24 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=sT9qOYSF74DDh91Es5esuTKdKBvO8DYK5RYWFWTsIig=; b=NYG8nlUEVkv6JqnRV5lzyZ9TWggIIm2WeOLhCxyzTSmuJmfyWFJGRsDUlux3t4AhBy JlXdh5fRun3SpJ7JqBmIpyZCCLVgg8si+rx8/sDDMDKnI6N7j3+p5xqiN3GWhN9Xib5D Wuz/yAmeeuU7wEdEQ3UKrgviwSmAI/WgIJezQLbmXTR4MM6bwC5zQ5BrvU1qA1m/KrGd AZmISVkCabm0z41veLk+Qflxk8Srf0jq78GjA+6f2xs3QwD+ykL3xO6atjRybsYApNQ1 7RegRnObHKys/fSMzV3ZQGHMzt5hnakYgU5v9ShDn+aa4LQXOk64iEyVJhd3EBW1qHFX nY7A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=sT9qOYSF74DDh91Es5esuTKdKBvO8DYK5RYWFWTsIig=; b=S+YMqGMRV5R4feI8EUQkkln4OIUULh/P6YpwXBtjcNEaxJ+Uodm3XF6TrVawt8P/OA /inW7y9cDwbOzEGZXuw6TuunXbKzuWTvmDthpCkS1Tq+Rmh/rSlKZHAXVOUw6nmJ59nH PhT5P/bngmVjS4+bwV56hmrCGkRullZZ9P22C0etYPOSURiON/I3U3Nr+xV9JSjVFl/9 2rGzHxTC/NwxBBCT6lWlJcacZoYYs2/gKHiRWr6HTDMKNus/gKbcOQ7Bs5OPV3a4lBLx cz+yOvmfCAvfbZe1XQ12g6rw6nzkFCU3wphRgNI3a2FUMeMXxc09tqEV1ZMDana4c4zv RNIg== X-Gm-Message-State: AJIora/C1W7xMG/xeTXyru+zWdXwa3Y8r43Pibvi/x+st+gLIK5H+4Ht kIrvPm9rYxEATt9FT87NFTg= X-Google-Smtp-Source: AGRyM1uIx6UcyGfU+hyhrlQ7rl7xNRpkvodQifCLyUbffLM4y1ltjuOG+DkneD8XMfLfzDrrHk4S4g== X-Received: by 2002:a63:f95c:0:b0:412:9d5b:fbfd with SMTP id q28-20020a63f95c000000b004129d5bfbfdmr1785942pgk.103.1657696043357; Wed, 13 Jul 
2022 00:07:23 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. [193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.20 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:22 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . org" Subject: [PATCH v2 03/12] locking/qspinlock: split common mcs queueing code into its own function Date: Wed, 13 Jul 2022 17:06:55 +1000 Message-Id: <20220713070704.308394-4-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" pv qspinlocks jump over a bunch of slowpath code directly to the queueing part. Split the queueing code into its own function and call it explicitly in both the pv and !pv cases. This will help to untangle the two cases with subsequent changes. 
Signed-off-by: Nicholas Piggin --- kernel/locking/qspinlock.c | 208 +++++++++++++++++++------------------ 1 file changed, 108 insertions(+), 100 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 32f401e966ab..7360d643de29 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -294,112 +294,14 @@ static __always_inline u32 __pv_wait_head_or_lock(s= truct qspinlock *lock, =20 #endif /* _GEN_PV_LOCK_SLOWPATH */ =20 -/** - * queued_spin_lock_slowpath - acquire the queued spinlock - * @lock: Pointer to queued spinlock structure - * @val: Current value of the queued spinlock 32-bit word - * - * (queue tail, pending bit, lock value) - * - * fast : slow : u= nlock - * : : - * uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (= *,*,0) - * : | ^--------.------. / : - * : v \ \ | : - * pending : (0,1,1) +--> (0,1,0) \ | : - * : | ^--' | | : - * : v | | : - * uncontended : (n,x,y) +--> (n,0,0) --' | : - * queue : | ^--' | : - * : v | : - * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' : - * queue : ^--' : - */ -void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) { struct qnode *prev, *next, *node; - u32 old, tail; + u32 val, old, tail; int idx; =20 BUILD_BUG_ON(CONFIG_NR_CPUS >=3D (1U << _Q_TAIL_CPU_BITS)); =20 - if (pv_enabled()) - goto pv_queue; - - if (virt_spin_lock(lock)) - return; - - /* - * Wait for in-progress pending->locked hand-overs with a bounded - * number of spins so that we guarantee forward progress. - * - * 0,1,0 -> 0,0,1 - */ - if (val =3D=3D _Q_PENDING_VAL) { - int cnt =3D _Q_PENDING_LOOPS; - val =3D atomic_cond_read_relaxed(&lock->val, - (VAL !=3D _Q_PENDING_VAL) || !cnt--); - } - - /* - * If we observe any contention; queue. 
- */ - if (val & ~_Q_LOCKED_MASK) - goto queue; - - /* - * trylock || pending - * - * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock - */ - val =3D queued_fetch_set_pending_acquire(lock); - - /* - * If we observe contention, there is a concurrent locker. - * - * Undo and queue; our setting of PENDING might have made the - * n,0,0 -> 0,0,0 transition fail and it will now be waiting - * on @next to become !NULL. - */ - if (unlikely(val & ~_Q_LOCKED_MASK)) { - - /* Undo PENDING if we set it. */ - if (!(val & _Q_PENDING_MASK)) - clear_pending(lock); - - goto queue; - } - - /* - * We're pending, wait for the owner to go away. - * - * 0,1,1 -> 0,1,0 - * - * this wait loop must be a load-acquire such that we match the - * store-release that clears the locked bit and create lock - * sequentiality; this is because not all - * clear_pending_set_locked() implementations imply full - * barriers. - */ - if (val & _Q_LOCKED_MASK) - atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK)); - - /* - * take ownership and clear the pending bit. - * - * 0,1,0 -> 0,0,1 - */ - clear_pending_set_locked(lock); - lockevent_inc(lock_pending); - return; - - /* - * End of pending bit optimistic spinning and beginning of MCS - * queuing. - */ -queue: - lockevent_inc(lock_slowpath); -pv_queue: node =3D this_cpu_ptr(&qnodes[0]); idx =3D node->count++; tail =3D encode_tail(smp_processor_id(), idx); @@ -567,6 +469,110 @@ void queued_spin_lock_slowpath(struct qspinlock *lock= , u32 val) */ __this_cpu_dec(qnodes[0].count); } + +/** + * queued_spin_lock_slowpath - acquire the queued spinlock + * @lock: Pointer to queued spinlock structure + * @val: Current value of the queued spinlock 32-bit word + * + * (queue tail, pending bit, lock value) + * + * fast : slow : u= nlock + * : : + * uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (= *,*,0) + * : | ^--------.------. 
/ : + * : v \ \ | : + * pending : (0,1,1) +--> (0,1,0) \ | : + * : | ^--' | | : + * : v | | : + * uncontended : (n,x,y) +--> (n,0,0) --' | : + * queue : | ^--' | : + * : v | : + * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' : + * queue : ^--' : + */ +void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + if (pv_enabled()) { + queued_spin_lock_mcs_queue(lock); + return; + } + + if (virt_spin_lock(lock)) + return; + + /* + * Wait for in-progress pending->locked hand-overs with a bounded + * number of spins so that we guarantee forward progress. + * + * 0,1,0 -> 0,0,1 + */ + if (val =3D=3D _Q_PENDING_VAL) { + int cnt =3D _Q_PENDING_LOOPS; + val =3D atomic_cond_read_relaxed(&lock->val, + (VAL !=3D _Q_PENDING_VAL) || !cnt--); + } + + /* + * If we observe any contention; queue. + */ + if (val & ~_Q_LOCKED_MASK) + goto queue; + + /* + * trylock || pending + * + * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock + */ + val =3D queued_fetch_set_pending_acquire(lock); + + /* + * If we observe contention, there is a concurrent locker. + * + * Undo and queue; our setting of PENDING might have made the + * n,0,0 -> 0,0,0 transition fail and it will now be waiting + * on @next to become !NULL. + */ + if (unlikely(val & ~_Q_LOCKED_MASK)) { + + /* Undo PENDING if we set it. */ + if (!(val & _Q_PENDING_MASK)) + clear_pending(lock); + + goto queue; + } + + /* + * We're pending, wait for the owner to go away. + * + * 0,1,1 -> 0,1,0 + * + * this wait loop must be a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because not all + * clear_pending_set_locked() implementations imply full + * barriers. + */ + if (val & _Q_LOCKED_MASK) + atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK)); + + /* + * take ownership and clear the pending bit. 
+ * + * 0,1,0 -> 0,0,1 + */ + clear_pending_set_locked(lock); + lockevent_inc(lock_pending); + return; + + /* + * End of pending bit optimistic spinning and beginning of MCS + * queuing. + */ +queue: + lockevent_inc(lock_slowpath); + queued_spin_lock_mcs_queue(lock); +} EXPORT_SYMBOL(queued_spin_lock_slowpath); =20 /* @@ -583,6 +589,8 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath); #undef pv_kick_node #undef pv_wait_head_or_lock =20 +#define queued_spin_lock_mcs_queue __pv_queued_spin_lock_mcs_queue + #undef queued_spin_lock_slowpath #define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath =20 --=20 2.35.1 From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id D8F43C43334 for ; Wed, 13 Jul 2022 07:07:43 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234624AbiGMHHm (ORCPT ); Wed, 13 Jul 2022 03:07:42 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34618 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234164AbiGMHHf (ORCPT ); Wed, 13 Jul 2022 03:07:35 -0400 Received: from mail-pf1-x42f.google.com (mail-pf1-x42f.google.com [IPv6:2607:f8b0:4864:20::42f]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A8BDEE2A19 for ; Wed, 13 Jul 2022 00:07:27 -0700 (PDT) Received: by mail-pf1-x42f.google.com with SMTP id y141so9474318pfb.7 for ; Wed, 13 Jul 2022 00:07:27 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=canSSO8tPQl0HCPeUzAGvF6YloaempkmnCz8lazbEZE=; b=mbBhLQqoKGOEvA3aKocs8l9QK5MA37IeptZGZcJtu8SGy0HvvrXupxyfSMEarYxiru 2HVdls33pseJ61HFDw9xkg0YGEkDNOk3SkFDN1dXPDnatz2juc8bS9/MbXEfC+iJGKng 
SpIW2yandu/Fs2TFSDzCxmb5etHq1jySkS+qNeVoCvvKoRTiudKZbe6PMjpAlClXoB8C qmxrCPWQBnkAWmXCYlIC8yVT7HC2UzuWxngtm56UyzsKJKUudYdb2BfSo9E7LHaLKQX5 8iq5DBr65LJ7l5FoZuJY6z/0r9oNvKHkeKItMJmb+TfxUGLzmkIZ4KFX4EHd5QX3+/1w VKqA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=canSSO8tPQl0HCPeUzAGvF6YloaempkmnCz8lazbEZE=; b=pdjbVx0hhMb9koO3WMdvrEk+Yde27T0qKol35HF3XzT4YhUvMhT7vxuF50lqSulUvx F8nk0xPd1ux5YgGnIafc+bQfAmw9RxjIQPYw+sC6/0n6DubyOlzT1UyDr/zcu31wnOv6 83+SNfARnme92xtQ+kpZsW/7je2xoe+FVVOXj68xlDZUl2InTemc1bG572h01aI1secV 19l7A5K4n3YZ3oTN6uQzjb6J+giR6MBfv06wdr2lph7bvwSH0UhyktRNR36EJlXF9xg9 AV5XF70hXqO5kkQxVUNtPwQ/Oyrd25OLLQN9C7PBQOfvuRgGZhyNPdjMs8Yv661UC/4n XaYw== X-Gm-Message-State: AJIora9W5NHq7On66+E7nxqvarDiXhDV/HkikdxB1xyDwF0oRPtBWZdk t3j8gwzJT+9WfxQ7I5inrRE= X-Google-Smtp-Source: AGRyM1t2Do9K6rddRIlGiObzzUOWIfmjeKY8x5oYLDeGQA9CzUDJUMkmvp5z9jMhJ2Ao7eAuxSVV/g== X-Received: by 2002:a65:6cc8:0:b0:3fe:2b89:cc00 with SMTP id g8-20020a656cc8000000b003fe2b89cc00mr1785200pgw.599.1657696047165; Wed, 13 Jul 2022 00:07:27 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. [193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.23 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:26 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . 
org" Subject: [PATCH v2 04/12] locking/qspinlock: move pv lock word helpers into qspinlock.c Date: Wed, 13 Jul 2022 17:06:56 +1000 Message-Id: <20220713070704.308394-5-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" There is no real reason not to keep all the bit manipulation together. Signed-off-by: Nicholas Piggin --- kernel/locking/qspinlock.c | 109 +++++++++++++++++----------- kernel/locking/qspinlock_paravirt.h | 51 ------------- 2 files changed, 65 insertions(+), 95 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 7360d643de29..95bf24d276c3 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -141,7 +141,26 @@ struct qnode *grab_qnode(struct qnode *base, int idx) =20 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) =20 +/** + * set_pending - set the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,0,* -> *,1,* + * + * For paravirt, the pending bit is used by the queue head vCPU to indicate + * that it is actively spinning on the lock and no lock stealing is allowe= d. + * Non-paravirt, the pending bit is used to avoid loading the extra node + * cacheline in the likely contended case. + */ +static __always_inline void set_pending(struct qspinlock *lock) +{ #if _Q_PENDING_BITS =3D=3D 8 + WRITE_ONCE(lock->pending, 1); +#else + atomic_or(_Q_PENDING_VAL, &lock->val); +#endif +} + /** * clear_pending - clear the pending bit. 
* @lock: Pointer to queued spinlock structure @@ -150,7 +169,11 @@ struct qnode *grab_qnode(struct qnode *base, int idx) */ static __always_inline void clear_pending(struct qspinlock *lock) { +#if _Q_PENDING_BITS =3D=3D 8 WRITE_ONCE(lock->pending, 0); +#else + atomic_andnot(_Q_PENDING_VAL, &lock->val); +#endif } =20 /** @@ -163,74 +186,72 @@ static __always_inline void clear_pending(struct qspi= nlock *lock) */ static __always_inline void clear_pending_set_locked(struct qspinlock *loc= k) { +#if _Q_PENDING_BITS =3D=3D 8 WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL); +#else + atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val); +#endif } =20 -/* - * xchg_tail - Put in the new queue tail code word & retrieve previous one - * @lock : Pointer to queued spinlock structure - * @tail : The new queue tail code word - * Return: The previous queue tail code word - * - * xchg(lock, tail), which heads an address dependency - * - * p,*,* -> n,*,* ; prev =3D xchg(lock, node) - */ -static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) -{ - /* - * We can use relaxed semantics since the caller ensures that the - * MCS node is properly initialized before updating the tail. - */ - return (u32)xchg_relaxed(&lock->tail, - tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; -} - -#else /* _Q_PENDING_BITS =3D=3D 8 */ - /** - * clear_pending - clear the pending bit. 
+ * trylock_clear_pending - try to take ownership and clear the pending bit * @lock: Pointer to queued spinlock structure * - * *,1,* -> *,0,* + * 0,1,0 -> 0,0,1 */ -static __always_inline void clear_pending(struct qspinlock *lock) +static __always_inline int trylock_clear_pending(struct qspinlock *lock) { - atomic_andnot(_Q_PENDING_VAL, &lock->val); -} +#if _Q_PENDING_BITS =3D=3D 8 + return !READ_ONCE(lock->locked) && + (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL, + _Q_LOCKED_VAL) =3D=3D _Q_PENDING_VAL); +#else + int val =3D atomic_read(&lock->val); =20 -/** - * clear_pending_set_locked - take ownership and clear the pending bit. - * @lock: Pointer to queued spinlock structure - * - * *,1,0 -> *,0,1 - */ -static __always_inline void clear_pending_set_locked(struct qspinlock *loc= k) -{ - atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val); + for (;;) { + int old, new; + + if (val & _Q_LOCKED_MASK) + break; + + /* + * Try to clear pending bit & set locked bit + */ + old =3D val; + new =3D (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; + val =3D atomic_cmpxchg_acquire(&lock->val, old, new); + + if (val =3D=3D old) + return 1; + } + return 0; +#endif } =20 -/** +/* * xchg_tail - Put in the new queue tail code word & retrieve previous one * @lock : Pointer to queued spinlock structure * @tail : The new queue tail code word * Return: The previous queue tail code word * - * xchg(lock, tail) + * xchg(lock, tail), which heads an address dependency * * p,*,* -> n,*,* ; prev =3D xchg(lock, node) */ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) { + /* + * We can use relaxed semantics since the caller ensures that the + * MCS node is properly initialized before updating the tail. 
+ */ +#if _Q_PENDING_BITS =3D=3D 8 + return (u32)xchg_relaxed(&lock->tail, + tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; +#else u32 old, new, val =3D atomic_read(&lock->val); =20 for (;;) { new =3D (val & _Q_LOCKED_PENDING_MASK) | tail; - /* - * We can use relaxed semantics since the caller ensures that - * the MCS node is properly initialized before updating the - * tail. - */ old =3D atomic_cmpxchg_relaxed(&lock->val, val, new); if (old =3D=3D val) break; @@ -238,8 +259,8 @@ static __always_inline u32 xchg_tail(struct qspinlock *= lock, u32 tail) val =3D old; } return old; +#endif } -#endif /* _Q_PENDING_BITS =3D=3D 8 */ =20 /** * queued_fetch_set_pending_acquire - fetch the whole lock value and set p= ending diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock= _paravirt.h index cce3d3dde216..97385861adc2 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -95,57 +95,6 @@ static inline bool pv_hybrid_queued_unfair_trylock(struc= t qspinlock *lock) return false; } =20 -/* - * The pending bit is used by the queue head vCPU to indicate that it - * is actively spinning on the lock and no lock stealing is allowed. - */ -#if _Q_PENDING_BITS =3D=3D 8 -static __always_inline void set_pending(struct qspinlock *lock) -{ - WRITE_ONCE(lock->pending, 1); -} - -/* - * The pending bit check in pv_queued_spin_steal_lock() isn't a memory - * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the - * lock just to be sure that it will get it. 
- */ -static __always_inline int trylock_clear_pending(struct qspinlock *lock) -{ - return !READ_ONCE(lock->locked) && - (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL, - _Q_LOCKED_VAL) =3D=3D _Q_PENDING_VAL); -} -#else /* _Q_PENDING_BITS =3D=3D 8 */ -static __always_inline void set_pending(struct qspinlock *lock) -{ - atomic_or(_Q_PENDING_VAL, &lock->val); -} - -static __always_inline int trylock_clear_pending(struct qspinlock *lock) -{ - int val =3D atomic_read(&lock->val); - - for (;;) { - int old, new; - - if (val & _Q_LOCKED_MASK) - break; - - /* - * Try to clear pending bit & set locked bit - */ - old =3D val; - new =3D (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; - val =3D atomic_cmpxchg_acquire(&lock->val, old, new); - - if (val =3D=3D old) - return 1; - } - return 0; -} -#endif /* _Q_PENDING_BITS =3D=3D 8 */ - /* * Lock and MCS node addresses hash table for fast lookup * --=20 2.35.1 From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1D3AAC433EF for ; Wed, 13 Jul 2022 07:07:58 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234558AbiGMHH4 (ORCPT ); Wed, 13 Jul 2022 03:07:56 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34814 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234608AbiGMHHi (ORCPT ); Wed, 13 Jul 2022 03:07:38 -0400 Received: from mail-pj1-x1035.google.com (mail-pj1-x1035.google.com [IPv6:2607:f8b0:4864:20::1035]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A8372BF552 for ; Wed, 13 Jul 2022 00:07:31 -0700 (PDT) Received: by mail-pj1-x1035.google.com with SMTP id o5-20020a17090a3d4500b001ef76490983so2121850pjf.2 for ; Wed, 13 Jul 2022 00:07:31 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; 
s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=P/55yRVKMzcZl/sxLnf9XPYi5esP6NCSmAVTv603IGU=; b=drjTDyKdCjLycUT+Rt1sWIyeHzUOG/WJ4uut6Ul7s2rbwdf0MoFexnxUC5xBybIuWe Sb8dhzZ1Dv+sAqD/LqnrIbONlpnP2+o4yMtuDa3YkTnEIyVgk0zXdIH8etmSTXW8GUiG dIHTHgNcUBgxIxdSPs0DyiTyzEpTSALxN7lMeKVGaYt7dC81imfG5Imqy7QuVgiPUwYS N+t2aHoJzTwy29SKwA3Tb2mvzaM6+CnLmhQZiZIMhVvUMTKd14/H4AoAwqmyFC1xmSD8 LJGO83vPdCLPYLlVEY0EpotV6T41IttYy2pDcD/9oA09a4Sz8dR6fJpAHNH7f9PS4wcx ugug== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=P/55yRVKMzcZl/sxLnf9XPYi5esP6NCSmAVTv603IGU=; b=vblzXBa1UGDXkGZLK0Z74lsQrnucKYtutTv5Rukfa+5eG6KO9jxN3ETDdazc57ycR1 tvbVxZTY0yyOIAwOKuIRAR1m4KAUnMz7YAHF4aiRLJE7MRj1B3+x/d05mMGa1eOKJjzk rWIVDENXj95TA6z9QCgFx8d1isjhry4ldGfdO1TPmHYZbakAMOeV/mw1ygVVshT/B+sB 1Aq4L3h+ASn9ZcXLxJot49tss97VQSoLXnJ7pi/88dzm0Ap+z1FZALJpT/lo/rms9boA 6zgBykG1ZbPBUDcdMryJainG+sNiAc4vKBWzIPMPCTvFC6zxNhz0QAN89L7cs2A1oUt/ bO7Q== X-Gm-Message-State: AJIora/rigiGv9mWISmoUECdxvBGYhqj7e7C+FrrtIifqImjNE/+Kc3s K2hTr8jd0mfk7hsTsZDW9Gs= X-Google-Smtp-Source: AGRyM1vV/vP4cCrjQCnEbXlugcs1RH3z7KsgGgtG9MXcwKr9JUuIRd0zLPPvIAg97NdlTiTbUVQF+g== X-Received: by 2002:a17:902:ba91:b0:16c:6b8e:cd06 with SMTP id k17-20020a170902ba9100b0016c6b8ecd06mr1856753pls.33.1657696050936; Wed, 13 Jul 2022 00:07:30 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. [193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.27 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:30 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . 
org" Subject: [PATCH v2 05/12] locking/qspinlock: be less clever with the preprocessor Date: Wed, 13 Jul 2022 17:06:57 +1000 Message-Id: <20220713070704.308394-6-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Stop qspinlock.c including itself and avoid most of the function renaming with the preprocessor. This is mostly done by having the common slowpath code take a 'bool paravirt' argument and adjusting code based on that. __always_inline ensures the paravirt and non-paravirt cases are kept separate and the compiler can constant-fold the 'paravirt' tests. Signed-off-by: Nicholas Piggin --- kernel/locking/qspinlock.c | 116 ++++++++++++---------------- kernel/locking/qspinlock_paravirt.h | 10 +-- 2 files changed, 52 insertions(+), 74 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 95bf24d276c3..037bd5440cfd 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -11,8 +11,6 @@ * Peter Zijlstra */ =20 -#ifndef _GEN_PV_LOCK_SLOWPATH - #include #include #include @@ -287,35 +285,21 @@ static __always_inline void set_locked(struct qspinlo= ck *lock) WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); } =20 - -/* - * Generate the native code for queued_spin_unlock_slowpath(); provide NOP= s for - * all the PV callbacks. 
- */ - -static __always_inline void __pv_init_node(struct qnode *node) { } -static __always_inline void __pv_wait_node(struct qnode *node, - struct qnode *prev) { } -static __always_inline void __pv_kick_node(struct qspinlock *lock, - struct qnode *node) { } -static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, - struct qnode *node) - { return 0; } - -#define pv_enabled() false - -#define pv_init_node __pv_init_node -#define pv_wait_node __pv_wait_node -#define pv_kick_node __pv_kick_node -#define pv_wait_head_or_lock __pv_wait_head_or_lock - #ifdef CONFIG_PARAVIRT_SPINLOCKS -#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath -#endif - -#endif /* _GEN_PV_LOCK_SLOWPATH */ +#include "qspinlock_paravirt.h" +#else /* CONFIG_PARAVIRT_SPINLOCKS */ +static __always_inline void pv_init_node(struct qnode *node) { } +static __always_inline void pv_wait_node(struct qnode *node, + struct qnode *prev) { } +static __always_inline void pv_kick_node(struct qspinlock *lock, + struct qnode *node) { } +static __always_inline u32 pv_wait_head_or_lock(struct qspinlock *lock, + struct qnode *node) + { return 0; } +static __always_inline bool pv_hybrid_queued_unfair_trylock(struct qspinlo= ck *lock) { BUILD_BUG(); } +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ =20 -static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) +static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *l= ock, bool paravirt) { struct qnode *prev, *next, *node; u32 val, old, tail; @@ -340,8 +324,13 @@ static inline void queued_spin_lock_mcs_queue(struct q= spinlock *lock) */ if (unlikely(idx >=3D MAX_NODES)) { lockevent_inc(lock_no_node); - while (!queued_spin_trylock(lock)) - cpu_relax(); + if (paravirt) { + while (!pv_hybrid_queued_unfair_trylock(lock)) + cpu_relax(); + } else { + while (!queued_spin_trylock(lock)) + cpu_relax(); + } goto release; } =20 @@ -361,15 +350,21 @@ static inline void queued_spin_lock_mcs_queue(struct = qspinlock *lock) =20 
node->locked =3D 0; node->next =3D NULL; - pv_init_node(node); + if (paravirt) + pv_init_node(node); =20 /* * We touched a (possibly) cold cacheline in the per-cpu queue node; * attempt the trylock once more in the hope someone let go while we * weren't watching. */ - if (queued_spin_trylock(lock)) - goto release; + if (paravirt) { + if (pv_hybrid_queued_unfair_trylock(lock)) + goto release; + } else { + if (queued_spin_trylock(lock)) + goto release; + } =20 /* * Ensure that the initialisation of @node is complete before we @@ -398,7 +393,8 @@ static inline void queued_spin_lock_mcs_queue(struct qs= pinlock *lock) /* Link @node into the waitqueue. */ WRITE_ONCE(prev->next, node); =20 - pv_wait_node(node, prev); + if (paravirt) + pv_wait_node(node, prev); /* Wait for mcs node lock to be released */ smp_cond_load_acquire(&node->locked, VAL); =20 @@ -434,8 +430,10 @@ static inline void queued_spin_lock_mcs_queue(struct q= spinlock *lock) * If PV isn't active, 0 will be returned instead. * */ - if ((val =3D pv_wait_head_or_lock(lock, node))) - goto locked; + if (paravirt) { + if ((val =3D pv_wait_head_or_lock(lock, node))) + goto locked; + } =20 val =3D atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MA= SK)); =20 @@ -480,7 +478,8 @@ static inline void queued_spin_lock_mcs_queue(struct qs= pinlock *lock) next =3D smp_cond_load_relaxed(&node->next, (VAL)); =20 smp_store_release(&next->locked, 1); /* unlock the mcs node lock */ - pv_kick_node(lock, next); + if (paravirt) + pv_kick_node(lock, next); =20 release: trace_contention_end(lock, 0); @@ -512,13 +511,12 @@ static inline void queued_spin_lock_mcs_queue(struct = qspinlock *lock) * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' : * queue : ^--' : */ +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath +#endif + void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { - if (pv_enabled()) { - queued_spin_lock_mcs_queue(lock); - return; - } - if 
(virt_spin_lock(lock)) return; =20 @@ -592,31 +590,17 @@ void queued_spin_lock_slowpath(struct qspinlock *lock= , u32 val) */ queue: lockevent_inc(lock_slowpath); - queued_spin_lock_mcs_queue(lock); + queued_spin_lock_mcs_queue(lock, false); } EXPORT_SYMBOL(queued_spin_lock_slowpath); =20 -/* - * Generate the paravirt code for queued_spin_unlock_slowpath(). - */ -#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS) -#define _GEN_PV_LOCK_SLOWPATH - -#undef pv_enabled -#define pv_enabled() true - -#undef pv_init_node -#undef pv_wait_node -#undef pv_kick_node -#undef pv_wait_head_or_lock - -#define queued_spin_lock_mcs_queue __pv_queued_spin_lock_mcs_queue - -#undef queued_spin_lock_slowpath -#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath - -#include "qspinlock_paravirt.h" -#include "qspinlock.c" +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#undef queued_spin_lock_slowpath +void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + queued_spin_lock_mcs_queue(lock, true); +} +EXPORT_SYMBOL(__pv_queued_spin_lock_slowpath); =20 bool nopvspin __initdata; static __init int parse_nopvspin(char *arg) @@ -625,4 +609,4 @@ static __init int parse_nopvspin(char *arg) return 0; } early_param("nopvspin", parse_nopvspin); -#endif +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock= _paravirt.h index 97385861adc2..f1922e3a0f7d 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -1,8 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _GEN_PV_LOCK_SLOWPATH -#error "do not include this file" -#endif - #include #include #include @@ -50,9 +46,8 @@ enum vcpu_state { /* * Hybrid PV queued/unfair lock * - * By replacing the regular queued_spin_trylock() with the function below, - * it will be called once when a lock waiter enter the PV slowpath before - * being queued. 
+ * This function is called once when a lock waiter enters the PV slowpath + * before being queued. * * The pending bit is set by the queue head vCPU of the MCS wait queue in * pv_wait_head_or_lock() to signal that it is ready to spin on the lock. @@ -71,7 +66,6 @@ enum vcpu_state { * queued lock (no lock starvation) and an unfair lock (good performance * on not heavily contended locks). */ -#define queued_spin_trylock(l) pv_hybrid_queued_unfair_trylock(l) static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) { /* --=20 2.35.1 From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id EE8D5C433EF for ; Wed, 13 Jul 2022 07:08:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234591AbiGMHIG (ORCPT ); Wed, 13 Jul 2022 03:08:06 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34998 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234636AbiGMHHo (ORCPT ); Wed, 13 Jul 2022 03:07:44 -0400 Received: from mail-pg1-x52e.google.com (mail-pg1-x52e.google.com [IPv6:2607:f8b0:4864:20::52e]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B4FD7E3071 for ; Wed, 13 Jul 2022 00:07:35 -0700 (PDT) Received: by mail-pg1-x52e.google.com with SMTP id e132so9676157pgc.5 for ; Wed, 13 Jul 2022 00:07:35 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=40ncgsOAwyc3rCPX0MCyCkrSp4LjXOzqrt5qmZficko=; b=HpxmwBGS3Nt71I3tzKp85f/MhFjS/L+EKFCZH2f7IBMra7ekXRZfXSAg9bjYdBhJWy rAF4dbWfOZmAZJDkeTp551N9hlRAMSPH8pF5xCJzsvcwBdCcGhfK6WhEK6KMXfLSyRDm T8Y33S+4NVDq9qOui0gfLPGnQ6bxRAOsGvzNh03/toGY1YFANRSe8QxvF8+QeX0FiMHA 
jnLrVygOZjL4MoSCnKKo3rikl838U4p1tgxbLMRmuvEbymE73Ge+E3XdaYktC9qFuaBJ KP+LvTmpDbbKwl7C2Aowc/UaQ55xSkBrDR/QPWhsT1aTmmq99Dj35qZyBet2C48Ots4H 4ijw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=40ncgsOAwyc3rCPX0MCyCkrSp4LjXOzqrt5qmZficko=; b=CndQSEvwO6G1ACCARPwN8RIPuw+Eg0iG56GhaqIF/ckK0KgBCEONyc0Tmfh7eoaCEk c+ssNwJbNAVUhe8Zgrota0I7TO9XMc2UmQrRR5pVJuFtMnvUX98vxtyKGe3jqK7s7Bi3 IxKPDKtCX7AfycVu1MwPLU/mrP2TJxRNjya+pXEWG94G5B6KVGxdqXFu4K3so57Fxkzk GUL2OVfpp52Ao1wVU9DMMCi9nzp7cvGkRAJxqqXWIGS9dLL1Lh/QsHh//VWbLE2xDm/1 Vyj2F+fKrHHftZdNaP+9m70XQhzi52VeOEyFfThBCfY6oHjRIiJDWF6jaYg0lau1eZVp 1KWw== X-Gm-Message-State: AJIora9Bc+UoxXuXDWZu5DyQkh+NcjcQZ/DfmSS4Z+2Lq6ztf//CqhXd O3fhIvA3JoXsJNf71+DS9oEIo2ha0OQ= X-Google-Smtp-Source: AGRyM1sThATjQCJocj+atU5BLGVrjfjQQYzOS6wKnOgBV2rMhB8uK0kJM7/2nEQZjwiiWHfrYi2aug== X-Received: by 2002:a65:6786:0:b0:415:c67a:49a9 with SMTP id e6-20020a656786000000b00415c67a49a9mr1776095pgr.395.1657696055012; Wed, 13 Jul 2022 00:07:35 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. [193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.31 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:34 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . 
org" Subject: [PATCH v2 06/12] locking/qspinlock: merge qspinlock_paravirt.h into qspinlock.c Date: Wed, 13 Jul 2022 17:06:58 +1000 Message-Id: <20220713070704.308394-7-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" There isn't much reason to keep these separate. Signed-off-by: Nicholas Piggin Reported-by: kernel test robot --- kernel/locking/qspinlock.c | 488 ++++++++++++++++++++++++++- kernel/locking/qspinlock_paravirt.h | 490 ---------------------------- 2 files changed, 487 insertions(+), 491 deletions(-) delete mode 100644 kernel/locking/qspinlock_paravirt.h diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 037bd5440cfd..3b3663d15402 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -286,7 +287,492 @@ static __always_inline void set_locked(struct qspinlo= ck *lock) } =20 #ifdef CONFIG_PARAVIRT_SPINLOCKS -#include "qspinlock_paravirt.h" +/* + * Implement paravirt qspinlocks; the general idea is to halt the vcpus in= stead + * of spinning them. + * + * This relies on the architecture to provide two paravirt hypercalls: + * + * pv_wait(u8 *ptr, u8 val) -- suspends the vcpu if *ptr =3D=3D val + * pv_kick(cpu) -- wakes a suspended vcpu + * + * Using these we implement __pv_queued_spin_lock_slowpath() and + * __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath()= and + * native_queued_spin_unlock(). + */ + +#define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET) + +/* + * Queue Node Adaptive Spinning + * + * A queue node vCPU will stop spinning if the vCPU in the previous node is + * not running. 
The one lock stealing attempt allowed at slowpath entry + * mitigates the slight slowdown for non-overcommitted guest with this + * aggressive wait-early mechanism. + * + * The status of the previous node will be checked at fixed interval + * controlled by PV_PREV_CHECK_MASK. This is to ensure that we won't + * pound on the cacheline of the previous node too heavily. + */ +#define PV_PREV_CHECK_MASK 0xff + +/* + * Queue node uses: vcpu_running & vcpu_halted. + * Queue head uses: vcpu_running & vcpu_hashed. + */ +enum vcpu_state { + vcpu_running =3D 0, + vcpu_halted, /* Used only in pv_wait_node */ + vcpu_hashed, /* =3D pv_hash'ed + vcpu_halted */ +}; + +/* + * Hybrid PV queued/unfair lock + * + * This function is called once when a lock waiter enters the PV slowpath + * before being queued. + * + * The pending bit is set by the queue head vCPU of the MCS wait queue in + * pv_wait_head_or_lock() to signal that it is ready to spin on the lock. + * When that bit becomes visible to the incoming waiters, no lock stealing + * is allowed. The function will return immediately to make the waiters + * enter the MCS wait queue. So lock starvation shouldn't happen as long + * as the queued mode vCPUs are actively running to set the pending bit + * and hence disabling lock stealing. + * + * When the pending bit isn't set, the lock waiters will stay in the unfair + * mode spinning on the lock unless the MCS wait queue is empty. In this + * case, the lock waiters will enter the queued mode slowpath trying to + * become the queue head and set the pending bit. + * + * This hybrid PV queued/unfair lock combines the best attributes of a + * queued lock (no lock starvation) and an unfair lock (good performance + * on not heavily contended locks). + */ +static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) +{ + /* + * Stay in unfair lock mode as long as queued mode waiters are + * present in the MCS wait queue but the pending bit isn't set. 
+ */ + for (;;) { + int val =3D atomic_read(&lock->val); + + if (!(val & _Q_LOCKED_PENDING_MASK) && + (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) =3D=3D 0)) { + lockevent_inc(pv_lock_stealing); + return true; + } + if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK)) + break; + + cpu_relax(); + } + + return false; +} + +/* + * Lock and MCS node addresses hash table for fast lookup + * + * Hashing is done on a per-cacheline basis to minimize the need to access + * more than one cacheline. + * + * Dynamically allocate a hash table big enough to hold at least 4X the + * number of possible cpus in the system. Allocation is done on page + * granularity. So the minimum number of hash buckets should be at least + * 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page. + * + * Since we should not be holding locks from NMI context (very rare indeed= ) the + * max load factor is 0.75, which is around the point where open addressing + * breaks down. + * + */ +struct pv_hash_entry { + struct qspinlock *lock; + struct qnode *node; +}; + +#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry)) +#define PV_HE_MIN (PAGE_SIZE / sizeof(struct pv_hash_entry)) + +static struct pv_hash_entry *pv_lock_hash; +static unsigned int pv_lock_hash_bits __read_mostly; + +/* + * Allocate memory for the PV qspinlock hash buckets + * + * This function should be called from the paravirt spinlock initialization + * routine. + */ +void __init __pv_init_lock_hash(void) +{ + int pv_hash_size =3D ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE); + + if (pv_hash_size < PV_HE_MIN) + pv_hash_size =3D PV_HE_MIN; + + /* + * Allocate space from bootmem which should be page-size aligned + * and hence cacheline aligned. 
+ */ + pv_lock_hash =3D alloc_large_system_hash("PV qspinlock", + sizeof(struct pv_hash_entry), + pv_hash_size, 0, + HASH_EARLY | HASH_ZERO, + &pv_lock_hash_bits, NULL, + pv_hash_size, pv_hash_size); +} + +#define for_each_hash_entry(he, offset, hash) \ + for (hash &=3D ~(PV_HE_PER_LINE - 1), he =3D &pv_lock_hash[hash], offset = =3D 0; \ + offset < (1 << pv_lock_hash_bits); \ + offset++, he =3D &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash= _bits) - 1)]) + +static struct qspinlock **pv_hash(struct qspinlock *lock, struct qnode *no= de) +{ + unsigned long offset, hash =3D hash_ptr(lock, pv_lock_hash_bits); + struct pv_hash_entry *he; + int hopcnt =3D 0; + + for_each_hash_entry(he, offset, hash) { + hopcnt++; + if (!cmpxchg(&he->lock, NULL, lock)) { + WRITE_ONCE(he->node, node); + lockevent_pv_hop(hopcnt); + return &he->lock; + } + } + /* + * Hard assume there is a free entry for us. + * + * This is guaranteed by ensuring every blocked lock only ever consumes + * a single entry, and since we only have 4 nesting levels per CPU + * and allocated 4*nr_possible_cpus(), this must be so. + * + * The single entry is guaranteed by having the lock owner unhash + * before it releases. + */ + BUG(); +} + +static struct qnode *pv_unhash(struct qspinlock *lock) +{ + unsigned long offset, hash =3D hash_ptr(lock, pv_lock_hash_bits); + struct pv_hash_entry *he; + struct qnode *node; + + for_each_hash_entry(he, offset, hash) { + if (READ_ONCE(he->lock) =3D=3D lock) { + node =3D READ_ONCE(he->node); + WRITE_ONCE(he->lock, NULL); + return node; + } + } + /* + * Hard assume we'll find an entry. + * + * This guarantees a limited lookup time and is itself guaranteed by + * having the lock owner do the unhash -- IFF the unlock sees the + * SLOW flag, there MUST be a hash entry. + */ + BUG(); +} + +/* + * Return true if when it is time to check the previous node which is not + * in a running state. 
+ */ +static inline bool +pv_wait_early(struct qnode *prev, int loop) +{ + if ((loop & PV_PREV_CHECK_MASK) !=3D 0) + return false; + + return READ_ONCE(prev->state) !=3D vcpu_running; +} + +/* + * Initialize the PV part of the qnode. + */ +static void pv_init_node(struct qnode *node) +{ + node->cpu =3D smp_processor_id(); + node->state =3D vcpu_running; +} + +/* + * Wait for node->locked to become true, halt the vcpu after a short spin. + * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its + * behalf. + */ +static void pv_wait_node(struct qnode *node, struct qnode *prev) +{ + int loop; + bool wait_early; + + for (;;) { + for (wait_early =3D false, loop =3D SPIN_THRESHOLD; loop; loop--) { + if (READ_ONCE(node->locked)) + return; + if (pv_wait_early(prev, loop)) { + wait_early =3D true; + break; + } + cpu_relax(); + } + + /* + * Order node->state vs node->locked thusly: + * + * [S] node->state =3D vcpu_halted [S] next->locked =3D 1 + * MB MB + * [L] node->locked [RmW] node->state =3D vcpu_hashed + * + * Matches the cmpxchg() from pv_kick_node(). + */ + smp_store_mb(node->state, vcpu_halted); + + if (!READ_ONCE(node->locked)) { + lockevent_inc(pv_wait_node); + lockevent_cond_inc(pv_wait_early, wait_early); + pv_wait(&node->state, vcpu_halted); + } + + /* + * If pv_kick_node() changed us to vcpu_hashed, retain that + * value so that pv_wait_head_or_lock() knows to not also try + * to hash this lock. + */ + cmpxchg(&node->state, vcpu_halted, vcpu_running); + + /* + * If the locked flag is still not set after wakeup, it is a + * spurious wakeup and the vCPU should wait again. However, + * there is a pretty high overhead for CPU halting and kicking. + * So it is better to spin for a while in the hope that the + * MCS lock will be released soon. + */ + lockevent_cond_inc(pv_spurious_wakeup, + !READ_ONCE(node->locked)); + } + + /* + * By now our node->locked should be 1 and our caller will not actually + * spin-wait for it. 
We do however rely on our caller to do a + * load-acquire for us. + */ +} + +/* + * Called after setting next->locked =3D 1 when we're the lock owner. + * + * Instead of waking the waiters stuck in pv_wait_node() advance their sta= te + * such that they're waiting in pv_wait_head_or_lock(), this avoids a + * wake/sleep cycle. + */ +static void pv_kick_node(struct qspinlock *lock, struct qnode *node) +{ + /* + * If the vCPU is indeed halted, advance its state to match that of + * pv_wait_node(). If OTOH this fails, the vCPU was running and will + * observe its next->locked value and advance itself. + * + * Matches with smp_store_mb() and cmpxchg() in pv_wait_node() + * + * The write to next->locked in arch_mcs_spin_unlock_contended() + * must be ordered before the read of node->state in the cmpxchg() + * below for the code to work correctly. To guarantee full ordering + * irrespective of the success or failure of the cmpxchg(), + * a relaxed version with explicit barrier is used. The control + * dependency will order the reading of node->state before any + * subsequent writes. + */ + smp_mb__before_atomic(); + if (cmpxchg_relaxed(&node->state, vcpu_halted, vcpu_hashed) + !=3D vcpu_halted) + return; + + /* + * Put the lock into the hash table and set the _Q_SLOW_VAL. + * + * As this is the same vCPU that will check the _Q_SLOW_VAL value and + * the hash table later on at unlock time, no atomic instruction is + * needed. + */ + WRITE_ONCE(lock->locked, _Q_SLOW_VAL); + (void)pv_hash(lock, node); +} + +/* + * Wait for l->locked to become clear and acquire the lock; + * halt the vcpu after a short spin. + * __pv_queued_spin_unlock() will wake us. + * + * The current value of the lock will be returned for additional processin= g. 
+ */ +static u32 +pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node) +{ + struct qspinlock **lp =3D NULL; + int waitcnt =3D 0; + int loop; + + /* + * If pv_kick_node() already advanced our state, we don't need to + * insert ourselves into the hash table anymore. + */ + if (READ_ONCE(node->state) =3D=3D vcpu_hashed) + lp =3D (struct qspinlock **)1; + + /* + * Tracking # of slowpath locking operations + */ + lockevent_inc(lock_slowpath); + + for (;; waitcnt++) { + /* + * Set correct vCPU state to be used by queue node wait-early + * mechanism. + */ + WRITE_ONCE(node->state, vcpu_running); + + /* + * Set the pending bit in the active lock spinning loop to + * disable lock stealing before attempting to acquire the lock. + */ + set_pending(lock); + for (loop =3D SPIN_THRESHOLD; loop; loop--) { + if (trylock_clear_pending(lock)) + goto gotlock; + cpu_relax(); + } + clear_pending(lock); + + + if (!lp) { /* ONCE */ + lp =3D pv_hash(lock, node); + + /* + * We must hash before setting _Q_SLOW_VAL, such that + * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock() + * we'll be sure to be able to observe our hash entry. + * + * [S] [Rmw] l->locked =3D=3D _Q_SLOW_VAL + * MB RMB + * [RmW] l->locked =3D _Q_SLOW_VAL [L] + * + * Matches the smp_rmb() in __pv_queued_spin_unlock(). + */ + if (xchg(&lock->locked, _Q_SLOW_VAL) =3D=3D 0) { + /* + * The lock was free and now we own the lock. + * Change the lock value back to _Q_LOCKED_VAL + * and unhash the table. + */ + WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); + WRITE_ONCE(*lp, NULL); + goto gotlock; + } + } + WRITE_ONCE(node->state, vcpu_hashed); + lockevent_inc(pv_wait_head); + lockevent_cond_inc(pv_wait_again, waitcnt); + pv_wait(&lock->locked, _Q_SLOW_VAL); + + /* + * Because of lock stealing, the queue head vCPU may not be + * able to acquire the lock before it has to wait again. + */ + } + + /* + * The cmpxchg() or xchg() call before coming here provides the + * acquire semantics for locking. 
The dummy ORing of _Q_LOCKED_VAL + * here is to indicate to the compiler that the value will always + * be nozero to enable better code optimization. + */ +gotlock: + return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL); +} + +/* + * PV versions of the unlock fastpath and slowpath functions to be used + * instead of queued_spin_unlock(). + */ +__visible void +__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) +{ + struct qnode *node; + + if (unlikely(locked !=3D _Q_SLOW_VAL)) { + WARN(!debug_locks_silent, + "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n", + (unsigned long)lock, atomic_read(&lock->val)); + return; + } + + /* + * A failed cmpxchg doesn't provide any memory-ordering guarantees, + * so we need a barrier to order the read of the node data in + * pv_unhash *after* we've read the lock being _Q_SLOW_VAL. + * + * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL. + */ + smp_rmb(); + + /* + * Since the above failed to release, this must be the SLOW path. + * Therefore start by looking up the blocked node and unhashing it. + */ + node =3D pv_unhash(lock); + + /* + * Now that we have a reference to the (likely) blocked qnode, + * release the lock. + */ + smp_store_release(&lock->locked, 0); + + /* + * At this point the memory pointed at by lock can be freed/reused, + * however we can still use the qnode to kick the CPU. + * The other vCPU may not really be halted, but kicking an active + * vCPU is harmless other than the additional latency in completing + * the unlock. + */ + lockevent_inc(pv_kick_unlock); + pv_kick(node->cpu); +} + +/* + * Include the architecture specific callee-save thunk of the + * __pv_queued_spin_unlock(). This thunk is put together with + * __pv_queued_spin_unlock() to make the callee-save thunk and the real un= lock + * function close to each other sharing consecutive instruction cachelines. + * Alternatively, architecture specific version of __pv_queued_spin_unlock= () + * can be defined. 
+ */ +#include + +#ifndef __pv_queued_spin_unlock +__visible void __pv_queued_spin_unlock(struct qspinlock *lock) +{ + u8 locked; + + /* + * We must not unlock if SLOW, because in that case we must first + * unhash. Otherwise it would be possible to have multiple @lock + * entries, which would be BAD. + */ + locked =3D cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0); + if (likely(locked =3D=3D _Q_LOCKED_VAL)) + return; + + __pv_queued_spin_unlock_slowpath(lock, locked); +} +#endif + #else /* CONFIG_PARAVIRT_SPINLOCKS */ static __always_inline void pv_init_node(struct qnode *node) { } static __always_inline void pv_wait_node(struct qnode *node, diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock= _paravirt.h deleted file mode 100644 index f1922e3a0f7d..000000000000 --- a/kernel/locking/qspinlock_paravirt.h +++ /dev/null @@ -1,490 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include - -/* - * Implement paravirt qspinlocks; the general idea is to halt the vcpus in= stead - * of spinning them. - * - * This relies on the architecture to provide two paravirt hypercalls: - * - * pv_wait(u8 *ptr, u8 val) -- suspends the vcpu if *ptr =3D=3D val - * pv_kick(cpu) -- wakes a suspended vcpu - * - * Using these we implement __pv_queued_spin_lock_slowpath() and - * __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath()= and - * native_queued_spin_unlock(). - */ - -#define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET) - -/* - * Queue Node Adaptive Spinning - * - * A queue node vCPU will stop spinning if the vCPU in the previous node is - * not running. The one lock stealing attempt allowed at slowpath entry - * mitigates the slight slowdown for non-overcommitted guest with this - * aggressive wait-early mechanism. - * - * The status of the previous node will be checked at fixed interval - * controlled by PV_PREV_CHECK_MASK. This is to ensure that we won't - * pound on the cacheline of the previous node too heavily. 
- */ -#define PV_PREV_CHECK_MASK 0xff - -/* - * Queue node uses: vcpu_running & vcpu_halted. - * Queue head uses: vcpu_running & vcpu_hashed. - */ -enum vcpu_state { - vcpu_running =3D 0, - vcpu_halted, /* Used only in pv_wait_node */ - vcpu_hashed, /* =3D pv_hash'ed + vcpu_halted */ -}; - -/* - * Hybrid PV queued/unfair lock - * - * This function is called once when a lock waiter enters the PV slowpath - * before being queued. - * - * The pending bit is set by the queue head vCPU of the MCS wait queue in - * pv_wait_head_or_lock() to signal that it is ready to spin on the lock. - * When that bit becomes visible to the incoming waiters, no lock stealing - * is allowed. The function will return immediately to make the waiters - * enter the MCS wait queue. So lock starvation shouldn't happen as long - * as the queued mode vCPUs are actively running to set the pending bit - * and hence disabling lock stealing. - * - * When the pending bit isn't set, the lock waiters will stay in the unfair - * mode spinning on the lock unless the MCS wait queue is empty. In this - * case, the lock waiters will enter the queued mode slowpath trying to - * become the queue head and set the pending bit. - * - * This hybrid PV queued/unfair lock combines the best attributes of a - * queued lock (no lock starvation) and an unfair lock (good performance - * on not heavily contended locks). - */ -static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) -{ - /* - * Stay in unfair lock mode as long as queued mode waiters are - * present in the MCS wait queue but the pending bit isn't set. 
- */ - for (;;) { - int val =3D atomic_read(&lock->val); - - if (!(val & _Q_LOCKED_PENDING_MASK) && - (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) =3D=3D 0)) { - lockevent_inc(pv_lock_stealing); - return true; - } - if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK)) - break; - - cpu_relax(); - } - - return false; -} - -/* - * Lock and MCS node addresses hash table for fast lookup - * - * Hashing is done on a per-cacheline basis to minimize the need to access - * more than one cacheline. - * - * Dynamically allocate a hash table big enough to hold at least 4X the - * number of possible cpus in the system. Allocation is done on page - * granularity. So the minimum number of hash buckets should be at least - * 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page. - * - * Since we should not be holding locks from NMI context (very rare indeed= ) the - * max load factor is 0.75, which is around the point where open addressing - * breaks down. - * - */ -struct pv_hash_entry { - struct qspinlock *lock; - struct qnode *node; -}; - -#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry)) -#define PV_HE_MIN (PAGE_SIZE / sizeof(struct pv_hash_entry)) - -static struct pv_hash_entry *pv_lock_hash; -static unsigned int pv_lock_hash_bits __read_mostly; - -/* - * Allocate memory for the PV qspinlock hash buckets - * - * This function should be called from the paravirt spinlock initialization - * routine. - */ -void __init __pv_init_lock_hash(void) -{ - int pv_hash_size =3D ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE); - - if (pv_hash_size < PV_HE_MIN) - pv_hash_size =3D PV_HE_MIN; - - /* - * Allocate space from bootmem which should be page-size aligned - * and hence cacheline aligned. 
- */ - pv_lock_hash =3D alloc_large_system_hash("PV qspinlock", - sizeof(struct pv_hash_entry), - pv_hash_size, 0, - HASH_EARLY | HASH_ZERO, - &pv_lock_hash_bits, NULL, - pv_hash_size, pv_hash_size); -} - -#define for_each_hash_entry(he, offset, hash) \ - for (hash &=3D ~(PV_HE_PER_LINE - 1), he =3D &pv_lock_hash[hash], offset = =3D 0; \ - offset < (1 << pv_lock_hash_bits); \ - offset++, he =3D &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash= _bits) - 1)]) - -static struct qspinlock **pv_hash(struct qspinlock *lock, struct qnode *no= de) -{ - unsigned long offset, hash =3D hash_ptr(lock, pv_lock_hash_bits); - struct pv_hash_entry *he; - int hopcnt =3D 0; - - for_each_hash_entry(he, offset, hash) { - hopcnt++; - if (!cmpxchg(&he->lock, NULL, lock)) { - WRITE_ONCE(he->node, node); - lockevent_pv_hop(hopcnt); - return &he->lock; - } - } - /* - * Hard assume there is a free entry for us. - * - * This is guaranteed by ensuring every blocked lock only ever consumes - * a single entry, and since we only have 4 nesting levels per CPU - * and allocated 4*nr_possible_cpus(), this must be so. - * - * The single entry is guaranteed by having the lock owner unhash - * before it releases. - */ - BUG(); -} - -static struct qnode *pv_unhash(struct qspinlock *lock) -{ - unsigned long offset, hash =3D hash_ptr(lock, pv_lock_hash_bits); - struct pv_hash_entry *he; - struct qnode *node; - - for_each_hash_entry(he, offset, hash) { - if (READ_ONCE(he->lock) =3D=3D lock) { - node =3D READ_ONCE(he->node); - WRITE_ONCE(he->lock, NULL); - return node; - } - } - /* - * Hard assume we'll find an entry. - * - * This guarantees a limited lookup time and is itself guaranteed by - * having the lock owner do the unhash -- IFF the unlock sees the - * SLOW flag, there MUST be a hash entry. - */ - BUG(); -} - -/* - * Return true if when it is time to check the previous node which is not - * in a running state. 
- */ -static inline bool -pv_wait_early(struct qnode *prev, int loop) -{ - if ((loop & PV_PREV_CHECK_MASK) !=3D 0) - return false; - - return READ_ONCE(prev->state) !=3D vcpu_running; -} - -/* - * Initialize the PV part of the qnode. - */ -static void pv_init_node(struct qnode *node) -{ - node->cpu =3D smp_processor_id(); - node->state =3D vcpu_running; -} - -/* - * Wait for node->locked to become true, halt the vcpu after a short spin. - * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its - * behalf. - */ -static void pv_wait_node(struct qnode *node, struct qnode *prev) -{ - int loop; - bool wait_early; - - for (;;) { - for (wait_early =3D false, loop =3D SPIN_THRESHOLD; loop; loop--) { - if (READ_ONCE(node->locked)) - return; - if (pv_wait_early(prev, loop)) { - wait_early =3D true; - break; - } - cpu_relax(); - } - - /* - * Order node->state vs node->locked thusly: - * - * [S] node->state =3D vcpu_halted [S] next->locked =3D 1 - * MB MB - * [L] node->locked [RmW] node->state =3D vcpu_hashed - * - * Matches the cmpxchg() from pv_kick_node(). - */ - smp_store_mb(node->state, vcpu_halted); - - if (!READ_ONCE(node->locked)) { - lockevent_inc(pv_wait_node); - lockevent_cond_inc(pv_wait_early, wait_early); - pv_wait(&node->state, vcpu_halted); - } - - /* - * If pv_kick_node() changed us to vcpu_hashed, retain that - * value so that pv_wait_head_or_lock() knows to not also try - * to hash this lock. - */ - cmpxchg(&node->state, vcpu_halted, vcpu_running); - - /* - * If the locked flag is still not set after wakeup, it is a - * spurious wakeup and the vCPU should wait again. However, - * there is a pretty high overhead for CPU halting and kicking. - * So it is better to spin for a while in the hope that the - * MCS lock will be released soon. - */ - lockevent_cond_inc(pv_spurious_wakeup, - !READ_ONCE(node->locked)); - } - - /* - * By now our node->locked should be 1 and our caller will not actually - * spin-wait for it. 
We do however rely on our caller to do a - * load-acquire for us. - */ -} - -/* - * Called after setting next->locked =3D 1 when we're the lock owner. - * - * Instead of waking the waiters stuck in pv_wait_node() advance their sta= te - * such that they're waiting in pv_wait_head_or_lock(), this avoids a - * wake/sleep cycle. - */ -static void pv_kick_node(struct qspinlock *lock, struct qnode *node) -{ - /* - * If the vCPU is indeed halted, advance its state to match that of - * pv_wait_node(). If OTOH this fails, the vCPU was running and will - * observe its next->locked value and advance itself. - * - * Matches with smp_store_mb() and cmpxchg() in pv_wait_node() - * - * The write to next->locked in arch_mcs_spin_unlock_contended() - * must be ordered before the read of node->state in the cmpxchg() - * below for the code to work correctly. To guarantee full ordering - * irrespective of the success or failure of the cmpxchg(), - * a relaxed version with explicit barrier is used. The control - * dependency will order the reading of node->state before any - * subsequent writes. - */ - smp_mb__before_atomic(); - if (cmpxchg_relaxed(&node->state, vcpu_halted, vcpu_hashed) - !=3D vcpu_halted) - return; - - /* - * Put the lock into the hash table and set the _Q_SLOW_VAL. - * - * As this is the same vCPU that will check the _Q_SLOW_VAL value and - * the hash table later on at unlock time, no atomic instruction is - * needed. - */ - WRITE_ONCE(lock->locked, _Q_SLOW_VAL); - (void)pv_hash(lock, node); -} - -/* - * Wait for l->locked to become clear and acquire the lock; - * halt the vcpu after a short spin. - * __pv_queued_spin_unlock() will wake us. - * - * The current value of the lock will be returned for additional processin= g. 
- */ -static u32 -pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node) -{ - struct qspinlock **lp =3D NULL; - int waitcnt =3D 0; - int loop; - - /* - * If pv_kick_node() already advanced our state, we don't need to - * insert ourselves into the hash table anymore. - */ - if (READ_ONCE(node->state) =3D=3D vcpu_hashed) - lp =3D (struct qspinlock **)1; - - /* - * Tracking # of slowpath locking operations - */ - lockevent_inc(lock_slowpath); - - for (;; waitcnt++) { - /* - * Set correct vCPU state to be used by queue node wait-early - * mechanism. - */ - WRITE_ONCE(node->state, vcpu_running); - - /* - * Set the pending bit in the active lock spinning loop to - * disable lock stealing before attempting to acquire the lock. - */ - set_pending(lock); - for (loop =3D SPIN_THRESHOLD; loop; loop--) { - if (trylock_clear_pending(lock)) - goto gotlock; - cpu_relax(); - } - clear_pending(lock); - - - if (!lp) { /* ONCE */ - lp =3D pv_hash(lock, node); - - /* - * We must hash before setting _Q_SLOW_VAL, such that - * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock() - * we'll be sure to be able to observe our hash entry. - * - * [S] [Rmw] l->locked =3D=3D _Q_SLOW_VAL - * MB RMB - * [RmW] l->locked =3D _Q_SLOW_VAL [L] - * - * Matches the smp_rmb() in __pv_queued_spin_unlock(). - */ - if (xchg(&lock->locked, _Q_SLOW_VAL) =3D=3D 0) { - /* - * The lock was free and now we own the lock. - * Change the lock value back to _Q_LOCKED_VAL - * and unhash the table. - */ - WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); - WRITE_ONCE(*lp, NULL); - goto gotlock; - } - } - WRITE_ONCE(node->state, vcpu_hashed); - lockevent_inc(pv_wait_head); - lockevent_cond_inc(pv_wait_again, waitcnt); - pv_wait(&lock->locked, _Q_SLOW_VAL); - - /* - * Because of lock stealing, the queue head vCPU may not be - * able to acquire the lock before it has to wait again. - */ - } - - /* - * The cmpxchg() or xchg() call before coming here provides the - * acquire semantics for locking. 
The dummy ORing of _Q_LOCKED_VAL - * here is to indicate to the compiler that the value will always - * be nozero to enable better code optimization. - */ -gotlock: - return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL); -} - -/* - * PV versions of the unlock fastpath and slowpath functions to be used - * instead of queued_spin_unlock(). - */ -__visible void -__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) -{ - struct qnode *node; - - if (unlikely(locked !=3D _Q_SLOW_VAL)) { - WARN(!debug_locks_silent, - "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n", - (unsigned long)lock, atomic_read(&lock->val)); - return; - } - - /* - * A failed cmpxchg doesn't provide any memory-ordering guarantees, - * so we need a barrier to order the read of the node data in - * pv_unhash *after* we've read the lock being _Q_SLOW_VAL. - * - * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL. - */ - smp_rmb(); - - /* - * Since the above failed to release, this must be the SLOW path. - * Therefore start by looking up the blocked node and unhashing it. - */ - node =3D pv_unhash(lock); - - /* - * Now that we have a reference to the (likely) blocked qnode, - * release the lock. - */ - smp_store_release(&lock->locked, 0); - - /* - * At this point the memory pointed at by lock can be freed/reused, - * however we can still use the qnode to kick the CPU. - * The other vCPU may not really be halted, but kicking an active - * vCPU is harmless other than the additional latency in completing - * the unlock. - */ - lockevent_inc(pv_kick_unlock); - pv_kick(node->cpu); -} - -/* - * Include the architecture specific callee-save thunk of the - * __pv_queued_spin_unlock(). This thunk is put together with - * __pv_queued_spin_unlock() to make the callee-save thunk and the real un= lock - * function close to each other sharing consecutive instruction cachelines. - * Alternatively, architecture specific version of __pv_queued_spin_unlock= () - * can be defined. 
- */ -#include - -#ifndef __pv_queued_spin_unlock -__visible void __pv_queued_spin_unlock(struct qspinlock *lock) -{ - u8 locked; - - /* - * We must not unlock if SLOW, because in that case we must first - * unhash. Otherwise it would be possible to have multiple @lock - * entries, which would be BAD. - */ - locked =3D cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0); - if (likely(locked =3D=3D _Q_LOCKED_VAL)) - return; - - __pv_queued_spin_unlock_slowpath(lock, locked); -} -#endif /* __pv_queued_spin_unlock */ --=20 2.35.1 From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8D11CC43334 for ; Wed, 13 Jul 2022 07:08:11 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234598AbiGMHIJ (ORCPT ); Wed, 13 Jul 2022 03:08:09 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:35268 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234639AbiGMHHo (ORCPT ); Wed, 13 Jul 2022 03:07:44 -0400 Received: from mail-pj1-x1029.google.com (mail-pj1-x1029.google.com [IPv6:2607:f8b0:4864:20::1029]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 9AD88E2A1E for ; Wed, 13 Jul 2022 00:07:39 -0700 (PDT) Received: by mail-pj1-x1029.google.com with SMTP id fz10so10780921pjb.2 for ; Wed, 13 Jul 2022 00:07:39 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=905v9R4P6Da5F35eFheoGycTQtObbagfCZ0iYpTk9DM=; b=O5wPRgxh6AUMXh5KXFNWpXzVEohOrNZN1hpsbQ/d9isZFDvkrJBR14fMJYpirIxNBJ FXcPS84eAHjysEkcEMsM70Gj3ADdxN50J1uxKl/t9N5m+wO4XOlFw6O3Asj6tLtOwRkN a5wlPcLEcRZmM0tCDyZnFGaNk5kZ7e3App6DY3Z2oWbSZ4I8RSVBrbqX/bc3McqOHsCA 
n4BXNhyVJ0yrAMKNRkRbtUdkBu3/QnUeWDL1GyxUI6pwNXVOljakkbwSREomxWk17h1R ULY6pc6iljbzv1k3kkf5Rrh0whVMI4vnmP7peHI8oJg5/Vq7ullVez6BS1sIU52ciIKr hSVA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=905v9R4P6Da5F35eFheoGycTQtObbagfCZ0iYpTk9DM=; b=NItSOGCJG7k9XF2fI1oteazKOV9M0eQpPOR7YL5jPzXsqiGrIPOeR+rbT67PXbktI+ VCzwLHWH19pIeE8IhOB0+NBz0ER0Tv4enww762++mpoIA8oBj4470wlJRf/MBuvSGD/6 AJb9i6tAbaFaCxieeMh0/JkIjx8s/ynT9TguaW2lej1I8sfnzfaLFPxhlCt3cSVlKqgB OZPUydhA0vZG7YlchbPaZm+ou0ATBZ/pV2OWFvTbm3y/XddBnOOMTGprB7xrRUv0AlZ/ RqVsTuRETH07dss2oAV5xInB3SnYJCNsliNoZoEm1SKYXjxT1ME5CsRide4eXH61KyoB peWg== X-Gm-Message-State: AJIora/ZT1iQrImaxxYzmq9BC9gi1jW0VFrQeKdUKFUFkIaLz3NkMicD EC/uhXgfn1vcnbe+99jcA6I= X-Google-Smtp-Source: AGRyM1seMTDgf+6u/eAsASyNugn+q6giry6nouDYpAVkgpwOD2kiI5YYhma8MdWeBzAGpN7KQwuitg== X-Received: by 2002:a17:902:f544:b0:16c:5119:d4c2 with SMTP id h4-20020a170902f54400b0016c5119d4c2mr1806894plf.1.1657696058723; Wed, 13 Jul 2022 00:07:38 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. [193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.35 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:38 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . 
org" Subject: [PATCH v2 07/12] locking/qspinlock: remove arch qspinlock_paravirt.h includes Date: Wed, 13 Jul 2022 17:06:59 +1000 Message-Id: <20220713070704.308394-8-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Signed-off-by: Nicholas Piggin Reported-by: kernel test robot --- arch/powerpc/include/asm/qspinlock_paravirt.h | 7 -- arch/x86/include/asm/qspinlock.h | 4 ++ arch/x86/include/asm/qspinlock_paravirt.h | 72 ------------------- arch/x86/kernel/paravirt-spinlocks.c | 71 ++++++++++++++++++ kernel/locking/qspinlock.c | 11 +-- 5 files changed, 76 insertions(+), 89 deletions(-) delete mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h delete mode 100644 arch/x86/include/asm/qspinlock_paravirt.h diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/i= nclude/asm/qspinlock_paravirt.h deleted file mode 100644 index 6b60e7736a47..000000000000 --- a/arch/powerpc/include/asm/qspinlock_paravirt.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H -#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H - -EXPORT_SYMBOL(__pv_queued_spin_unlock); - -#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */ diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinl= ock.h index d87451df480b..7f914fe7bc30 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -34,6 +34,10 @@ extern void __pv_queued_spin_lock_slowpath(struct qspinl= ock *lock, u32 val); extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lo= ck); extern bool nopvspin; =20 +#ifdef CONFIG_64BIT +#define __pv_queued_spin_unlock __pv_queued_spin_unlock +#endif + #define 
queued_spin_unlock queued_spin_unlock /** * queued_spin_unlock - release a queued spinlock diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/a= sm/qspinlock_paravirt.h deleted file mode 100644 index 892fd8c3a6f7..000000000000 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_QSPINLOCK_PARAVIRT_H -#define __ASM_QSPINLOCK_PARAVIRT_H - -#include - -/* - * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit - * registers. For i386, however, only 1 32-bit register needs to be saved - * and restored. So an optimized version of __pv_queued_spin_unlock() is - * hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit. - */ -#ifdef CONFIG_64BIT - -PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); -#define __pv_queued_spin_unlock __pv_queued_spin_unlock -#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" -#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slow= path" - -/* - * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock - * which combines the registers saving trunk and the body of the following - * C code: - * - * void __pv_queued_spin_unlock(struct qspinlock *lock) - * { - * u8 lockval =3D cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); - * - * if (likely(lockval =3D=3D _Q_LOCKED_VAL)) - * return; - * pv_queued_spin_unlock_slowpath(lock, lockval); - * } - * - * For x86-64, - * rdi =3D lock (first argument) - * rsi =3D lockval (second argument) - * rdx =3D internal variable (set to 0) - */ -asm (".pushsection .text;" - ".globl " PV_UNLOCK ";" - ".type " PV_UNLOCK ", @function;" - ".align 4,0x90;" - PV_UNLOCK ": " - ASM_ENDBR - FRAME_BEGIN - "push %rdx;" - "mov $0x1,%eax;" - "xor %edx,%edx;" - LOCK_PREFIX "cmpxchg %dl,(%rdi);" - "cmp $0x1,%al;" - "jne .slowpath;" - "pop %rdx;" - FRAME_END - ASM_RET - ".slowpath: " - "push %rsi;" - "movzbl %al,%esi;" - "call " PV_UNLOCK_SLOWPATH ";" 
- "pop %rsi;" - "pop %rdx;" - FRAME_END - ASM_RET - ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" - ".popsection"); - -#else /* CONFIG_64BIT */ - -extern void __pv_queued_spin_unlock(struct qspinlock *lock); -PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock); - -#endif /* CONFIG_64BIT */ -#endif diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravir= t-spinlocks.c index 9e1ea99ad9df..c6a107dfe20d 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -7,6 +7,7 @@ #include #include =20 +#include #include =20 __visible void __native_queued_spin_unlock(struct qspinlock *lock) @@ -15,6 +16,76 @@ __visible void __native_queued_spin_unlock(struct qspinl= ock *lock) } PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock); =20 +#ifdef CONFIG_PARAVIRT_SPINLOCKS +/* + * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit + * registers. For i386, however, only 1 32-bit register needs to be saved + * and restored. So an optimized version of __pv_queued_spin_unlock() is + * hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit. 
+ */ +#ifdef CONFIG_64BIT + +__visible void +__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked); + +PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); +#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" +#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slow= path" + +/* + * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock + * which combines the registers saving trunk and the body of the following + * C code: + * + * void __pv_queued_spin_unlock(struct qspinlock *lock) + * { + * u8 lockval =3D cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); + * + * if (likely(lockval =3D=3D _Q_LOCKED_VAL)) + * return; + * pv_queued_spin_unlock_slowpath(lock, lockval); + * } + * + * For x86-64, + * rdi =3D lock (first argument) + * rsi =3D lockval (second argument) + * rdx =3D internal variable (set to 0) + */ +asm (".pushsection .text;" + ".globl " PV_UNLOCK ";" + ".type " PV_UNLOCK ", @function;" + ".align 4,0x90;" + PV_UNLOCK ": " + ASM_ENDBR + FRAME_BEGIN + "push %rdx;" + "mov $0x1,%eax;" + "xor %edx,%edx;" + LOCK_PREFIX "cmpxchg %dl,(%rdi);" + "cmp $0x1,%al;" + "jne .slowpath;" + "pop %rdx;" + FRAME_END + ASM_RET + ".slowpath: " + "push %rsi;" + "movzbl %al,%esi;" + "call " PV_UNLOCK_SLOWPATH ";" + "pop %rsi;" + "pop %rdx;" + FRAME_END + ASM_RET + ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" + ".popsection"); + +#else /* CONFIG_64BIT */ + +extern void __pv_queued_spin_unlock(struct qspinlock *lock); +PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock); + +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + bool pv_is_native_spin_unlock(void) { return pv_ops.lock.queued_spin_unlock.func =3D=3D diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 3b3663d15402..c4f223a03345 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -745,16 +745,6 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *loc= k, u8 locked) pv_kick(node->cpu); } =20 -/* - * Include 
the architecture specific callee-save thunk of the - * __pv_queued_spin_unlock(). This thunk is put together with - * __pv_queued_spin_unlock() to make the callee-save thunk and the real un= lock - * function close to each other sharing consecutive instruction cachelines. - * Alternatively, architecture specific version of __pv_queued_spin_unlock= () - * can be defined. - */ -#include - #ifndef __pv_queued_spin_unlock __visible void __pv_queued_spin_unlock(struct qspinlock *lock) { @@ -771,6 +761,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock= *lock) =20 __pv_queued_spin_unlock_slowpath(lock, locked); } +EXPORT_SYMBOL(__pv_queued_spin_unlock); #endif =20 #else /* CONFIG_PARAVIRT_SPINLOCKS */ --=20 2.35.1 From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 448C5C43334 for ; Wed, 13 Jul 2022 07:08:22 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234721AbiGMHIU (ORCPT ); Wed, 13 Jul 2022 03:08:20 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34814 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234659AbiGMHHx (ORCPT ); Wed, 13 Jul 2022 03:07:53 -0400 Received: from mail-pg1-x52e.google.com (mail-pg1-x52e.google.com [IPv6:2607:f8b0:4864:20::52e]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2D8C7E2A3C for ; Wed, 13 Jul 2022 00:07:43 -0700 (PDT) Received: by mail-pg1-x52e.google.com with SMTP id f11so8797615pgj.7 for ; Wed, 13 Jul 2022 00:07:43 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=z/StboqhuAVzEvcv+05Dw1pybR79zS+xo0AFlQGPzP8=; 
b=TH0MUr1aNtyAMw5tSlehWZRh+7Dwpmv7dYTvzkB6Og/JgZMsPhhuKnPSpAmpfSnxOQ xEmwoGJKJ5ooIFWRrlnNUbLyP7cktvAbUEAV2zqvNf+naGGFD7iJiHbEaqHYXZywYVtr znxF35xwlP8rpGiHjZabGnf+E7vFcBU+0xejOO97EVaBAptdH4G5fM/PA547dXji+DMU 2sDRmJx+G/7sSuwcFDLD1/Q1LLP8ZsS74kjq+UINqiTy81HAWVHsvQxs/aAqmYyR/2AX fqDgIbf1rd1UgUnktVRfEDViffKg/FrXYlDCwCjPjs2UGkHjE1oZWwYkXm1Crmm2DxKC 8jIA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=z/StboqhuAVzEvcv+05Dw1pybR79zS+xo0AFlQGPzP8=; b=2XhPa7qM320e7zWm+tvfaasc8TseNY+oqRd1Hi2vhsHMfY7pQaO4DhijYTdfhiHHNE FTNfI0DxgGsfb3AEIgRsWYWgPs2x9RmU3BRJBDYTR6Fdn/PNTK7DXtDWIyCPVbazVadm f225VtB5xh89bEJYcvm4wfylisyfhaHAbJsLAgNOWFN4AEcC8DSbwXTsDhwR02+JN0rg nEFgOaAWXBYJSq1yWZW61lMY0j9qU7yx7sGQ5jMNtRrkmC7jXmmRDNua7ue5a5WqOVpt nH68nSj7YIJctBH5vP63ijUhDkbhQyMSAnyYISRvSmuljHRlmOBMJqCmHGHl2pOJDkCi MwsQ== X-Gm-Message-State: AJIora9y8QcFfAk+kP2Jn606ucoR2y2wvA8mk/RXS5dGL8qE7eOb4H0X sGiIrCRgGxutW4qROLdfIS0= X-Google-Smtp-Source: AGRyM1sfD8hVaoeySN4D+3rQDwl261LpD8XlDUAAPXJ4cQBBc6eg8djSe9XjO0svThhDniwPqOxaJA== X-Received: by 2002:a62:79d7:0:b0:52a:b557:2796 with SMTP id u206-20020a6279d7000000b0052ab5572796mr1969465pfc.34.1657696062459; Wed, 13 Jul 2022 00:07:42 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. [193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.39 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:42 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . 
org" Subject: [PATCH v2 08/12] locking/qspinlock: stop renaming queued_spin_lock_slowpath to native_queued_spin_lock_slowpath Date: Wed, 13 Jul 2022 17:07:00 +1000 Message-Id: <20220713070704.308394-9-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" The native version can simply be queued_spin_lock_slowpath, and the paravirt version __pv_queued_spin_lock_slowpath, which is as they are named in the C code. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/qspinlock.h | 38 ++++++++++------------------ arch/x86/include/asm/qspinlock.h | 14 +++++++--- arch/x86/kernel/paravirt.c | 2 +- kernel/locking/qspinlock.c | 8 +----- 4 files changed, 26 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/as= m/qspinlock.h index b676c4fb90fd..dd231c756233 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -7,42 +7,32 @@ =20 #define _Q_PENDING_LOOPS (1 << 9) /* not tuned */ =20 -#ifdef CONFIG_PARAVIRT_SPINLOCKS -extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 v= al); -extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val= ); -extern void __pv_queued_spin_unlock(struct qspinlock *lock); +void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +void __pv_queued_spin_unlock(struct qspinlock *lock); =20 -static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lo= ck, u32 val) +static __always_inline void queued_spin_lock(struct qspinlock *lock) { - if (!is_shared_processor()) - native_queued_spin_lock_slowpath(lock, val); + u32 val =3D 0; + + if 
(likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL))) + return; + + if (!IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) || !is_shared_processor()) + queued_spin_lock_slowpath(lock, val); else __pv_queued_spin_lock_slowpath(lock, val); } +#define queued_spin_lock queued_spin_lock =20 -#define queued_spin_unlock queued_spin_unlock static inline void queued_spin_unlock(struct qspinlock *lock) { - if (!is_shared_processor()) + if (!IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) || !is_shared_processor()) smp_store_release(&lock->locked, 0); else __pv_queued_spin_unlock(lock); } - -#else -extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -#endif - -static __always_inline void queued_spin_lock(struct qspinlock *lock) -{ - u32 val =3D 0; - - if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL))) - return; - - queued_spin_lock_slowpath(lock, val); -} -#define queued_spin_lock queued_spin_lock +#define queued_spin_unlock queued_spin_unlock =20 #ifdef CONFIG_PARAVIRT_SPINLOCKS #define SPIN_THRESHOLD (1<<15) /* not tuned */ diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinl= ock.h index 7f914fe7bc30..603ad61e9dfe 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -28,7 +28,7 @@ static __always_inline u32 queued_fetch_set_pending_acqui= re(struct qspinlock *lo } =20 #ifdef CONFIG_PARAVIRT_SPINLOCKS -extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 v= al); +extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); extern void __pv_init_lock_hash(void); extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val= ); extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lo= ck); @@ -38,7 +38,6 @@ extern bool nopvspin; #define __pv_queued_spin_unlock __pv_queued_spin_unlock #endif =20 -#define queued_spin_unlock queued_spin_unlock /** * queued_spin_unlock - release a queued spinlock * @lock : Pointer to 
queued spinlock structure @@ -50,22 +49,29 @@ static inline void native_queued_spin_unlock(struct qsp= inlock *lock) smp_store_release(&lock->locked, 0); } =20 -static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 v= al) +static inline void queued_spin_lock(struct qspinlock *lock) { + int val =3D 0; + + if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) + return; + pv_queued_spin_lock_slowpath(lock, val); } +#define queued_spin_lock queued_spin_lock =20 static inline void queued_spin_unlock(struct qspinlock *lock) { kcsan_release(); pv_queued_spin_unlock(lock); } +#define queued_spin_unlock queued_spin_unlock =20 -#define vcpu_is_preempted vcpu_is_preempted static inline bool vcpu_is_preempted(long cpu) { return pv_vcpu_is_preempted(cpu); } +#define vcpu_is_preempted vcpu_is_preempted #endif =20 #ifdef CONFIG_PARAVIRT diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 7ca2d46c08cc..f03e2962afa8 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -384,7 +384,7 @@ struct paravirt_patch_template pv_ops =3D { #if defined(CONFIG_PARAVIRT_SPINLOCKS) /* Lock ops. */ #ifdef CONFIG_SMP - .lock.queued_spin_lock_slowpath =3D native_queued_spin_lock_slowpath, + .lock.queued_spin_lock_slowpath =3D queued_spin_lock_slowpath, .lock.queued_spin_unlock =3D PV_CALLEE_SAVE(__native_queued_spin_unlock), .lock.wait =3D paravirt_nop, diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index c4f223a03345..4785a32a6fd2 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -297,8 +297,7 @@ static __always_inline void set_locked(struct qspinlock= *lock) * pv_kick(cpu) -- wakes a suspended vcpu * * Using these we implement __pv_queued_spin_lock_slowpath() and - * __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath()= and - * native_queued_spin_unlock(). + * __pv_queued_spin_unlock(). 
*/ =20 #define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET) @@ -988,10 +987,6 @@ static __always_inline void queued_spin_lock_mcs_queue= (struct qspinlock *lock, b * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' : * queue : ^--' : */ -#ifdef CONFIG_PARAVIRT_SPINLOCKS -#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath -#endif - void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { if (virt_spin_lock(lock)) @@ -1072,7 +1067,6 @@ void queued_spin_lock_slowpath(struct qspinlock *lock= , u32 val) EXPORT_SYMBOL(queued_spin_lock_slowpath); =20 #ifdef CONFIG_PARAVIRT_SPINLOCKS -#undef queued_spin_lock_slowpath void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { queued_spin_lock_mcs_queue(lock, true); --=20 2.35.1 From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 918B4C433EF for ; Wed, 13 Jul 2022 07:08:29 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234738AbiGMHI2 (ORCPT ); Wed, 13 Jul 2022 03:08:28 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:35078 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234690AbiGMHH5 (ORCPT ); Wed, 13 Jul 2022 03:07:57 -0400 Received: from mail-pf1-x42c.google.com (mail-pf1-x42c.google.com [IPv6:2607:f8b0:4864:20::42c]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id CA513E3C2F for ; Wed, 13 Jul 2022 00:07:46 -0700 (PDT) Received: by mail-pf1-x42c.google.com with SMTP id v7so7418263pfb.0 for ; Wed, 13 Jul 2022 00:07:46 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=+3iHP3kFYerWpm4anlOzhvJFyDBNIHOmpXweRrXpfF4=; 
b=QbAGsusVhx8+fbRwlWVo0U/NEcEq/fLo3kHRcH+iEZzRkjCSaljSKXFHfpcvhqwRdf JPTjfMQc79HIXOG0AMiVjWZtc6ghvzMGfRF1mSvWezMUWPb5R5HaNWq61KGGKoYbtsiw sy2o+NmDX0IbNxxnY9sjaudNVCxffRG5+EOVR9eIyYIh6Hr4OUxdDr7BsrDuJgUhUX4+ zckClmeSbA8Oa2wTa62iLsGIix8hKUSrUp0cuF31whtwvXwt6tFKvA+5PFvnYx4WPsoe X/9BYMbMAkXi7fXeBsg4vLdh/RFWfrqYjfCPOMIYfqDnGMpm+3A5QKBE9M7FvNw6nXHe +utQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=+3iHP3kFYerWpm4anlOzhvJFyDBNIHOmpXweRrXpfF4=; b=gccngcwImS5cvql1ZgGeD0msbTLSZatbwP3Kw4mt7FKq2uSbdUz13Ou9T7N+dpD7Hp raph2EjC3kHJdBFXF7fVw4utuqNqQ2JylqnkzmRt+tl7WDJTVOjk6bAYdLqaJnNL1m3s tQK92f0JdL6BfeCV32gwIxl6Te+ycumGG0mgpcUAI8z14NOo072Cw13Gd+esKF7NZB5f 43dfr6zEZPbzIOrn8bqGEAtJg0ohGHB4HkbyvkJJiCjyCBzfgj3DeGhRb4E0h/E+Bi15 kUKAoB4z168XMdRUfihbUK2FMESb3qCRLkjwkyUik41HhKGrqRqYVdK9JDXz6nFXghEa q48A== X-Gm-Message-State: AJIora9lOkuJh26qT+CMnPfNFoxnF75nKrDgN2jgWKQTaauhFWvoSSH6 Q7f8/3T8/Jmb99mCc2afiCo= X-Google-Smtp-Source: AGRyM1spF0j9huppBaC9iXGIk+uwQ/Cb5zKJgLFCCRqR17G3ERr9+3uvaBshfFn1pE3WMNgvrvRnkA== X-Received: by 2002:a05:6a00:2347:b0:52a:cf39:59a5 with SMTP id j7-20020a056a00234700b0052acf3959a5mr1737736pfj.28.1657696066155; Wed, 13 Jul 2022 00:07:46 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. [193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.42 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:45 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . 
org" Subject: [PATCH v2 09/12] locking/qspinlock: rename __pv_init_lock_hash to pv_spinlocks_init Date: Wed, 13 Jul 2022 17:07:01 +1000 Message-Id: <20220713070704.308394-10-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" The caller should not have to be aware of what the implementation's initialisation does. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/qspinlock.h | 7 ------- arch/powerpc/include/asm/spinlock.h | 2 +- arch/x86/hyperv/hv_spinlock.c | 2 +- arch/x86/include/asm/qspinlock.h | 1 - arch/x86/kernel/kvm.c | 2 +- arch/x86/xen/spinlock.c | 2 +- include/asm-generic/qspinlock.h | 6 ++++++ kernel/locking/qspinlock.c | 2 +- 8 files changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/as= m/qspinlock.h index dd231c756233..39c1c7f80579 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -53,13 +53,6 @@ static __always_inline void pv_kick(int cpu) prod_cpu(cpu); } =20 -extern void __pv_init_lock_hash(void); - -static inline void pv_spinlocks_init(void) -{ - __pv_init_lock_hash(); -} - #endif =20 /* diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm= /spinlock.h index bd75872a6334..7dafca8e3f02 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -13,7 +13,7 @@ /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() =20 -#ifndef CONFIG_PARAVIRT_SPINLOCKS +#ifndef CONFIG_PPC_QUEUED_SPINLOCKS static inline void pv_spinlocks_init(void) { } #endif =20 diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c index 91cfe698bde0..c7b5c3211c79 100644 ---
a/arch/x86/hyperv/hv_spinlock.c +++ b/arch/x86/hyperv/hv_spinlock.c @@ -76,7 +76,7 @@ void __init hv_init_spinlocks(void) } pr_info("PV spinlocks enabled\n"); =20 - __pv_init_lock_hash(); + pv_spinlocks_init(); pv_ops.lock.queued_spin_lock_slowpath =3D __pv_queued_spin_lock_slowpath; pv_ops.lock.queued_spin_unlock =3D PV_CALLEE_SAVE(__pv_queued_spin_unlock= ); pv_ops.lock.wait =3D hv_qlock_wait; diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinl= ock.h index 603ad61e9dfe..9a03fcc1b2b7 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -29,7 +29,6 @@ static __always_inline u32 queued_fetch_set_pending_acqui= re(struct qspinlock *lo =20 #ifdef CONFIG_PARAVIRT_SPINLOCKS extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __pv_init_lock_hash(void); extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val= ); extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lo= ck); extern bool nopvspin; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 1a3658f7e6d9..98a2c4d3e91d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -1106,7 +1106,7 @@ void __init kvm_spinlock_init(void) =20 pr_info("PV spinlocks enabled\n"); =20 - __pv_init_lock_hash(); + pv_spinlocks_init(); pv_ops.lock.queued_spin_lock_slowpath =3D __pv_queued_spin_lock_slowpath; pv_ops.lock.queued_spin_unlock =3D PV_CALLEE_SAVE(__pv_queued_spin_unlock); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 043c73dfd2c9..5145c4aec4ea 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -135,7 +135,7 @@ void __init xen_init_spinlocks(void) } printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); =20 - __pv_init_lock_hash(); + pv_spinlocks_init(); pv_ops.lock.queued_spin_lock_slowpath =3D __pv_queued_spin_lock_slowpath; pv_ops.lock.queued_spin_unlock =3D PV_CALLEE_SAVE(__pv_queued_spin_unlock); diff --git 
a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinloc= k.h index 995513fa2690..e0fb29ee1adc 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -147,4 +147,10 @@ static __always_inline bool virt_spin_lock(struct qspi= nlock *lock) #define arch_spin_trylock(l) queued_spin_trylock(l) #define arch_spin_unlock(l) queued_spin_unlock(l) =20 +#ifdef CONFIG_PARAVIRT_SPINLOCKS +void pv_spinlocks_init(void); +#else +static inline void pv_spinlocks_init(void) { } +#endif + #endif /* __ASM_GENERIC_QSPINLOCK_H */ diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 4785a32a6fd2..df6d8af8f2a9 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -405,7 +405,7 @@ static unsigned int pv_lock_hash_bits __read_mostly; * This function should be called from the paravirt spinlock initialization * routine. */ -void __init __pv_init_lock_hash(void) +void __init pv_spinlocks_init(void) { int pv_hash_size =3D ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE); =20 --=20 2.35.1 From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7AD9FC43334 for ; Wed, 13 Jul 2022 07:08:37 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234741AbiGMHIe (ORCPT ); Wed, 13 Jul 2022 03:08:34 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:34970 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234561AbiGMHIC (ORCPT ); Wed, 13 Jul 2022 03:08:02 -0400 Received: from mail-pj1-x102a.google.com (mail-pj1-x102a.google.com [IPv6:2607:f8b0:4864:20::102a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 46377E1925 for ; Wed, 13 Jul 2022 00:07:50 -0700 (PDT) Received: by mail-pj1-x102a.google.com with SMTP id 
x18-20020a17090a8a9200b001ef83b332f5so2244758pjn.0 for ; Wed, 13 Jul 2022 00:07:50 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=KkGCZt24/wM/viMu6gJ/6hAx3oCXgMtSYE11mElSvVY=; b=BNn49egqVlLLnQFFlawyOMHUv7VlexlZy3atxaH2y6tB+YWZkjWCFTL6SA0j/4q4Ve oQNkHqzZbeYjrY4Y3lNjzlrBJxWoT6E5PGyd0osFQgQQw0PbZfMDU56swh1zOFnRW7YW RmABL9KxdFk/FRIk8G+OydUgOlgxwuQTWmrhKcFPyFG229pJPtgBhpwsMgRmwL1IZtW3 cpmj2vKnlllS44kw50D+xhtG8uM5ogpJOGM46NJFXE0iy7g6xLZl8Q7AnSX8FOnfYmdw hMYuLWqj6Zj/8rWIiqhawavc0sFgIuNQsiFlDw/cvDc6k2LpaUTPnWbxfGAw+pACexfB goIw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=KkGCZt24/wM/viMu6gJ/6hAx3oCXgMtSYE11mElSvVY=; b=SNE6/9HstwPuBzphbFkZGCGi1ws4rAUs1J2VSL7w3V45xO9/mVpH1EcPkwDGwUAGVk +JmNNv2s1IjQx1Wxde4xlTbzK4+06rsKwXAOqz1vQ2GgTZeWol/iDSEJfSsIaWPe7AhK is/apgxuSKmC/NCPhfExqouepsuPlCOBqHgyM0XqA0yyElwvO0T5uQcP5RSl/7G9mNy6 Ygl1gohJprAaTVBjSxkEkVrHtuBjpUrXGKyLvXErQOvp+a/ANSxIjbSaiN7xSHrKGEYy Ycml/MJWhUrs1P51c/3JsIUnGvlFsuNMPlP3D4e/uJa33RfX5lyUX1w2BPqYSAJV3qwi OjpA== X-Gm-Message-State: AJIora94ObTMfo+EHEOblP2WGSeZvbdct/1AqPTFhlbGJVUoQ+IcyDtW qJWt+74S0rvm+X41FMLaBRk= X-Google-Smtp-Source: AGRyM1upUIcpNOC2xFkUOzlCnjteo/Jph0OGSqNgdYn+pxj4Vk9L2GVp2BLSeIAqjFFS/RnUkhOgOg== X-Received: by 2002:a17:90a:4809:b0:1f0:59d7:7ee2 with SMTP id a9-20020a17090a480900b001f059d77ee2mr2271401pjh.240.1657696069824; Wed, 13 Jul 2022 00:07:49 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. 
[193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.46 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:49 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . org" Subject: [PATCH v2 10/12] locking/qspinlock: paravirt use simple trylock in case idx overflows Date: Wed, 13 Jul 2022 17:07:02 +1000 Message-Id: <20220713070704.308394-11-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" The mcs node overflow fallback locking case does not have to use pv_hybrid_queued_unfair_trylock as the trylock; using queued_spin_trylock there instead reduces the differences between the pv and !pv cases. The use of the pv trylock was likely an artifact of function renaming; it is not a bug, but it is unexpected.
Signed-off-by: Nicholas Piggin --- kernel/locking/qspinlock.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index df6d8af8f2a9..2ebb946a6b80 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -800,13 +800,8 @@ static __always_inline void queued_spin_lock_mcs_queue= (struct qspinlock *lock, b */ if (unlikely(idx >=3D MAX_NODES)) { lockevent_inc(lock_no_node); - if (paravirt) { - while (!pv_hybrid_queued_unfair_trylock(lock)) - cpu_relax(); - } else { - while (!queued_spin_trylock(lock)) - cpu_relax(); - } + while (!queued_spin_trylock(lock)) + cpu_relax(); goto release; } =20 --=20 2.35.1 From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6E46AC43334 for ; Wed, 13 Jul 2022 07:08:47 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234636AbiGMHIo (ORCPT ); Wed, 13 Jul 2022 03:08:44 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:35310 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234519AbiGMHIF (ORCPT ); Wed, 13 Jul 2022 03:08:05 -0400 Received: from mail-pg1-x52f.google.com (mail-pg1-x52f.google.com [IPv6:2607:f8b0:4864:20::52f]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E95C2E3C13 for ; Wed, 13 Jul 2022 00:07:53 -0700 (PDT) Received: by mail-pg1-x52f.google.com with SMTP id bf13so9640294pgb.11 for ; Wed, 13 Jul 2022 00:07:53 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=1rI3NJLOWsk4vsP7I8W/i/0iO2V6TYGkyq9mbykMWmc=; b=JsG4DJjxtIuzRTGePgkVD534zkcZvQCGsVUdko8T5lmNcAPW6UZ7mQdZa+W/ofasOE 
h5F3pi8R+hkkB5uAtv/dql08ZOT7mM58lvtRP/IBEX+ST37A5TKH5bwD4EkrUGO6pEAH dvNoRUjBgeP79D0rAESigx29AySMMKB/IsH+z6Aye6NLNHvjH6HEoluYULQfUzp3bFYj QHB+rmvpmJEhlSc89WoUiwUTIYcwLy6uh8N+c7PVtZ7IQAkK3HtKoBBnemQv5jEaU+mR ArdWpkHEGRn0pdJt7xetbSGN4qYvVL4ozKMaoqb33FCtCm9wtwUpWegMri03ixXOiAWK rLmg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=1rI3NJLOWsk4vsP7I8W/i/0iO2V6TYGkyq9mbykMWmc=; b=d/hMn2QCPeCR9Xbb7qEq7ej7yqXL5S12goihyW7btYaoftpMv356eS3OLAGR8RU3t1 YK5RgoaAjeGEE8QJe4XE2MAeCFQv36+15Bw7yGdiai5LMumta9HyUxzk+cqMod/arMLZ PMgZdIhVUaikk+cAV8jCpvkgL2wAItQvVEWectYIXmqG6w65Llo7+9I/kHVaX63PR/Wu 08jD65Q0FRFz1eHyRz2viULQP1D1sv4be4sPGCH6F9tkvSpVYyZrO+7M8IIcAhJK5LYf jmdp90CBVUFwvjZcW1eBJflFRtJz4D35BKIJQ648n8QqrPlhcNuW+O4QyNZowv36ZW5V dPFg== X-Gm-Message-State: AJIora8xNAmQFEXqUsV9PSm3jN5yYvecuLqhEo+DJa3289KWmAXw5ARn ysDeJciorkrK/iogC5nvTsM= X-Google-Smtp-Source: AGRyM1urJp8NIccTLGHfoWd9dsGyW7nWtFqiBcPbYT+WiGVzK84rVNCszlItvcBCs3a43wT/qk2u7A== X-Received: by 2002:a05:6a00:23d4:b0:52a:e5c1:caa7 with SMTP id g20-20020a056a0023d400b0052ae5c1caa7mr1775750pfc.62.1657696073495; Wed, 13 Jul 2022 00:07:53 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. [193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.50 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:53 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . 
org" Subject: [PATCH v2 11/12] locking/qspinlock: separate pv_wait_node from the non-paravirt path Date: Wed, 13 Jul 2022 17:07:03 +1000 Message-Id: <20220713070704.308394-12-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" pv_wait_node waits until node->locked is non-zero, no need for the pv case to wait again by also executing the !pv code path. Signed-off-by: Nicholas Piggin --- kernel/locking/qspinlock.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 2ebb946a6b80..3255e7804842 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -506,15 +506,18 @@ static void pv_init_node(struct qnode *node) * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its * behalf. */ -static void pv_wait_node(struct qnode *node, struct qnode *prev) +static void pv_wait_node_acquire(struct qnode *node, struct qnode *prev) { int loop; bool wait_early; =20 for (;;) { for (wait_early =3D false, loop =3D SPIN_THRESHOLD; loop; loop--) { - if (READ_ONCE(node->locked)) + if (READ_ONCE(node->locked)) { + /* Provide the acquire ordering. */ + smp_load_acquire(&node->locked); return; + } if (pv_wait_early(prev, loop)) { wait_early =3D true; break; @@ -556,29 +559,23 @@ static void pv_wait_node(struct qnode *node, struct q= node *prev) lockevent_cond_inc(pv_spurious_wakeup, !READ_ONCE(node->locked)); } - - /* - * By now our node->locked should be 1 and our caller will not actually - * spin-wait for it. We do however rely on our caller to do a - * load-acquire for us. - */ } =20 /* * Called after setting next->locked =3D 1 when we're the lock owner. 
* - * Instead of waking the waiters stuck in pv_wait_node() advance their sta= te - * such that they're waiting in pv_wait_head_or_lock(), this avoids a + * Instead of waking the waiters stuck in pv_wait_node_acquire() advance t= heir + * state such that they're waiting in pv_wait_head_or_lock(), this avoids a * wake/sleep cycle. */ static void pv_kick_node(struct qspinlock *lock, struct qnode *node) { /* * If the vCPU is indeed halted, advance its state to match that of - * pv_wait_node(). If OTOH this fails, the vCPU was running and will - * observe its next->locked value and advance itself. + * pv_wait_node_acquire(). If OTOH this fails, the vCPU was running and + * will observe its next->locked value and advance itself. * - * Matches with smp_store_mb() and cmpxchg() in pv_wait_node() + * Matches with smp_store_mb() and cmpxchg() in pv_wait_node_acquire() * * The write to next->locked in arch_mcs_spin_unlock_contended() * must be ordered before the read of node->state in the cmpxchg() @@ -765,8 +762,8 @@ EXPORT_SYMBOL(__pv_queued_spin_unlock); =20 #else /* CONFIG_PARAVIRT_SPINLOCKS */ static __always_inline void pv_init_node(struct qnode *node) { } -static __always_inline void pv_wait_node(struct qnode *node, - struct qnode *prev) { } +static __always_inline void pv_wait_node_acquire(struct qnode *node, + struct qnode *prev) { } static __always_inline void pv_kick_node(struct qspinlock *lock, struct qnode *node) { } static __always_inline u32 pv_wait_head_or_lock(struct qspinlock *lock, @@ -864,10 +861,11 @@ static __always_inline void queued_spin_lock_mcs_queu= e(struct qspinlock *lock, b /* Link @node into the waitqueue. 
*/ WRITE_ONCE(prev->next, node); =20 - if (paravirt) - pv_wait_node(node, prev); /* Wait for mcs node lock to be released */ - smp_cond_load_acquire(&node->locked, VAL); + if (paravirt) + pv_wait_node_acquire(node, prev); + else + smp_cond_load_acquire(&node->locked, VAL); =20 /* * While waiting for the MCS lock, the next pointer may have --=20 2.35.1 From nobody Sat Apr 18 14:08:10 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id B5DB0C433EF for ; Wed, 13 Jul 2022 07:08:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234762AbiGMHIw (ORCPT ); Wed, 13 Jul 2022 03:08:52 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:35044 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234652AbiGMHIR (ORCPT ); Wed, 13 Jul 2022 03:08:17 -0400 Received: from mail-pg1-x52d.google.com (mail-pg1-x52d.google.com [IPv6:2607:f8b0:4864:20::52d]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A39FBE3040 for ; Wed, 13 Jul 2022 00:07:57 -0700 (PDT) Received: by mail-pg1-x52d.google.com with SMTP id bh13so9676972pgb.4 for ; Wed, 13 Jul 2022 00:07:57 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=KnPsdcwpQ6/HQ7udB+nfFkQyTndFGEtXQDYIXdw9bEk=; b=PLK7bYe85Cz+0uz8atHvvMpo9qGTWHUXGvGGlyMJo31WZFkqhtWTOXNSWXKjq6Ce2/ vQ3OrCVv1GKVRnMViqSVhjttJnmif33+dTNDBjOmiWvjzmNhwkRejjtg9IW4mXhH50kT KXSoX8XBgZhvu1eJu25dbPck28ETryRCX1GICM2cLjG53X7Hmd1dpaae3065E9AFt0J2 egCs5Uxfxngncyv1GmD4bset3emzypyEHATQyZQPHLmRpf1PC8MyJae3BbsvLELPrkLr X0I0mOxPH9OwNeTO3QY327VjgphadnVM2Au2cFu2L9xgLDzk2d4ntbRZb6cCVKc9kBIg 9oEw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; 
s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=KnPsdcwpQ6/HQ7udB+nfFkQyTndFGEtXQDYIXdw9bEk=; b=cAFmDDcFxErvfNavi5XFkhXtMvG7wZ8J/hHMGvZuGI0LcgU5YeUsvEWNhkErZhmkVc igSpdoGPwuMztnyJw2SCur89YjfdDOcEeqjp7gQYjXkjjxLqFpSHvFfHvkZjEnMRSlU8 tDKLByt8gH+6+rAoZmwHB+O3a+7ppE6G9ma3A97yrHYHDXuLeU/JcDDy70PUe5uRB2LA YpAEzIWd5p0QhOQD5VmeTZt7C9Uw2k4aZf2a3E0F6XVBU6GN3iE6JgtDpv3mQUCvZ62v 8FMzcZEWH9s2Pv6FoWnAgnE9bLggOkrzLU7hFZ2BabGhwermJzpO2fC49gQfnN8QVmOj eeUg== X-Gm-Message-State: AJIora+h1pIWanhOjBzk3HhJA7RJzGK3Z5AMH2R134IhWP7ihdXqiSlX uMB9OIRC6h0NUJGr09Jrx0w= X-Google-Smtp-Source: AGRyM1v85scKwcrTA7w9VhbsWNy4SnwwYrtxNZ9GzmAAXjSEmP85SYKD7FToZencVhaEeh+MLZPKdA== X-Received: by 2002:a63:d341:0:b0:412:ace8:b0ed with SMTP id u1-20020a63d341000000b00412ace8b0edmr1770283pgi.169.1657696077187; Wed, 13 Jul 2022 00:07:57 -0700 (PDT) Received: from bobo.ozlabs.ibm.com (193-116-203-247.tpgi.com.au. [193.116.203.247]) by smtp.gmail.com with ESMTPSA id d11-20020a170902cecb00b0016bd5da20casm8099061plg.134.2022.07.13.00.07.53 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 13 Jul 2022 00:07:56 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , "linux-kernel @ vger . kernel . 
org" Subject: [PATCH v2 12/12] locking/qspinlock: simplify pv_wait_head_or_lock calling scheme Date: Wed, 13 Jul 2022 17:07:04 +1000 Message-Id: <20220713070704.308394-13-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220713070704.308394-1-npiggin@gmail.com> References: <20220713070704.308394-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" pv_wait_head_or_lock returns the lock word value ORed with a constant, which was done to achieve a constant folding compiler optimisation when the code was generated for both pv and !pv cases. This is no longer necessary with the explicit paravirt test, so make the calling convention simpler. Signed-off-by: Nicholas Piggin --- kernel/locking/qspinlock.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 3255e7804842..251980783079 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -608,8 +608,7 @@ static void pv_kick_node(struct qspinlock *lock, struct= qnode *node) * * The current value of the lock will be returned for additional processin= g. 
*/ -static u32 -pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node) +static void pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *nod= e) { struct qspinlock **lp =3D NULL; int waitcnt =3D 0; @@ -641,7 +640,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct qno= de *node) set_pending(lock); for (loop =3D SPIN_THRESHOLD; loop; loop--) { if (trylock_clear_pending(lock)) - goto gotlock; + return; /* got lock */ cpu_relax(); } clear_pending(lock); @@ -669,7 +668,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct qno= de *node) */ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); WRITE_ONCE(*lp, NULL); - goto gotlock; + return; /* got lock */ } } WRITE_ONCE(node->state, vcpu_hashed); @@ -685,12 +684,8 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct qn= ode *node) =20 /* * The cmpxchg() or xchg() call before coming here provides the - * acquire semantics for locking. The dummy ORing of _Q_LOCKED_VAL - * here is to indicate to the compiler that the value will always - * be nozero to enable better code optimization. + * acquire semantics for locking. */ -gotlock: - return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL); } =20 /* @@ -766,9 +761,8 @@ static __always_inline void pv_wait_node_acquire(struct= qnode *node, struct qnode *prev) { } static __always_inline void pv_kick_node(struct qspinlock *lock, struct qnode *node) { } -static __always_inline u32 pv_wait_head_or_lock(struct qspinlock *lock, - struct qnode *node) - { return 0; } +static __always_inline void pv_wait_head_or_lock(struct qspinlock *lock, + struct qnode *node) { } static __always_inline bool pv_hybrid_queued_unfair_trylock(struct qspinlo= ck *lock) { BUILD_BUG(); } #endif /* CONFIG_PARAVIRT_SPINLOCKS */ =20 @@ -889,24 +883,23 @@ static __always_inline void queued_spin_lock_mcs_queu= e(struct qspinlock *lock, b * sequentiality; this is because the set_locked() function below * does not imply a full barrier. 
* - * The PV pv_wait_head_or_lock function, if active, will acquire - * the lock and return a non-zero value. So we have to skip the - * atomic_cond_read_acquire() call. As the next PV queue head hasn't - * been designated yet, there is no way for the locked value to become - * _Q_SLOW_VAL. So both the set_locked() and the + * The PV pv_wait_head_or_lock function will acquire the lock, so + * skip the atomic_cond_read_acquire() call. As the next PV queue head + * hasn't been designated yet, there is no way for the locked value to + * become _Q_SLOW_VAL. So both the set_locked() and the * atomic_cmpxchg_relaxed() calls will be safe. * * If PV isn't active, 0 will be returned instead. * */ if (paravirt) { - if ((val =3D pv_wait_head_or_lock(lock, node))) - goto locked; + pv_wait_head_or_lock(lock, node); + val =3D atomic_read(&lock->val); + } else { + val =3D atomic_cond_read_acquire(&lock->val, + !(VAL & _Q_LOCKED_PENDING_MASK)); } =20 - val =3D atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MA= SK)); - -locked: /* * claim the lock: * --=20 2.35.1