From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 01/13] locking/qspinlock: remove pv_node abstraction
Date: Tue, 5 Jul 2022 00:38:08 +1000
Message-Id: <20220704143820.3071004-2-npiggin@gmail.com>
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>
References: <20220704143820.3071004-1-npiggin@gmail.com>

There isn't much point trying to separate struct qnode from struct
pv_node when struct qnode has to know about pv_node anyway.
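[Illustration: the before/after layout can be modelled in plain userspace C.
This is a standalone sketch, not the kernel code; mcs_spinlock is simplified
and u8 is replaced by uint8_t.]

#include <stdint.h>
#include <stdio.h>

struct mcs_spinlock {			/* simplified stand-in */
	struct mcs_spinlock *next;
	int locked;
	int count;
};

struct qnode_old {			/* before: opaque PV padding */
	struct mcs_spinlock mcs;
	long reserved[2];
};

struct qnode_new {			/* after: PV fields are plain members */
	struct mcs_spinlock mcs;
	int cpu;
	uint8_t state;
};

int main(void)
{
	/* The PV fields fit inside the space the padding used to reserve. */
	printf("old: %zu bytes, new: %zu bytes\n",
	       sizeof(struct qnode_old), sizeof(struct qnode_new));
	return 0;
}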
Signed-off-by: Nicholas Piggin
Acked-by: Boqun Feng
---
 kernel/locking/qspinlock.c          |  3 ++-
 kernel/locking/qspinlock_paravirt.h | 34 ++++++++++++-----------------
 2 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 65a9a10caa6f..a0fc21d99199 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -82,7 +82,8 @@ struct qnode {
 	struct mcs_spinlock mcs;
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
-	long reserved[2];
+	int cpu;
+	u8 state;
 #endif
 };
 
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index e84d21aa0722..b6a175155f36 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -47,12 +47,6 @@ enum vcpu_state {
 	vcpu_hashed,	/* = pv_hash'ed + vcpu_halted */
 };
 
-struct pv_node {
-	struct mcs_spinlock	mcs;
-	int			cpu;
-	u8			state;
-};
-
 /*
  * Hybrid PV queued/unfair lock
  *
@@ -170,7 +164,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
  */
 struct pv_hash_entry {
 	struct qspinlock *lock;
-	struct pv_node   *node;
+	struct qnode	 *node;
 };
 
 #define PV_HE_PER_LINE	(SMP_CACHE_BYTES / sizeof(struct pv_hash_entry))
@@ -209,7 +203,7 @@ void __init __pv_init_lock_hash(void)
 	     offset < (1 << pv_lock_hash_bits);				\
 	     offset++, he = &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash_bits) - 1)])
 
-static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
+static struct qspinlock **pv_hash(struct qspinlock *lock, struct qnode *node)
 {
 	unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
 	struct pv_hash_entry *he;
@@ -236,11 +230,11 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
 	BUG();
 }
 
-static struct pv_node *pv_unhash(struct qspinlock *lock)
+static struct qnode *pv_unhash(struct qspinlock *lock)
 {
 	unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
 	struct pv_hash_entry *he;
-	struct pv_node *node;
+	struct qnode *node;
 
 	for_each_hash_entry(he, offset, hash) {
 		if (READ_ONCE(he->lock) == lock) {
@@ -264,7 +258,7 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
  * in a running state.
  */
 static inline bool
-pv_wait_early(struct pv_node *prev, int loop)
+pv_wait_early(struct qnode *prev, int loop)
 {
 	if ((loop & PV_PREV_CHECK_MASK) != 0)
 		return false;
@@ -277,9 +271,9 @@ pv_wait_early(struct pv_node *prev, int loop)
  */
 static void pv_init_node(struct mcs_spinlock *node)
 {
-	struct pv_node *pn = (struct pv_node *)node;
+	struct qnode *pn = (struct qnode *)node;
 
-	BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode));
+	BUILD_BUG_ON(sizeof(struct qnode) > sizeof(struct qnode));
 
 	pn->cpu = smp_processor_id();
 	pn->state = vcpu_running;
@@ -292,8 +286,8 @@ static void pv_init_node(struct mcs_spinlock *node)
  */
 static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 {
-	struct pv_node *pn = (struct pv_node *)node;
-	struct pv_node *pp = (struct pv_node *)prev;
+	struct qnode *pn = (struct qnode *)node;
+	struct qnode *pp = (struct qnode *)prev;
 	int loop;
 	bool wait_early;
 
@@ -359,7 +353,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
  */
 static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
-	struct pv_node *pn = (struct pv_node *)node;
+	struct qnode *pn = (struct qnode *)node;
 
 	/*
 	 * If the vCPU is indeed halted, advance its state to match that of
@@ -402,7 +396,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 static u32
 pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 {
-	struct pv_node *pn = (struct pv_node *)node;
+	struct qnode *pn = (struct qnode *)node;
 	struct qspinlock **lp = NULL;
 	int waitcnt = 0;
 	int loop;
@@ -492,7 +486,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 __visible void
 __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
 {
-	struct pv_node *node;
+	struct qnode *node;
 
 	if (unlikely(locked != _Q_SLOW_VAL)) {
 		WARN(!debug_locks_silent,
@@ -517,14 +511,14 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
 	node = pv_unhash(lock);
 
 	/*
-	 * Now that we have a reference to the (likely) blocked pv_node,
+	 * Now that we have a reference to the (likely) blocked qnode,
 	 * release the lock.
 	 */
 	smp_store_release(&lock->locked, 0);
 
 	/*
 	 * At this point the memory pointed at by lock can be freed/reused,
-	 * however we can still use the pv_node to kick the CPU.
+	 * however we can still use the qnode to kick the CPU.
 	 * The other vCPU may not really be halted, but kicking an active
 	 * vCPU is harmless other than the additional latency in completing
 	 * the unlock.
-- 
2.35.1
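[Illustration: pv_hash()/pv_unhash(), which this patch retypes, maintain an
open-addressed lock-to-node table. A userspace sketch of that scan, under
the assumption of a fixed illustrative table size; the kernel sizes the
table from the CPU count and inserts locklessly with cmpxchg.]

#include <stddef.h>
#include <stdint.h>

struct qspinlock;			/* opaque in this sketch */
struct qnode;				/* opaque in this sketch */

#define PV_HASH_SIZE 64			/* illustrative only */

struct pv_hash_entry {
	struct qspinlock *lock;
	struct qnode *node;
};

static struct pv_hash_entry pv_hash_table[PV_HASH_SIZE];

static size_t hash_lock(struct qspinlock *lock)
{
	return ((uintptr_t)lock >> 4) % PV_HASH_SIZE;
}

/* Linear probe for a free slot; publish the lock pointer last so a
 * concurrent pv_unhash cannot observe a slot without its node. */
static void pv_hash_model(struct qspinlock *lock, struct qnode *node)
{
	for (size_t i = hash_lock(lock); ; i = (i + 1) % PV_HASH_SIZE) {
		if (!pv_hash_table[i].lock) {
			pv_hash_table[i].node = node;
			pv_hash_table[i].lock = lock;
			return;
		}
	}
}

/* Probe from the home slot until the lock is found, then clear it. */
static struct qnode *pv_unhash_model(struct qspinlock *lock)
{
	for (size_t i = hash_lock(lock); ; i = (i + 1) % PV_HASH_SIZE) {
		if (pv_hash_table[i].lock == lock) {
			pv_hash_table[i].lock = NULL;
			return pv_hash_table[i].node;
		}
	}
}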
From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 02/13] locking/qspinlock: inline mcs_spinlock functions into qspinlock
Date: Tue, 5 Jul 2022 00:38:09 +1000
Message-Id: <20220704143820.3071004-3-npiggin@gmail.com>
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>
References: <20220704143820.3071004-1-npiggin@gmail.com>

qspinlock uses mcs_spinlock for the struct type (.next, .locked, and the
misplaced .count), and arch_mcs_spin_{un}lock_contended(). These can be
trivially inlined into qspinlock, and the unused mcs_spinlock code
removed.
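[Illustration: the two arch hooks being inlined reduce to an acquire-spin
and a release-store. A minimal C11 model of that handoff; these are not the
kernel macros, which additionally allow arch overrides such as arm's
wfe-based wait.]

#include <stdatomic.h>

/* Waiter side: spin until the previous owner hands the lock over.
 * Models arch_mcs_spin_lock_contended()'s smp_cond_load_acquire(). */
static void mcs_wait_for_handoff(atomic_int *locked)
{
	while (!atomic_load_explicit(locked, memory_order_acquire))
		;	/* the kernel relaxes the cpu here (cpu_relax/wfe) */
}

/* Owner side: publish the critical section and pass the lock on.
 * Models arch_mcs_spin_unlock_contended()'s smp_store_release(). */
static void mcs_handoff(atomic_int *next_locked)
{
	atomic_store_explicit(next_locked, 1, memory_order_release);
}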
Signed-off-by: Nicholas Piggin
---
 arch/alpha/include/asm/Kbuild       |   1 -
 arch/arc/include/asm/Kbuild         |   1 -
 arch/arm/include/asm/mcs_spinlock.h |  24 ------
 arch/arm64/include/asm/Kbuild       |   1 -
 arch/hexagon/include/asm/Kbuild     |   1 -
 arch/ia64/include/asm/Kbuild        |   1 -
 arch/m68k/include/asm/Kbuild        |   1 -
 arch/microblaze/include/asm/Kbuild  |   1 -
 arch/mips/include/asm/Kbuild        |   1 -
 arch/nios2/include/asm/Kbuild       |   1 -
 arch/parisc/include/asm/Kbuild      |   1 -
 arch/powerpc/include/asm/Kbuild     |   1 -
 arch/s390/include/asm/Kbuild        |   1 -
 arch/sh/include/asm/Kbuild          |   1 -
 arch/sparc/include/asm/Kbuild       |   1 -
 arch/um/include/asm/Kbuild          |   1 -
 arch/x86/include/asm/Kbuild         |   1 -
 arch/xtensa/include/asm/Kbuild      |   1 -
 include/asm-generic/mcs_spinlock.h  |  13 ---
 kernel/locking/mcs_spinlock.h       | 121 ----------------------------
 kernel/locking/qspinlock.c          |  38 ++++-----
 kernel/locking/qspinlock_paravirt.h |  55 ++++++-------
 22 files changed, 43 insertions(+), 225 deletions(-)
 delete mode 100644 arch/arm/include/asm/mcs_spinlock.h
 delete mode 100644 include/asm-generic/mcs_spinlock.h
 delete mode 100644 kernel/locking/mcs_spinlock.h

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 42911c8340c7..d21cf7b3173a 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -3,4 +3,3 @@ generated-y += syscall_table.h
 generic-y += export.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 3c1afa524b9c..5ae4337a9301 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += user.h
diff --git a/arch/arm/include/asm/mcs_spinlock.h b/arch/arm/include/asm/mcs_spinlock.h
deleted file mode 100644
index 529d2cf4d06f..000000000000
--- a/arch/arm/include/asm/mcs_spinlock.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_MCS_LOCK_H
-#define __ASM_MCS_LOCK_H
-
-#ifdef CONFIG_SMP
-#include <asm/spinlock.h>
-
-/* MCS spin-locking. */
-#define arch_mcs_spin_lock_contended(lock)				\
-do {									\
-	/* Ensure prior stores are observed before we enter wfe. */	\
-	smp_mb();							\
-	while (!(smp_load_acquire(lock)))				\
-		wfe();							\
-} while (0)							\
-
-#define arch_mcs_spin_unlock_contended(lock)				\
-do {									\
-	smp_store_release(lock, 1);					\
-	dsb_sev();							\
-} while (0)
-
-#endif	/* CONFIG_SMP */
-#endif	/* __ASM_MCS_LOCK_H */
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 5c8ee5a541d2..57e9ad366d25 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += early_ioremap.h
-generic-y += mcs_spinlock.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
 generic-y += parport.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 3ece3c93fe08..37bbf99f66d4 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -2,4 +2,3 @@
 generic-y += extable.h
 generic-y += iomap.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index f994c1daf9d4..a0198c12e339 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += vtime.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 1b720299deb1..8dbef73ce01d 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -3,5 +3,4 @@ generated-y += syscall_table.h
 generic-y += export.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += spinlock.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index a055f5dbe00a..7615a27e0851 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -3,7 +3,6 @@ generated-y += syscall_table.h
 generic-y += cmpxchg.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += syscalls.h
 generic-y += tlb.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index dee172716581..65cedca08771 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -9,7 +9,6 @@ generated-y += unistd_nr_o32.h
 
 generic-y += export.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 7fe7437555fb..5718eee9665c 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -2,6 +2,5 @@
 generic-y += cmpxchg.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += spinlock.h
 generic-y += user.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index e6e7f74c8ac9..1f0c28d74c88 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -2,5 +2,4 @@
 generated-y += syscall_table_32.h
 generated-y += syscall_table_64.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += user.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index bcf95ce0964f..813a8c3405ad 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,7 +4,6 @@ generated-y += syscall_table_64.h
 generated-y += syscall_table_spu.h
 generic-y += export.h
 generic-y += kvm_types.h
-generic-y += mcs_spinlock.h
 generic-y += qrwlock.h
 generic-y += vtime.h
 generic-y += early_ioremap.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 1a18d7b82f86..8b036a4ee2ca 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -7,4 +7,3 @@ generated-y += unistd_nr.h
 generic-y += asm-offsets.h
 generic-y += export.h
 generic-y += kvm_types.h
-generic-y += mcs_spinlock.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index fc44d9c88b41..3192f19bcf85 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += parport.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 0b9d98ced34a..f0b913f7ba05 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -3,4 +3,3 @@ generated-y += syscall_table_32.h
 generated-y += syscall_table_64.h
 generic-y += export.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index b2d834a29f3a..04080c0c1aec 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -14,7 +14,6 @@ generic-y += hw_irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
-generic-y += mcs_spinlock.h
 generic-y += mmiowb.h
 generic-y += module.lds.h
 generic-y += param.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 1e51650b79d7..beb7683f7b8f 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,4 +10,3 @@ generated-y += xen-hypercalls.h
 
 generic-y += early_ioremap.h
 generic-y += export.h
-generic-y += mcs_spinlock.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index fa07c686cbcc..29ae65cb68c2 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -2,7 +2,6 @@
 generated-y += syscall_table.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += mcs_spinlock.h
 generic-y += param.h
 generic-y += parport.h
 generic-y += qrwlock.h
diff --git a/include/asm-generic/mcs_spinlock.h b/include/asm-generic/mcs_spinlock.h
deleted file mode 100644
index 10cd4ffc6ba2..000000000000
--- a/include/asm-generic/mcs_spinlock.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __ASM_MCS_SPINLOCK_H
-#define __ASM_MCS_SPINLOCK_H
-
-/*
- * Architectures can define their own:
- *
- *   arch_mcs_spin_lock_contended(l)
- *   arch_mcs_spin_unlock_contended(l)
- *
- * See kernel/locking/mcs_spinlock.c.
- */
-
-#endif /* __ASM_MCS_SPINLOCK_H */
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
deleted file mode 100644
index 85251d8771d9..000000000000
--- a/kernel/locking/mcs_spinlock.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * MCS lock defines
- *
- * This file contains the main data structure and API definitions of MCS lock.
- *
- * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock
- * with the desirable properties of being fair, and with each cpu trying
- * to acquire the lock spinning on a local variable.
- * It avoids expensive cache bounces that common test-and-set spin-lock
- * implementations incur.
- */
-#ifndef __LINUX_MCS_SPINLOCK_H
-#define __LINUX_MCS_SPINLOCK_H
-
-#include <asm/mcs_spinlock.h>
-
-struct mcs_spinlock {
-	struct mcs_spinlock *next;
-	int locked; /* 1 if lock acquired */
-	int count;  /* nesting count, see qspinlock.c */
-};
-
-#ifndef arch_mcs_spin_lock_contended
-/*
- * Using smp_cond_load_acquire() provides the acquire semantics
- * required so that subsequent operations happen after the
- * lock is acquired. Additionally, some architectures such as
- * ARM64 would like to do spin-waiting instead of purely
- * spinning, and smp_cond_load_acquire() provides that behavior.
- */
-#define arch_mcs_spin_lock_contended(l)					\
-do {									\
-	smp_cond_load_acquire(l, VAL);					\
-} while (0)
-#endif
-
-#ifndef arch_mcs_spin_unlock_contended
-/*
- * smp_store_release() provides a memory barrier to ensure all
- * operations in the critical section has been completed before
- * unlocking.
- */
-#define arch_mcs_spin_unlock_contended(l)				\
-	smp_store_release((l), 1)
-#endif
-
-/*
- * Note: the smp_load_acquire/smp_store_release pair is not
- * sufficient to form a full memory barrier across
- * cpus for many architectures (except x86) for mcs_unlock and mcs_lock.
- * For applications that need a full barrier across multiple cpus
- * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be
- * used after mcs_lock.
- */
-
-/*
- * In order to acquire the lock, the caller should declare a local node and
- * pass a reference of the node to this function in addition to the lock.
- * If the lock has already been acquired, then this will proceed to spin
- * on this node->locked until the previous lock holder sets the node->locked
- * in mcs_spin_unlock().
- */
-static inline
-void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
-	struct mcs_spinlock *prev;
-
-	/* Init node */
-	node->locked = 0;
-	node->next   = NULL;
-
-	/*
-	 * We rely on the full barrier with global transitivity implied by the
-	 * below xchg() to order the initialization stores above against any
-	 * observation of @node. And to provide the ACQUIRE ordering associated
-	 * with a LOCK primitive.
-	 */
-	prev = xchg(lock, node);
-	if (likely(prev == NULL)) {
-		/*
-		 * Lock acquired, don't need to set node->locked to 1. Threads
-		 * only spin on its own node->locked value for lock acquisition.
-		 * However, since this thread can immediately acquire the lock
-		 * and does not proceed to spin on its own node->locked, this
-		 * value won't be used. If a debug mode is needed to
-		 * audit lock status, then set node->locked value here.
-		 */
-		return;
-	}
-	WRITE_ONCE(prev->next, node);
-
-	/* Wait until the lock holder passes the lock down. */
-	arch_mcs_spin_lock_contended(&node->locked);
-}
-
-/*
- * Releases the lock. The caller should pass in the corresponding node that
- * was used to acquire the lock.
- */
-static inline
-void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
-{
-	struct mcs_spinlock *next = READ_ONCE(node->next);
-
-	if (likely(!next)) {
-		/*
-		 * Release the lock by setting it to NULL
-		 */
-		if (likely(cmpxchg_release(lock, node, NULL) == node))
-			return;
-		/* Wait until the next pointer is set */
-		while (!(next = READ_ONCE(node->next)))
-			cpu_relax();
-	}
-
-	/* Pass lock to next waiter. */
-	arch_mcs_spin_unlock_contended(&next->locked);
-}
-
-#endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index a0fc21d99199..32f401e966ab 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -66,11 +66,10 @@
  *
  */
 
-#include "mcs_spinlock.h"
 #define MAX_NODES	4
 
 /*
- * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
+ * On 64-bit architectures, the qnode structure will be 16 bytes in
  * size and four of them will fit nicely in one 64-byte cacheline. For
  * pvqspinlock, however, we need more space for extra data. To accommodate
  * that, we insert two more long words to pad it up to 32 bytes. IOW, only
@@ -80,7 +79,9 @@
  * qspinlocks.
  */
 struct qnode {
-	struct mcs_spinlock mcs;
+	struct qnode *next;
+	int locked; /* 1 if lock acquired */
+	int count;  /* nesting count */
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 	int cpu;
 	u8 state;
@@ -124,18 +125,18 @@ static inline __pure u32 encode_tail(int cpu, int idx)
 	return tail;
 }
 
-static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
+static inline __pure struct qnode *decode_tail(u32 tail)
 {
 	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
 	int idx = (tail &  _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
 
-	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
+	return per_cpu_ptr(&qnodes[idx], cpu);
 }
 
 static inline __pure
-struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
+struct qnode *grab_qnode(struct qnode *base, int idx)
 {
-	return &((struct qnode *)base + idx)->mcs;
+	return &base[idx];
 }
 
 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
@@ -271,13 +272,13 @@ static __always_inline void set_locked(struct qspinlock *lock)
  * all the PV callbacks.
  */
 
-static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
-					   struct mcs_spinlock *prev) { }
+static __always_inline void __pv_init_node(struct qnode *node) { }
+static __always_inline void __pv_wait_node(struct qnode *node,
+					   struct qnode *prev) { }
 static __always_inline void __pv_kick_node(struct qspinlock *lock,
-					   struct mcs_spinlock *node) { }
+					   struct qnode *node) { }
 static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
-						   struct mcs_spinlock *node)
+						   struct qnode *node)
 						   { return 0; }
 
 #define pv_enabled()		false
@@ -316,7 +317,7 @@ static __always_inline u32  __pv_wait_head_or_lock(struct qspinlock *lock,
  */
 void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
-	struct mcs_spinlock *prev, *next, *node;
+	struct qnode *prev, *next, *node;
 	u32 old, tail;
 	int idx;
 
@@ -399,7 +400,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 queue:
 	lockevent_inc(lock_slowpath);
 pv_queue:
-	node = this_cpu_ptr(&qnodes[0].mcs);
+	node = this_cpu_ptr(&qnodes[0]);
 	idx = node->count++;
 	tail = encode_tail(smp_processor_id(), idx);
 
@@ -421,7 +422,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 		goto release;
 	}
 
-	node = grab_mcs_node(node, idx);
+	node = grab_qnode(node, idx);
 
 	/*
 	 * Keep counts of non-zero index values:
@@ -475,7 +476,8 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	WRITE_ONCE(prev->next, node);
 
 	pv_wait_node(node, prev);
-	arch_mcs_spin_lock_contended(&node->locked);
+	/* Wait for mcs node lock to be released */
+	smp_cond_load_acquire(&node->locked, VAL);
 
 	/*
 	 * While waiting for the MCS lock, the next pointer may have
@@ -554,7 +556,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	if (!next)
 		next = smp_cond_load_relaxed(&node->next, (VAL));
 
-	arch_mcs_spin_unlock_contended(&next->locked);
+	smp_store_release(&next->locked, 1); /* unlock the mcs node lock */
 	pv_kick_node(lock, next);
 
 release:
@@ -563,7 +565,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	/*
 	 * release the node
 	 */
-	__this_cpu_dec(qnodes[0].mcs.count);
+	__this_cpu_dec(qnodes[0].count);
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
 
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index b6a175155f36..cce3d3dde216 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -267,16 +267,12 @@ pv_wait_early(struct qnode *prev, int loop)
 }
 
 /*
- * Initialize the PV part of the mcs_spinlock node.
+ * Initialize the PV part of the qnode.
  */
-static void pv_init_node(struct mcs_spinlock *node)
+static void pv_init_node(struct qnode *node)
 {
-	struct qnode *pn = (struct qnode *)node;
-
-	BUILD_BUG_ON(sizeof(struct qnode) > sizeof(struct qnode));
-
-	pn->cpu = smp_processor_id();
-	pn->state = vcpu_running;
+	node->cpu = smp_processor_id();
+	node->state = vcpu_running;
 }
 
 /*
@@ -284,10 +280,8 @@ static void pv_init_node(struct mcs_spinlock *node)
  * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its
  * behalf.
  */
-static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
+static void pv_wait_node(struct qnode *node, struct qnode *prev)
 {
-	struct qnode *pn = (struct qnode *)node;
-	struct qnode *pp = (struct qnode *)prev;
 	int loop;
 	bool wait_early;
 
@@ -295,7 +289,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 	for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
 		if (READ_ONCE(node->locked))
 			return;
-		if (pv_wait_early(pp, loop)) {
+		if (pv_wait_early(prev, loop)) {
 			wait_early = true;
 			break;
 		}
@@ -303,20 +297,20 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 	}
 
 	/*
-	 * Order pn->state vs pn->locked thusly:
+	 * Order node->state vs node->locked thusly:
 	 *
-	 * [S] pn->state = vcpu_halted	  [S] next->locked = 1
-	 *     MB			      MB
-	 * [L] pn->locked		[RmW] pn->state = vcpu_hashed
+	 * [S] node->state = vcpu_halted    [S] next->locked = 1
+	 *     MB			        MB
+	 * [L] node->locked		  [RmW] node->state = vcpu_hashed
 	 *
 	 * Matches the cmpxchg() from pv_kick_node().
 	 */
-	smp_store_mb(pn->state, vcpu_halted);
+	smp_store_mb(node->state, vcpu_halted);
 
 	if (!READ_ONCE(node->locked)) {
 		lockevent_inc(pv_wait_node);
 		lockevent_cond_inc(pv_wait_early, wait_early);
-		pv_wait(&pn->state, vcpu_halted);
+		pv_wait(&node->state, vcpu_halted);
 	}
 
 	/*
@@ -324,7 +318,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 	 * value so that pv_wait_head_or_lock() knows to not also try
 	 * to hash this lock.
 	 */
-	cmpxchg(&pn->state, vcpu_halted, vcpu_running);
+	cmpxchg(&node->state, vcpu_halted, vcpu_running);
 
 	/*
 	 * If the locked flag is still not set after wakeup, it is a
@@ -351,10 +345,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
  * such that they're waiting in pv_wait_head_or_lock(), this avoids a
  * wake/sleep cycle.
  */
-static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
+static void pv_kick_node(struct qspinlock *lock, struct qnode *node)
 {
-	struct qnode *pn = (struct qnode *)node;
-
 	/*
 	 * If the vCPU is indeed halted, advance its state to match that of
 	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
@@ -363,15 +355,15 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 	 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
 	 *
 	 * The write to next->locked in arch_mcs_spin_unlock_contended()
-	 * must be ordered before the read of pn->state in the cmpxchg()
+	 * must be ordered before the read of node->state in the cmpxchg()
 	 * below for the code to work correctly. To guarantee full ordering
 	 * irrespective of the success or failure of the cmpxchg(),
 	 * a relaxed version with explicit barrier is used. The control
-	 * dependency will order the reading of pn->state before any
+	 * dependency will order the reading of node->state before any
 	 * subsequent writes.
 	 */
 	smp_mb__before_atomic();
-	if (cmpxchg_relaxed(&pn->state, vcpu_halted, vcpu_hashed)
+	if (cmpxchg_relaxed(&node->state, vcpu_halted, vcpu_hashed)
 	    != vcpu_halted)
 		return;
 
@@ -383,7 +375,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 	 * needed.
 	 */
 	WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
-	(void)pv_hash(lock, pn);
+	(void)pv_hash(lock, node);
 }
 
 /*
@@ -394,9 +386,8 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 * The current value of the lock will be returned for additional processing.
 */
 static u32
-pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
+pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node)
 {
-	struct qnode *pn = (struct qnode *)node;
 	struct qspinlock **lp = NULL;
 	int waitcnt = 0;
 	int loop;
@@ -405,7 +396,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 	 * If pv_kick_node() already advanced our state, we don't need to
 	 * insert ourselves into the hash table anymore.
 	 */
-	if (READ_ONCE(pn->state) == vcpu_hashed)
+	if (READ_ONCE(node->state) == vcpu_hashed)
 		lp = (struct qspinlock **)1;
 
 	/*
@@ -418,7 +409,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 	 * Set correct vCPU state to be used by queue node wait-early
 	 * mechanism.
 	 */
-	WRITE_ONCE(pn->state, vcpu_running);
+	WRITE_ONCE(node->state, vcpu_running);
 
 	/*
 	 * Set the pending bit in the active lock spinning loop to
@@ -434,7 +425,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 
 
 		if (!lp) { /* ONCE */
-			lp = pv_hash(lock, pn);
+			lp = pv_hash(lock, node);
 
 			/*
 			 * We must hash before setting _Q_SLOW_VAL, such that
@@ -458,7 +449,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 				goto gotlock;
 			}
 		}
-		WRITE_ONCE(pn->state, vcpu_hashed);
+		WRITE_ONCE(node->state, vcpu_hashed);
 		lockevent_inc(pv_wait_head);
 		lockevent_cond_inc(pv_wait_again, waitcnt);
 		pv_wait(&lock->locked, _Q_SLOW_VAL);
-- 
2.35.1
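[Illustration: the encode_tail()/decode_tail() pair retyped above packs
(cpu + 1, nesting index) into the lock word's tail field. A standalone
model with illustrative constants; the kernel derives the shifts and masks
from _Q_TAIL_IDX_* and _Q_TAIL_CPU_* instead.]

#include <assert.h>
#include <stdint.h>

/* Illustrative layout: low 2 bits = nesting index, rest = cpu + 1. */
#define TAIL_IDX_BITS	2
#define TAIL_IDX_MASK	((1u << TAIL_IDX_BITS) - 1)

static uint32_t encode_tail_model(int cpu, int idx)
{
	/* cpu + 1 so that tail == 0 means "no queue". */
	return ((uint32_t)(cpu + 1) << TAIL_IDX_BITS) | (uint32_t)idx;
}

static void decode_tail_model(uint32_t tail, int *cpu, int *idx)
{
	*cpu = (int)(tail >> TAIL_IDX_BITS) - 1;
	*idx = (int)(tail & TAIL_IDX_MASK);
}

int main(void)
{
	int cpu, idx;

	decode_tail_model(encode_tail_model(5, 3), &cpu, &idx);
	assert(cpu == 5 && idx == 3);
	return 0;
}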
From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 03/13] locking/qspinlock: split common mcs queueing code into its own function
Date: Tue, 5 Jul 2022 00:38:10 +1000
Message-Id: <20220704143820.3071004-4-npiggin@gmail.com>
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>
References: <20220704143820.3071004-1-npiggin@gmail.com>

The pv qspinlock code jumps over a bunch of slowpath code directly to
the queueing part. Split the queueing code into its own function and
call it explicitly in each of the pv and !pv cases. This will help to
untangle the two cases with subsequent changes.

Signed-off-by: Nicholas Piggin
---
 kernel/locking/qspinlock.c | 208 +++++++++++++++++++------------------
 1 file changed, 108 insertions(+), 100 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 32f401e966ab..7360d643de29 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -294,112 +294,14 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
 
 #endif /* _GEN_PV_LOCK_SLOWPATH */
 
-/**
- * queued_spin_lock_slowpath - acquire the queued spinlock
- * @lock: Pointer to queued spinlock structure
- * @val: Current value of the queued spinlock 32-bit word
- *
- * (queue tail, pending bit, lock value)
- *
- *              fast     :    slow                                  :    unlock
- *                       :                                          :
- * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
- *                       :       | ^--------.------.             /  :
- *                       :       v           \      \            |  :
- * pending               :    (0,1,1) +--> (0,1,0)   \            |  :
- *                       :       | ^--'              |            |  :
- *                       :       v                   |            |  :
- * uncontended           :    (n,x,y) +--> (n,0,0) --'            |  :
- *   queue               :       | ^--'                           |  :
- *                       :       v                                |  :
- * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
- *   queue               :         ^--'                             :
- */
-void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
 {
 	struct qnode *prev, *next, *node;
-	u32 old, tail;
+	u32 val, old, tail;
 	int idx;
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
-	if (pv_enabled())
-		goto pv_queue;
-
-	if (virt_spin_lock(lock))
-		return;
-
-	/*
-	 * Wait for in-progress pending->locked hand-overs with a bounded
-	 * number of spins so that we guarantee forward progress.
-	 *
-	 * 0,1,0 -> 0,0,1
-	 */
-	if (val == _Q_PENDING_VAL) {
-		int cnt = _Q_PENDING_LOOPS;
-		val = atomic_cond_read_relaxed(&lock->val,
-					       (VAL != _Q_PENDING_VAL) || !cnt--);
-	}
-
-	/*
-	 * If we observe any contention; queue.
-	 */
-	if (val & ~_Q_LOCKED_MASK)
-		goto queue;
-
-	/*
-	 * trylock || pending
-	 *
-	 * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
-	 */
-	val = queued_fetch_set_pending_acquire(lock);
-
-	/*
-	 * If we observe contention, there is a concurrent locker.
-	 *
-	 * Undo and queue; our setting of PENDING might have made the
-	 * n,0,0 -> 0,0,0 transition fail and it will now be waiting
-	 * on @next to become !NULL.
-	 */
-	if (unlikely(val & ~_Q_LOCKED_MASK)) {
-
-		/* Undo PENDING if we set it. */
-		if (!(val & _Q_PENDING_MASK))
-			clear_pending(lock);
-
-		goto queue;
-	}
-
-	/*
-	 * We're pending, wait for the owner to go away.
-	 *
-	 * 0,1,1 -> 0,1,0
-	 *
-	 * this wait loop must be a load-acquire such that we match the
-	 * store-release that clears the locked bit and create lock
-	 * sequentiality; this is because not all
-	 * clear_pending_set_locked() implementations imply full
-	 * barriers.
-	 */
-	if (val & _Q_LOCKED_MASK)
-		atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK));
-
-	/*
-	 * take ownership and clear the pending bit.
-	 *
-	 * 0,1,0 -> 0,0,1
-	 */
-	clear_pending_set_locked(lock);
-	lockevent_inc(lock_pending);
-	return;
-
-	/*
-	 * End of pending bit optimistic spinning and beginning of MCS
-	 * queuing.
-	 */
-queue:
-	lockevent_inc(lock_slowpath);
-pv_queue:
 	node = this_cpu_ptr(&qnodes[0]);
 	idx = node->count++;
 	tail = encode_tail(smp_processor_id(), idx);
@@ -567,6 +469,110 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 */
 	__this_cpu_dec(qnodes[0].count);
 }
+
+/**
+ * queued_spin_lock_slowpath - acquire the queued spinlock
+ * @lock: Pointer to queued spinlock structure
+ * @val: Current value of the queued spinlock 32-bit word
+ *
+ * (queue tail, pending bit, lock value)
+ *
+ *              fast     :    slow                                  :    unlock
+ *                       :                                          :
+ * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
+ *                       :       | ^--------.------.             /  :
+ *                       :       v           \      \            |  :
+ * pending               :    (0,1,1) +--> (0,1,0)   \            |  :
+ *                       :       | ^--'              |            |  :
+ *                       :       v                   |            |  :
+ * uncontended           :    (n,x,y) +--> (n,0,0) --'            |  :
+ *   queue               :       | ^--'                           |  :
+ *                       :       v                                |  :
+ * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
+ *   queue               :         ^--'                             :
+ */
+void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+	if (pv_enabled()) {
+		queued_spin_lock_mcs_queue(lock);
+		return;
+	}
+
+	if (virt_spin_lock(lock))
+		return;
+
+	/*
+	 * Wait for in-progress pending->locked hand-overs with a bounded
+	 * number of spins so that we guarantee forward progress.
+	 *
+	 * 0,1,0 -> 0,0,1
+	 */
+	if (val == _Q_PENDING_VAL) {
+		int cnt = _Q_PENDING_LOOPS;
+		val = atomic_cond_read_relaxed(&lock->val,
+					       (VAL != _Q_PENDING_VAL) || !cnt--);
+	}
+
+	/*
+	 * If we observe any contention; queue.
+	 */
+	if (val & ~_Q_LOCKED_MASK)
+		goto queue;
+
+	/*
+	 * trylock || pending
+	 *
+	 * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
+	 */
+	val = queued_fetch_set_pending_acquire(lock);
+
+	/*
+	 * If we observe contention, there is a concurrent locker.
+	 *
+	 * Undo and queue; our setting of PENDING might have made the
+	 * n,0,0 -> 0,0,0 transition fail and it will now be waiting
+	 * on @next to become !NULL.
+	 */
+	if (unlikely(val & ~_Q_LOCKED_MASK)) {
+
+		/* Undo PENDING if we set it. */
+		if (!(val & _Q_PENDING_MASK))
+			clear_pending(lock);
+
+		goto queue;
+	}
+
+	/*
+	 * We're pending, wait for the owner to go away.
+	 *
+	 * 0,1,1 -> 0,1,0
+	 *
+	 * this wait loop must be a load-acquire such that we match the
+	 * store-release that clears the locked bit and create lock
+	 * sequentiality; this is because not all
+	 * clear_pending_set_locked() implementations imply full
+	 * barriers.
+	 */
+	if (val & _Q_LOCKED_MASK)
+		atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK));
+
+	/*
+	 * take ownership and clear the pending bit.
+	 *
+	 * 0,1,0 -> 0,0,1
+	 */
+	clear_pending_set_locked(lock);
+	lockevent_inc(lock_pending);
+	return;
+
+	/*
+	 * End of pending bit optimistic spinning and beginning of MCS
+	 * queuing.
+	 */
+queue:
+	lockevent_inc(lock_slowpath);
+	queued_spin_lock_mcs_queue(lock);
+}
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
 
 /*
@@ -583,6 +589,8 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
 #undef pv_kick_node
 #undef pv_wait_head_or_lock
 
+#define queued_spin_lock_mcs_queue	__pv_queued_spin_lock_mcs_queue
+
 #undef queued_spin_lock_slowpath
 #define queued_spin_lock_slowpath	__pv_queued_spin_lock_slowpath
 
-- 
2.35.1
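[Illustration: the resulting control flow is easy to see in miniature. A
schematic of the split, not the kernel code; all helpers here are
placeholders standing in for the real pv_enabled()/pending-bit/MCS logic.]

#include <stdbool.h>

struct qspinlock { int val; };		/* stand-in, not the kernel's */

static bool pv_enabled(void) { return false; }			/* placeholder */
static bool try_pending_fastpath(struct qspinlock *l) { (void)l; return false; }
static void mcs_queue(struct qspinlock *l) { (void)l; }		/* placeholder */

/* After the split, both the paravirt and the native path funnel into
 * one explicit queueing call instead of a goto into shared code. */
static void slowpath(struct qspinlock *lock)
{
	if (pv_enabled()) {
		/* pv skips the pending-bit optimistic spinning entirely */
		mcs_queue(lock);
		return;
	}

	if (try_pending_fastpath(lock))
		return;			/* got it via the pending bit */

	mcs_queue(lock);		/* contended: join the MCS queue */
}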
From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 04/13] locking/qspinlock: move pv lock word helpers into qspinlock.c
Date: Tue, 5 Jul 2022 00:38:11 +1000
Message-Id: <20220704143820.3071004-5-npiggin@gmail.com>
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>
References: <20220704143820.3071004-1-npiggin@gmail.com>

There is no real reason not to keep all the bit manipulation together.

Signed-off-by: Nicholas Piggin
---
 kernel/locking/qspinlock.c          | 107 ++++++++++++++++------------
 kernel/locking/qspinlock_paravirt.h |  51 -------------
 2 files changed, 63 insertions(+), 95 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 7360d643de29..8f2173e22479 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -141,7 +141,24 @@ struct qnode *grab_qnode(struct qnode *base, int idx)
 
 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
 
+/**
+ * set_pending - set the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,0,* -> *,1,*
+ *
+ * The pending bit is used by the queue head vCPU to indicate that it
+ * is actively spinning on the lock and no lock stealing is allowed.
+ */
+static __always_inline void set_pending(struct qspinlock *lock)
+{
 #if _Q_PENDING_BITS == 8
+	WRITE_ONCE(lock->pending, 1);
+#else
+	atomic_or(_Q_PENDING_VAL, &lock->val);
+#endif
+}
+
 /**
  * clear_pending - clear the pending bit.
  * @lock: Pointer to queued spinlock structure
@@ -150,7 +167,11 @@ struct qnode *grab_qnode(struct qnode *base, int idx)
  */
 static __always_inline void clear_pending(struct qspinlock *lock)
 {
+#if _Q_PENDING_BITS == 8
 	WRITE_ONCE(lock->pending, 0);
+#else
+	atomic_andnot(_Q_PENDING_VAL, &lock->val);
+#endif
 }
 
 /**
@@ -163,74 +184,72 @@ static __always_inline void clear_pending(struct qspinlock *lock)
  */
 static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
 {
+#if _Q_PENDING_BITS == 8
 	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
+#else
+	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
+#endif
 }
 
-/*
- * xchg_tail - Put in the new queue tail code word & retrieve previous one
- * @lock : Pointer to queued spinlock structure
- * @tail : The new queue tail code word
- * Return: The previous queue tail code word
- *
- * xchg(lock, tail), which heads an address dependency
- *
- * p,*,* -> n,*,* ; prev = xchg(lock, node)
- */
-static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
-{
-	/*
-	 * We can use relaxed semantics since the caller ensures that the
-	 * MCS node is properly initialized before updating the tail.
-	 */
-	return (u32)xchg_relaxed(&lock->tail,
-				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
-}
-
-#else /* _Q_PENDING_BITS == 8 */
-
 /**
- * clear_pending - clear the pending bit.
+ * trylock_clear_pending - try to take ownership and clear the pending bit
  * @lock: Pointer to queued spinlock structure
  *
- * *,1,* -> *,0,*
+ * 0,1,0 -> 0,0,1
  */
-static __always_inline void clear_pending(struct qspinlock *lock)
+static __always_inline int trylock_clear_pending(struct qspinlock *lock)
 {
-	atomic_andnot(_Q_PENDING_VAL, &lock->val);
-}
+#if _Q_PENDING_BITS == 8
+	return !READ_ONCE(lock->locked) &&
+	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
+				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
+#else
+	int val = atomic_read(&lock->val);
 
-/**
- * clear_pending_set_locked - take ownership and clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,0 -> *,0,1
- */
-static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
-{
-	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
+	for (;;) {
+		int old, new;
+
+		if (val & _Q_LOCKED_MASK)
+			break;
+
+		/*
+		 * Try to clear pending bit & set locked bit
+		 */
+		old = val;
+		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+		val = atomic_cmpxchg_acquire(&lock->val, old, new);
+
+		if (val == old)
+			return 1;
+	}
+	return 0;
+#endif
 }
 
-/**
+/*
 * xchg_tail - Put in the new queue tail code word & retrieve previous one
 * @lock : Pointer to queued spinlock structure
 * @tail : The new queue tail code word
 * Return: The previous queue tail code word
 *
- * xchg(lock, tail)
+ * xchg(lock, tail), which heads an address dependency
 *
 * p,*,* -> n,*,* ; prev = xchg(lock, node)
 */
 static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 {
+	/*
+	 * We can use relaxed semantics since the caller ensures that the
+	 * MCS node is properly initialized before updating the tail.
+	 */
+#if _Q_PENDING_BITS == 8
+	return (u32)xchg_relaxed(&lock->tail,
+				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+#else
 	u32 old, new, val = atomic_read(&lock->val);
 
 	for (;;) {
 		new = (val & _Q_LOCKED_PENDING_MASK) | tail;
-		/*
-		 * We can use relaxed semantics since the caller ensures that
-		 * the MCS node is properly initialized before updating the
-		 * tail.
-		 */
 		old = atomic_cmpxchg_relaxed(&lock->val, val, new);
 		if (old == val)
 			break;
@@ -238,8 +257,8 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 		val = old;
 	}
 	return old;
+#endif
 }
-#endif /* _Q_PENDING_BITS == 8 */
 
 /**
 * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index cce3d3dde216..97385861adc2 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -95,57 +95,6 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
 	return false;
 }
 
-/*
- * The pending bit is used by the queue head vCPU to indicate that it
- * is actively spinning on the lock and no lock stealing is allowed.
- */
-#if _Q_PENDING_BITS == 8
-static __always_inline void set_pending(struct qspinlock *lock)
-{
-	WRITE_ONCE(lock->pending, 1);
-}
-
-/*
- * The pending bit check in pv_queued_spin_steal_lock() isn't a memory
- * barrier. Therefore, an atomic cmpxchg_acquire() is used to acquire the
- * lock just to be sure that it will get it.
- */
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
-{
-	return !READ_ONCE(lock->locked) &&
-	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
-				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
-}
-#else /* _Q_PENDING_BITS == 8 */
-static __always_inline void set_pending(struct qspinlock *lock)
-{
-	atomic_or(_Q_PENDING_VAL, &lock->val);
-}
-
-static __always_inline int trylock_clear_pending(struct qspinlock *lock)
-{
-	int val = atomic_read(&lock->val);
-
-	for (;;) {
-		int old, new;
-
-		if (val & _Q_LOCKED_MASK)
-			break;
-
-		/*
-		 * Try to clear pending bit & set locked bit
-		 */
-		old = val;
-		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
-		val = atomic_cmpxchg_acquire(&lock->val, old, new);
-
-		if (val == old)
-			return 1;
-	}
-	return 0;
-}
-#endif /* _Q_PENDING_BITS == 8 */
-
 /*
 * Lock and MCS node addresses hash table for fast lookup
 *
-- 
2.35.1
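[Illustration: the #ifdefs exist because when the pending field is a full
byte, the helpers can use plain byte stores and a 16-bit cmpxchg; otherwise
they must RMW the whole 32-bit word. A rough little-endian model of the
byte-addressable layout these helpers assume; field widths differ for very
large NR_CPUS, so this is only the common case.]

#include <stdint.h>

/* Sketch of the qspinlock word when _Q_PENDING_BITS == 8:
 * bits 0-7 locked byte, 8-15 pending byte, 16-31 tail (idx + cpu). */
union qspinlock_model {
	uint32_t val;
	struct {
		uint8_t  locked;		/* _Q_LOCKED_OFFSET = 0 */
		uint8_t  pending;		/* _Q_PENDING_OFFSET = 8 */
		uint16_t tail;			/* what xchg_tail swaps */
	};
	struct {
		uint16_t locked_pending;	/* what trylock_clear_pending cmpxchgs */
		uint16_t tail_hi;
	};
};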
u13-20020a63454d000000b0040d2224ae04sm20534365pgk.76.2022.07.04.07.38.44 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Mon, 04 Jul 2022 07:38:47 -0700 (PDT) From: Nicholas Piggin To: Peter Zijlstra Cc: Nicholas Piggin , Ingo Molnar , Will Deacon , Waiman Long , Boqun Feng , linux-kernel@vger.kernel.org Subject: [PATCH 05/13] locking/qspinlock: be less clever with the preprocessor Date: Tue, 5 Jul 2022 00:38:12 +1000 Message-Id: <20220704143820.3071004-6-npiggin@gmail.com> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com> References: <20220704143820.3071004-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Stop qspinlock.c including itself and avoid most of the function renaming with the preprocessor. This is mostly done by having the common slowpath code take a 'bool paravirt' argument and adjusting code based on that. Signed-off-by: Nicholas Piggin --- kernel/locking/qspinlock.c | 116 ++++++++++++---------------- kernel/locking/qspinlock_paravirt.h | 10 +-- 2 files changed, 52 insertions(+), 74 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 8f2173e22479..b96c58ca51de 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -11,8 +11,6 @@ * Peter Zijlstra */ =20 -#ifndef _GEN_PV_LOCK_SLOWPATH - #include #include #include @@ -285,35 +283,21 @@ static __always_inline void set_locked(struct qspinlo= ck *lock) WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); } =20 - -/* - * Generate the native code for queued_spin_unlock_slowpath(); provide NOP= s for - * all the PV callbacks. - */ - -static __always_inline void __pv_init_node(struct qnode *node) { } -static __always_inline void __pv_wait_node(struct qnode *node, - struct qnode *prev) { } -static __always_inline void __pv_kick_node(struct qspinlock *lock, - struct qnode *node) { } -static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, - struct qnode *node) - { return 0; } - -#define pv_enabled() false - -#define pv_init_node __pv_init_node -#define pv_wait_node __pv_wait_node -#define pv_kick_node __pv_kick_node -#define pv_wait_head_or_lock __pv_wait_head_or_lock - #ifdef CONFIG_PARAVIRT_SPINLOCKS -#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath -#endif - -#endif /* _GEN_PV_LOCK_SLOWPATH */ +#include "qspinlock_paravirt.h" +#else /* CONFIG_PARAVIRT_SPINLOCKS */ +static __always_inline void pv_init_node(struct qnode *node) { } +static __always_inline void pv_wait_node(struct qnode *node, + struct qnode *prev) { } +static __always_inline void pv_kick_node(struct qspinlock *lock, + struct qnode *node) { } +static __always_inline u32 pv_wait_head_or_lock(struct qspinlock *lock, + struct qnode *node) + { return 0; } +static __always_inline bool pv_hybrid_queued_unfair_trylock(struct qspinlo= ck *lock) { BUILD_BUG(); } +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ =20 -static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock) +static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool= paravirt) { struct qnode *prev, *next, *node; u32 val, old, tail; @@ -338,8 +322,13 @@ static inline void queued_spin_lock_mcs_queue(struct q= spinlock *lock) */ if (unlikely(idx >=3D MAX_NODES)) { lockevent_inc(lock_no_node); - while (!queued_spin_trylock(lock)) - cpu_relax(); + if (paravirt) { + while (!pv_hybrid_queued_unfair_trylock(lock)) + cpu_relax(); 
+ } else { + while (!queued_spin_trylock(lock)) + cpu_relax(); + } goto release; } =20 @@ -359,15 +348,21 @@ static inline void queued_spin_lock_mcs_queue(struct = qspinlock *lock) =20 node->locked =3D 0; node->next =3D NULL; - pv_init_node(node); + if (paravirt) + pv_init_node(node); =20 /* * We touched a (possibly) cold cacheline in the per-cpu queue node; * attempt the trylock once more in the hope someone let go while we * weren't watching. */ - if (queued_spin_trylock(lock)) - goto release; + if (paravirt) { + if (pv_hybrid_queued_unfair_trylock(lock)) + goto release; + } else { + if (queued_spin_trylock(lock)) + goto release; + } =20 /* * Ensure that the initialisation of @node is complete before we @@ -396,7 +391,8 @@ static inline void queued_spin_lock_mcs_queue(struct qs= pinlock *lock) /* Link @node into the waitqueue. */ WRITE_ONCE(prev->next, node); =20 - pv_wait_node(node, prev); + if (paravirt) + pv_wait_node(node, prev); /* Wait for mcs node lock to be released */ smp_cond_load_acquire(&node->locked, VAL); =20 @@ -432,8 +428,10 @@ static inline void queued_spin_lock_mcs_queue(struct q= spinlock *lock) * If PV isn't active, 0 will be returned instead. * */ - if ((val =3D pv_wait_head_or_lock(lock, node))) - goto locked; + if (paravirt) { + if ((val =3D pv_wait_head_or_lock(lock, node))) + goto locked; + } =20 val =3D atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MA= SK)); =20 @@ -478,7 +476,8 @@ static inline void queued_spin_lock_mcs_queue(struct qs= pinlock *lock) next =3D smp_cond_load_relaxed(&node->next, (VAL)); =20 smp_store_release(&next->locked, 1); /* unlock the mcs node lock */ - pv_kick_node(lock, next); + if (paravirt) + pv_kick_node(lock, next); =20 release: trace_contention_end(lock, 0); @@ -510,13 +509,12 @@ static inline void queued_spin_lock_mcs_queue(struct = qspinlock *lock) * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' : * queue : ^--' : */ +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath +#endif + void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { - if (pv_enabled()) { - queued_spin_lock_mcs_queue(lock); - return; - } - if (virt_spin_lock(lock)) return; =20 @@ -590,31 +588,17 @@ void queued_spin_lock_slowpath(struct qspinlock *lock= , u32 val) */ queue: lockevent_inc(lock_slowpath); - queued_spin_lock_mcs_queue(lock); + queued_spin_lock_mcs_queue(lock, false); } EXPORT_SYMBOL(queued_spin_lock_slowpath); =20 -/* - * Generate the paravirt code for queued_spin_unlock_slowpath(). 
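[Aside, not from the patch: the reason a plain 'bool paravirt' parameter can replace the old _GEN_PV_LOCK_SLOWPATH preprocessor duplication is constant propagation through an always-inlined helper. A stand-alone sketch with hypothetical names:]

	void pv_wait_for_lock(int *lock);	/* hypothetical PV helper */
	void spin_for_lock(int *lock);		/* hypothetical native helper */

	static inline __attribute__((always_inline))
	void lock_slow_common(int *lock, _Bool paravirt)
	{
		/* 'paravirt' is a literal constant at every call site, so
		 * constant propagation deletes the dead arm in each copy. */
		if (paravirt)
			pv_wait_for_lock(lock);
		else
			spin_for_lock(lock);
	}

	/* two entry points, as in the patch: each compiles to one call */
	void lock_slow_native(int *lock) { lock_slow_common(lock, 0); }
	void lock_slow_pv(int *lock)     { lock_slow_common(lock, 1); }

Building this with -O2 and inspecting the assembly shows each wrapper keeps only its own arm, which is the effect the double-include trick achieved without compiling the file twice.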
- */ -#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS) -#define _GEN_PV_LOCK_SLOWPATH - -#undef pv_enabled -#define pv_enabled() true - -#undef pv_init_node -#undef pv_wait_node -#undef pv_kick_node -#undef pv_wait_head_or_lock - -#define queued_spin_lock_mcs_queue __pv_queued_spin_lock_mcs_queue - -#undef queued_spin_lock_slowpath -#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath - -#include "qspinlock_paravirt.h" -#include "qspinlock.c" +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#undef queued_spin_lock_slowpath +void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + queued_spin_lock_mcs_queue(lock, true); +} +EXPORT_SYMBOL(__pv_queued_spin_lock_slowpath); =20 bool nopvspin __initdata; static __init int parse_nopvspin(char *arg) @@ -623,4 +607,4 @@ static __init int parse_nopvspin(char *arg) return 0; } early_param("nopvspin", parse_nopvspin); -#endif +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock= _paravirt.h index 97385861adc2..f1922e3a0f7d 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -1,8 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _GEN_PV_LOCK_SLOWPATH -#error "do not include this file" -#endif - #include #include #include @@ -50,9 +46,8 @@ enum vcpu_state { /* * Hybrid PV queued/unfair lock * - * By replacing the regular queued_spin_trylock() with the function below, - * it will be called once when a lock waiter enter the PV slowpath before - * being queued. + * This function is called once when a lock waiter enters the PV slowpath + * before being queued. * * The pending bit is set by the queue head vCPU of the MCS wait queue in * pv_wait_head_or_lock() to signal that it is ready to spin on the lock. @@ -71,7 +66,6 @@ enum vcpu_state { * queued lock (no lock starvation) and an unfair lock (good performance * on not heavily contended locks). 
  */
-#define queued_spin_trylock(l)	pv_hybrid_queued_unfair_trylock(l)
 static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
 {
 	/*
-- 
2.35.1

From nobody Sun Apr 26 10:47:28 2026
From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 06/13] locking/qspinlock: merge qspinlock_paravirt.h into qspinlock.c
Date: Tue, 5 Jul 2022 00:38:13 +1000
Message-Id: <20220704143820.3071004-7-npiggin@gmail.com>
X-Mailer: git-send-email 2.35.1
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>
References: <20220704143820.3071004-1-npiggin@gmail.com>
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
Precedence:
bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" There isn't much reason to keep these separate. Signed-off-by: Nicholas Piggin --- kernel/locking/qspinlock.c | 488 ++++++++++++++++++++++++++- kernel/locking/qspinlock_paravirt.h | 490 ---------------------------- 2 files changed, 487 insertions(+), 491 deletions(-) delete mode 100644 kernel/locking/qspinlock_paravirt.h diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index b96c58ca51de..9a235b0d98ca 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -284,7 +285,492 @@ static __always_inline void set_locked(struct qspinlo= ck *lock) } =20 #ifdef CONFIG_PARAVIRT_SPINLOCKS -#include "qspinlock_paravirt.h" +/* + * Implement paravirt qspinlocks; the general idea is to halt the vcpus in= stead + * of spinning them. + * + * This relies on the architecture to provide two paravirt hypercalls: + * + * pv_wait(u8 *ptr, u8 val) -- suspends the vcpu if *ptr =3D=3D val + * pv_kick(cpu) -- wakes a suspended vcpu + * + * Using these we implement __pv_queued_spin_lock_slowpath() and + * __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath()= and + * native_queued_spin_unlock(). + */ + +#define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET) + +/* + * Queue Node Adaptive Spinning + * + * A queue node vCPU will stop spinning if the vCPU in the previous node is + * not running. The one lock stealing attempt allowed at slowpath entry + * mitigates the slight slowdown for non-overcommitted guest with this + * aggressive wait-early mechanism. + * + * The status of the previous node will be checked at fixed interval + * controlled by PV_PREV_CHECK_MASK. This is to ensure that we won't + * pound on the cacheline of the previous node too heavily. + */ +#define PV_PREV_CHECK_MASK 0xff + +/* + * Queue node uses: vcpu_running & vcpu_halted. + * Queue head uses: vcpu_running & vcpu_hashed. + */ +enum vcpu_state { + vcpu_running =3D 0, + vcpu_halted, /* Used only in pv_wait_node */ + vcpu_hashed, /* =3D pv_hash'ed + vcpu_halted */ +}; + +/* + * Hybrid PV queued/unfair lock + * + * This function is called once when a lock waiter enters the PV slowpath + * before being queued. + * + * The pending bit is set by the queue head vCPU of the MCS wait queue in + * pv_wait_head_or_lock() to signal that it is ready to spin on the lock. + * When that bit becomes visible to the incoming waiters, no lock stealing + * is allowed. The function will return immediately to make the waiters + * enter the MCS wait queue. So lock starvation shouldn't happen as long + * as the queued mode vCPUs are actively running to set the pending bit + * and hence disabling lock stealing. + * + * When the pending bit isn't set, the lock waiters will stay in the unfair + * mode spinning on the lock unless the MCS wait queue is empty. In this + * case, the lock waiters will enter the queued mode slowpath trying to + * become the queue head and set the pending bit. + * + * This hybrid PV queued/unfair lock combines the best attributes of a + * queued lock (no lock starvation) and an unfair lock (good performance + * on not heavily contended locks). + */ +static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) +{ + /* + * Stay in unfair lock mode as long as queued mode waiters are + * present in the MCS wait queue but the pending bit isn't set. 
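[Aside, not from the patch: the loop below implements a three-way policy that is easier to audit written out as a pure predicate. Masks assume the _Q_PENDING_BITS == 8 layout of qspinlock_types.h; the function is an illustrative restatement, not kernel code.]

	#include <stdint.h>

	#define _Q_PENDING_MASK		0x0000ff00U
	#define _Q_LOCKED_PENDING_MASK	0x0000ffffU
	#define _Q_TAIL_MASK		0xffff0000U

	/* 1 = one steal attempt allowed, 0 = give up and queue, 2 = spin */
	static int hybrid_policy(uint32_t val)
	{
		if (!(val & _Q_LOCKED_PENDING_MASK))
			return 1;	/* lock looks free: try the cmpxchg steal */
		if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK))
			return 0;	/* no queue, or head set pending: queue up */
		return 2;		/* waiters queued, pending clear: spin on */
	}

The three values map one-to-one onto the branches of the loop: return 1 is the cmpxchg steal, return 0 the break to queueing, return 2 the cpu_relax() retry.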
+ */ + for (;;) { + int val =3D atomic_read(&lock->val); + + if (!(val & _Q_LOCKED_PENDING_MASK) && + (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) =3D=3D 0)) { + lockevent_inc(pv_lock_stealing); + return true; + } + if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK)) + break; + + cpu_relax(); + } + + return false; +} + +/* + * Lock and MCS node addresses hash table for fast lookup + * + * Hashing is done on a per-cacheline basis to minimize the need to access + * more than one cacheline. + * + * Dynamically allocate a hash table big enough to hold at least 4X the + * number of possible cpus in the system. Allocation is done on page + * granularity. So the minimum number of hash buckets should be at least + * 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page. + * + * Since we should not be holding locks from NMI context (very rare indeed= ) the + * max load factor is 0.75, which is around the point where open addressing + * breaks down. + * + */ +struct pv_hash_entry { + struct qspinlock *lock; + struct qnode *node; +}; + +#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry)) +#define PV_HE_MIN (PAGE_SIZE / sizeof(struct pv_hash_entry)) + +static struct pv_hash_entry *pv_lock_hash; +static unsigned int pv_lock_hash_bits __read_mostly; + +/* + * Allocate memory for the PV qspinlock hash buckets + * + * This function should be called from the paravirt spinlock initialization + * routine. + */ +void __init __pv_init_lock_hash(void) +{ + int pv_hash_size =3D ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE); + + if (pv_hash_size < PV_HE_MIN) + pv_hash_size =3D PV_HE_MIN; + + /* + * Allocate space from bootmem which should be page-size aligned + * and hence cacheline aligned. + */ + pv_lock_hash =3D alloc_large_system_hash("PV qspinlock", + sizeof(struct pv_hash_entry), + pv_hash_size, 0, + HASH_EARLY | HASH_ZERO, + &pv_lock_hash_bits, NULL, + pv_hash_size, pv_hash_size); +} + +#define for_each_hash_entry(he, offset, hash) \ + for (hash &=3D ~(PV_HE_PER_LINE - 1), he =3D &pv_lock_hash[hash], offset = =3D 0; \ + offset < (1 << pv_lock_hash_bits); \ + offset++, he =3D &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash= _bits) - 1)]) + +static struct qspinlock **pv_hash(struct qspinlock *lock, struct qnode *no= de) +{ + unsigned long offset, hash =3D hash_ptr(lock, pv_lock_hash_bits); + struct pv_hash_entry *he; + int hopcnt =3D 0; + + for_each_hash_entry(he, offset, hash) { + hopcnt++; + if (!cmpxchg(&he->lock, NULL, lock)) { + WRITE_ONCE(he->node, node); + lockevent_pv_hop(hopcnt); + return &he->lock; + } + } + /* + * Hard assume there is a free entry for us. + * + * This is guaranteed by ensuring every blocked lock only ever consumes + * a single entry, and since we only have 4 nesting levels per CPU + * and allocated 4*nr_possible_cpus(), this must be so. + * + * The single entry is guaranteed by having the lock owner unhash + * before it releases. + */ + BUG(); +} + +static struct qnode *pv_unhash(struct qspinlock *lock) +{ + unsigned long offset, hash =3D hash_ptr(lock, pv_lock_hash_bits); + struct pv_hash_entry *he; + struct qnode *node; + + for_each_hash_entry(he, offset, hash) { + if (READ_ONCE(he->lock) =3D=3D lock) { + node =3D READ_ONCE(he->node); + WRITE_ONCE(he->lock, NULL); + return node; + } + } + /* + * Hard assume we'll find an entry. + * + * This guarantees a limited lookup time and is itself guaranteed by + * having the lock owner do the unhash -- IFF the unlock sees the + * SLOW flag, there MUST be a hash entry. 
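[Aside, not from the patch: pv_hash()/pv_unhash() above are plain open addressing with linear probing. A self-contained miniature — fixed table size, no per-cacheline grouping, illustrative names:]

	#include <assert.h>
	#include <stddef.h>

	#define NBUCKETS 64	/* power of two, cf. 1 << pv_lock_hash_bits */

	struct entry { void *lock; void *node; };
	static struct entry table[NBUCKETS];

	static size_t hash_of(void *p) { return ((size_t)p >> 4) & (NBUCKETS - 1); }

	static void mini_hash(void *lock, void *node)
	{
		size_t h = hash_of(lock);

		for (size_t i = 0; i < NBUCKETS; i++, h = (h + 1) & (NBUCKETS - 1)) {
			if (!table[h].lock) {	/* kernel: cmpxchg(&he->lock, NULL, lock) */
				table[h].lock = lock;
				table[h].node = node;
				return;
			}
		}
		assert(0);	/* table is sized so a free slot always exists */
	}

	static void *mini_unhash(void *lock)
	{
		size_t h = hash_of(lock);

		for (size_t i = 0; i < NBUCKETS; i++, h = (h + 1) & (NBUCKETS - 1)) {
			if (table[h].lock == lock) {
				table[h].lock = NULL;
				return table[h].node;
			}
		}
		assert(0);	/* unlock saw _Q_SLOW_VAL, so the entry must exist */
		return NULL;
	}

The kernel version additionally rounds the initial index down to a cacheline's worth of entries, so a probe usually touches a single line.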
+ */ + BUG(); +} + +/* + * Return true if when it is time to check the previous node which is not + * in a running state. + */ +static inline bool +pv_wait_early(struct qnode *prev, int loop) +{ + if ((loop & PV_PREV_CHECK_MASK) !=3D 0) + return false; + + return READ_ONCE(prev->state) !=3D vcpu_running; +} + +/* + * Initialize the PV part of the qnode. + */ +static void pv_init_node(struct qnode *node) +{ + node->cpu =3D smp_processor_id(); + node->state =3D vcpu_running; +} + +/* + * Wait for node->locked to become true, halt the vcpu after a short spin. + * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its + * behalf. + */ +static void pv_wait_node(struct qnode *node, struct qnode *prev) +{ + int loop; + bool wait_early; + + for (;;) { + for (wait_early =3D false, loop =3D SPIN_THRESHOLD; loop; loop--) { + if (READ_ONCE(node->locked)) + return; + if (pv_wait_early(prev, loop)) { + wait_early =3D true; + break; + } + cpu_relax(); + } + + /* + * Order node->state vs node->locked thusly: + * + * [S] node->state =3D vcpu_halted [S] next->locked =3D 1 + * MB MB + * [L] node->locked [RmW] node->state =3D vcpu_hashed + * + * Matches the cmpxchg() from pv_kick_node(). + */ + smp_store_mb(node->state, vcpu_halted); + + if (!READ_ONCE(node->locked)) { + lockevent_inc(pv_wait_node); + lockevent_cond_inc(pv_wait_early, wait_early); + pv_wait(&node->state, vcpu_halted); + } + + /* + * If pv_kick_node() changed us to vcpu_hashed, retain that + * value so that pv_wait_head_or_lock() knows to not also try + * to hash this lock. + */ + cmpxchg(&node->state, vcpu_halted, vcpu_running); + + /* + * If the locked flag is still not set after wakeup, it is a + * spurious wakeup and the vCPU should wait again. However, + * there is a pretty high overhead for CPU halting and kicking. + * So it is better to spin for a while in the hope that the + * MCS lock will be released soon. + */ + lockevent_cond_inc(pv_spurious_wakeup, + !READ_ONCE(node->locked)); + } + + /* + * By now our node->locked should be 1 and our caller will not actually + * spin-wait for it. We do however rely on our caller to do a + * load-acquire for us. + */ +} + +/* + * Called after setting next->locked =3D 1 when we're the lock owner. + * + * Instead of waking the waiters stuck in pv_wait_node() advance their sta= te + * such that they're waiting in pv_wait_head_or_lock(), this avoids a + * wake/sleep cycle. + */ +static void pv_kick_node(struct qspinlock *lock, struct qnode *node) +{ + /* + * If the vCPU is indeed halted, advance its state to match that of + * pv_wait_node(). If OTOH this fails, the vCPU was running and will + * observe its next->locked value and advance itself. + * + * Matches with smp_store_mb() and cmpxchg() in pv_wait_node() + * + * The write to next->locked in arch_mcs_spin_unlock_contended() + * must be ordered before the read of node->state in the cmpxchg() + * below for the code to work correctly. To guarantee full ordering + * irrespective of the success or failure of the cmpxchg(), + * a relaxed version with explicit barrier is used. The control + * dependency will order the reading of node->state before any + * subsequent writes. + */ + smp_mb__before_atomic(); + if (cmpxchg_relaxed(&node->state, vcpu_halted, vcpu_hashed) + !=3D vcpu_halted) + return; + + /* + * Put the lock into the hash table and set the _Q_SLOW_VAL. + * + * As this is the same vCPU that will check the _Q_SLOW_VAL value and + * the hash table later on at unlock time, no atomic instruction is + * needed. 
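[Aside, not from the patch: the vcpu_halted handshake between pv_wait_node() and pv_kick_node() reduces to this two-thread skeleton, written with C11 seq_cst atomics standing in for the kernel's explicit barriers; pv_wait()/pv_kick() are left as comments since they are hypercalls:]

	#include <stdatomic.h>

	enum { RUNNING, HALTED, HASHED };
	static _Atomic int state = RUNNING;
	static _Atomic int locked = 0;

	static void waiter(void)		/* pv_wait_node() skeleton */
	{
		atomic_store(&state, HALTED);	/* smp_store_mb(): store + fence */
		if (!atomic_load(&locked)) {
			/* pv_wait(&state, HALTED): sleep unless state changed */
		}
		int expected = HALTED;		/* preserve HASHED if the owner won */
		atomic_compare_exchange_strong(&state, &expected, RUNNING);
	}

	static void owner(void)			/* pv_kick_node() skeleton */
	{
		atomic_store(&locked, 1);	/* arch_mcs_spin_unlock_contended() */
		int expected = HALTED;		/* fence pairs with the waiter's */
		if (atomic_compare_exchange_strong(&state, &expected, HASHED)) {
			/* waiter may sleep: pre-hash the lock, set _Q_SLOW_VAL */
		}
	}

Either the waiter's load of 'locked' sees the owner's store, or the owner's cmpxchg sees HALTED and takes over the hashing; the full fences rule out both sides missing each other.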
+ */ + WRITE_ONCE(lock->locked, _Q_SLOW_VAL); + (void)pv_hash(lock, node); +} + +/* + * Wait for l->locked to become clear and acquire the lock; + * halt the vcpu after a short spin. + * __pv_queued_spin_unlock() will wake us. + * + * The current value of the lock will be returned for additional processin= g. + */ +static u32 +pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node) +{ + struct qspinlock **lp =3D NULL; + int waitcnt =3D 0; + int loop; + + /* + * If pv_kick_node() already advanced our state, we don't need to + * insert ourselves into the hash table anymore. + */ + if (READ_ONCE(node->state) =3D=3D vcpu_hashed) + lp =3D (struct qspinlock **)1; + + /* + * Tracking # of slowpath locking operations + */ + lockevent_inc(lock_slowpath); + + for (;; waitcnt++) { + /* + * Set correct vCPU state to be used by queue node wait-early + * mechanism. + */ + WRITE_ONCE(node->state, vcpu_running); + + /* + * Set the pending bit in the active lock spinning loop to + * disable lock stealing before attempting to acquire the lock. + */ + set_pending(lock); + for (loop =3D SPIN_THRESHOLD; loop; loop--) { + if (trylock_clear_pending(lock)) + goto gotlock; + cpu_relax(); + } + clear_pending(lock); + + + if (!lp) { /* ONCE */ + lp =3D pv_hash(lock, node); + + /* + * We must hash before setting _Q_SLOW_VAL, such that + * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock() + * we'll be sure to be able to observe our hash entry. + * + * [S] [Rmw] l->locked =3D=3D _Q_SLOW_VAL + * MB RMB + * [RmW] l->locked =3D _Q_SLOW_VAL [L] + * + * Matches the smp_rmb() in __pv_queued_spin_unlock(). + */ + if (xchg(&lock->locked, _Q_SLOW_VAL) =3D=3D 0) { + /* + * The lock was free and now we own the lock. + * Change the lock value back to _Q_LOCKED_VAL + * and unhash the table. + */ + WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); + WRITE_ONCE(*lp, NULL); + goto gotlock; + } + } + WRITE_ONCE(node->state, vcpu_hashed); + lockevent_inc(pv_wait_head); + lockevent_cond_inc(pv_wait_again, waitcnt); + pv_wait(&lock->locked, _Q_SLOW_VAL); + + /* + * Because of lock stealing, the queue head vCPU may not be + * able to acquire the lock before it has to wait again. + */ + } + + /* + * The cmpxchg() or xchg() call before coming here provides the + * acquire semantics for locking. The dummy ORing of _Q_LOCKED_VAL + * here is to indicate to the compiler that the value will always + * be nozero to enable better code optimization. + */ +gotlock: + return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL); +} + +/* + * PV versions of the unlock fastpath and slowpath functions to be used + * instead of queued_spin_unlock(). + */ +__visible void +__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) +{ + struct qnode *node; + + if (unlikely(locked !=3D _Q_SLOW_VAL)) { + WARN(!debug_locks_silent, + "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n", + (unsigned long)lock, atomic_read(&lock->val)); + return; + } + + /* + * A failed cmpxchg doesn't provide any memory-ordering guarantees, + * so we need a barrier to order the read of the node data in + * pv_unhash *after* we've read the lock being _Q_SLOW_VAL. + * + * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL. + */ + smp_rmb(); + + /* + * Since the above failed to release, this must be the SLOW path. + * Therefore start by looking up the blocked node and unhashing it. + */ + node =3D pv_unhash(lock); + + /* + * Now that we have a reference to the (likely) blocked qnode, + * release the lock. 
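[Aside, not from the patch: the complete unlock sequence, fast path plus the slow steps sketched as comments, in C11 atomics. SLOW_VAL/LOCKED_VAL mirror _Q_SLOW_VAL/_Q_LOCKED_VAL; the function is illustrative, not the kernel's.]

	#include <stdatomic.h>

	#define LOCKED_VAL	1	/* _Q_LOCKED_VAL */
	#define SLOW_VAL	3	/* _Q_SLOW_VAL: queue head hashed + asleep */

	static void unlock_sketch(_Atomic unsigned char *locked)
	{
		unsigned char expected = LOCKED_VAL;

		/* fast path: nobody hashed themselves against this lock */
		if (atomic_compare_exchange_strong_explicit(locked, &expected, 0,
				memory_order_release, memory_order_relaxed))
			return;

		/* 'expected' now holds the observed byte, SLOW_VAL; then:
		 * 1. smp_rmb()              - order unhash after seeing SLOW_VAL
		 * 2. node = pv_unhash(lock) - must precede the release, since
		 *                             the lock memory may then be freed
		 * 3. store-release 0        - actually releases the lock
		 * 4. pv_kick(node->cpu)     - wake the hashed queue head
		 */
	}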
+ */ + smp_store_release(&lock->locked, 0); + + /* + * At this point the memory pointed at by lock can be freed/reused, + * however we can still use the qnode to kick the CPU. + * The other vCPU may not really be halted, but kicking an active + * vCPU is harmless other than the additional latency in completing + * the unlock. + */ + lockevent_inc(pv_kick_unlock); + pv_kick(node->cpu); +} + +/* + * Include the architecture specific callee-save thunk of the + * __pv_queued_spin_unlock(). This thunk is put together with + * __pv_queued_spin_unlock() to make the callee-save thunk and the real un= lock + * function close to each other sharing consecutive instruction cachelines. + * Alternatively, architecture specific version of __pv_queued_spin_unlock= () + * can be defined. + */ +#include + +#ifndef __pv_queued_spin_unlock +__visible void __pv_queued_spin_unlock(struct qspinlock *lock) +{ + u8 locked; + + /* + * We must not unlock if SLOW, because in that case we must first + * unhash. Otherwise it would be possible to have multiple @lock + * entries, which would be BAD. + */ + locked =3D cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0); + if (likely(locked =3D=3D _Q_LOCKED_VAL)) + return; + + __pv_queued_spin_unlock_slowpath(lock, locked); +} +#endif + #else /* CONFIG_PARAVIRT_SPINLOCKS */ static __always_inline void pv_init_node(struct qnode *node) { } static __always_inline void pv_wait_node(struct qnode *node, diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock= _paravirt.h deleted file mode 100644 index f1922e3a0f7d..000000000000 --- a/kernel/locking/qspinlock_paravirt.h +++ /dev/null @@ -1,490 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include -#include -#include - -/* - * Implement paravirt qspinlocks; the general idea is to halt the vcpus in= stead - * of spinning them. - * - * This relies on the architecture to provide two paravirt hypercalls: - * - * pv_wait(u8 *ptr, u8 val) -- suspends the vcpu if *ptr =3D=3D val - * pv_kick(cpu) -- wakes a suspended vcpu - * - * Using these we implement __pv_queued_spin_lock_slowpath() and - * __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath()= and - * native_queued_spin_unlock(). - */ - -#define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET) - -/* - * Queue Node Adaptive Spinning - * - * A queue node vCPU will stop spinning if the vCPU in the previous node is - * not running. The one lock stealing attempt allowed at slowpath entry - * mitigates the slight slowdown for non-overcommitted guest with this - * aggressive wait-early mechanism. - * - * The status of the previous node will be checked at fixed interval - * controlled by PV_PREV_CHECK_MASK. This is to ensure that we won't - * pound on the cacheline of the previous node too heavily. - */ -#define PV_PREV_CHECK_MASK 0xff - -/* - * Queue node uses: vcpu_running & vcpu_halted. - * Queue head uses: vcpu_running & vcpu_hashed. - */ -enum vcpu_state { - vcpu_running =3D 0, - vcpu_halted, /* Used only in pv_wait_node */ - vcpu_hashed, /* =3D pv_hash'ed + vcpu_halted */ -}; - -/* - * Hybrid PV queued/unfair lock - * - * This function is called once when a lock waiter enters the PV slowpath - * before being queued. - * - * The pending bit is set by the queue head vCPU of the MCS wait queue in - * pv_wait_head_or_lock() to signal that it is ready to spin on the lock. - * When that bit becomes visible to the incoming waiters, no lock stealing - * is allowed. The function will return immediately to make the waiters - * enter the MCS wait queue. 
So lock starvation shouldn't happen as long - * as the queued mode vCPUs are actively running to set the pending bit - * and hence disabling lock stealing. - * - * When the pending bit isn't set, the lock waiters will stay in the unfair - * mode spinning on the lock unless the MCS wait queue is empty. In this - * case, the lock waiters will enter the queued mode slowpath trying to - * become the queue head and set the pending bit. - * - * This hybrid PV queued/unfair lock combines the best attributes of a - * queued lock (no lock starvation) and an unfair lock (good performance - * on not heavily contended locks). - */ -static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) -{ - /* - * Stay in unfair lock mode as long as queued mode waiters are - * present in the MCS wait queue but the pending bit isn't set. - */ - for (;;) { - int val =3D atomic_read(&lock->val); - - if (!(val & _Q_LOCKED_PENDING_MASK) && - (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) =3D=3D 0)) { - lockevent_inc(pv_lock_stealing); - return true; - } - if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK)) - break; - - cpu_relax(); - } - - return false; -} - -/* - * Lock and MCS node addresses hash table for fast lookup - * - * Hashing is done on a per-cacheline basis to minimize the need to access - * more than one cacheline. - * - * Dynamically allocate a hash table big enough to hold at least 4X the - * number of possible cpus in the system. Allocation is done on page - * granularity. So the minimum number of hash buckets should be at least - * 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page. - * - * Since we should not be holding locks from NMI context (very rare indeed= ) the - * max load factor is 0.75, which is around the point where open addressing - * breaks down. - * - */ -struct pv_hash_entry { - struct qspinlock *lock; - struct qnode *node; -}; - -#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry)) -#define PV_HE_MIN (PAGE_SIZE / sizeof(struct pv_hash_entry)) - -static struct pv_hash_entry *pv_lock_hash; -static unsigned int pv_lock_hash_bits __read_mostly; - -/* - * Allocate memory for the PV qspinlock hash buckets - * - * This function should be called from the paravirt spinlock initialization - * routine. - */ -void __init __pv_init_lock_hash(void) -{ - int pv_hash_size =3D ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE); - - if (pv_hash_size < PV_HE_MIN) - pv_hash_size =3D PV_HE_MIN; - - /* - * Allocate space from bootmem which should be page-size aligned - * and hence cacheline aligned. - */ - pv_lock_hash =3D alloc_large_system_hash("PV qspinlock", - sizeof(struct pv_hash_entry), - pv_hash_size, 0, - HASH_EARLY | HASH_ZERO, - &pv_lock_hash_bits, NULL, - pv_hash_size, pv_hash_size); -} - -#define for_each_hash_entry(he, offset, hash) \ - for (hash &=3D ~(PV_HE_PER_LINE - 1), he =3D &pv_lock_hash[hash], offset = =3D 0; \ - offset < (1 << pv_lock_hash_bits); \ - offset++, he =3D &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash= _bits) - 1)]) - -static struct qspinlock **pv_hash(struct qspinlock *lock, struct qnode *no= de) -{ - unsigned long offset, hash =3D hash_ptr(lock, pv_lock_hash_bits); - struct pv_hash_entry *he; - int hopcnt =3D 0; - - for_each_hash_entry(he, offset, hash) { - hopcnt++; - if (!cmpxchg(&he->lock, NULL, lock)) { - WRITE_ONCE(he->node, node); - lockevent_pv_hop(hopcnt); - return &he->lock; - } - } - /* - * Hard assume there is a free entry for us. 
- * - * This is guaranteed by ensuring every blocked lock only ever consumes - * a single entry, and since we only have 4 nesting levels per CPU - * and allocated 4*nr_possible_cpus(), this must be so. - * - * The single entry is guaranteed by having the lock owner unhash - * before it releases. - */ - BUG(); -} - -static struct qnode *pv_unhash(struct qspinlock *lock) -{ - unsigned long offset, hash =3D hash_ptr(lock, pv_lock_hash_bits); - struct pv_hash_entry *he; - struct qnode *node; - - for_each_hash_entry(he, offset, hash) { - if (READ_ONCE(he->lock) =3D=3D lock) { - node =3D READ_ONCE(he->node); - WRITE_ONCE(he->lock, NULL); - return node; - } - } - /* - * Hard assume we'll find an entry. - * - * This guarantees a limited lookup time and is itself guaranteed by - * having the lock owner do the unhash -- IFF the unlock sees the - * SLOW flag, there MUST be a hash entry. - */ - BUG(); -} - -/* - * Return true if when it is time to check the previous node which is not - * in a running state. - */ -static inline bool -pv_wait_early(struct qnode *prev, int loop) -{ - if ((loop & PV_PREV_CHECK_MASK) !=3D 0) - return false; - - return READ_ONCE(prev->state) !=3D vcpu_running; -} - -/* - * Initialize the PV part of the qnode. - */ -static void pv_init_node(struct qnode *node) -{ - node->cpu =3D smp_processor_id(); - node->state =3D vcpu_running; -} - -/* - * Wait for node->locked to become true, halt the vcpu after a short spin. - * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its - * behalf. - */ -static void pv_wait_node(struct qnode *node, struct qnode *prev) -{ - int loop; - bool wait_early; - - for (;;) { - for (wait_early =3D false, loop =3D SPIN_THRESHOLD; loop; loop--) { - if (READ_ONCE(node->locked)) - return; - if (pv_wait_early(prev, loop)) { - wait_early =3D true; - break; - } - cpu_relax(); - } - - /* - * Order node->state vs node->locked thusly: - * - * [S] node->state =3D vcpu_halted [S] next->locked =3D 1 - * MB MB - * [L] node->locked [RmW] node->state =3D vcpu_hashed - * - * Matches the cmpxchg() from pv_kick_node(). - */ - smp_store_mb(node->state, vcpu_halted); - - if (!READ_ONCE(node->locked)) { - lockevent_inc(pv_wait_node); - lockevent_cond_inc(pv_wait_early, wait_early); - pv_wait(&node->state, vcpu_halted); - } - - /* - * If pv_kick_node() changed us to vcpu_hashed, retain that - * value so that pv_wait_head_or_lock() knows to not also try - * to hash this lock. - */ - cmpxchg(&node->state, vcpu_halted, vcpu_running); - - /* - * If the locked flag is still not set after wakeup, it is a - * spurious wakeup and the vCPU should wait again. However, - * there is a pretty high overhead for CPU halting and kicking. - * So it is better to spin for a while in the hope that the - * MCS lock will be released soon. - */ - lockevent_cond_inc(pv_spurious_wakeup, - !READ_ONCE(node->locked)); - } - - /* - * By now our node->locked should be 1 and our caller will not actually - * spin-wait for it. We do however rely on our caller to do a - * load-acquire for us. - */ -} - -/* - * Called after setting next->locked =3D 1 when we're the lock owner. - * - * Instead of waking the waiters stuck in pv_wait_node() advance their sta= te - * such that they're waiting in pv_wait_head_or_lock(), this avoids a - * wake/sleep cycle. - */ -static void pv_kick_node(struct qspinlock *lock, struct qnode *node) -{ - /* - * If the vCPU is indeed halted, advance its state to match that of - * pv_wait_node(). 
If OTOH this fails, the vCPU was running and will - * observe its next->locked value and advance itself. - * - * Matches with smp_store_mb() and cmpxchg() in pv_wait_node() - * - * The write to next->locked in arch_mcs_spin_unlock_contended() - * must be ordered before the read of node->state in the cmpxchg() - * below for the code to work correctly. To guarantee full ordering - * irrespective of the success or failure of the cmpxchg(), - * a relaxed version with explicit barrier is used. The control - * dependency will order the reading of node->state before any - * subsequent writes. - */ - smp_mb__before_atomic(); - if (cmpxchg_relaxed(&node->state, vcpu_halted, vcpu_hashed) - !=3D vcpu_halted) - return; - - /* - * Put the lock into the hash table and set the _Q_SLOW_VAL. - * - * As this is the same vCPU that will check the _Q_SLOW_VAL value and - * the hash table later on at unlock time, no atomic instruction is - * needed. - */ - WRITE_ONCE(lock->locked, _Q_SLOW_VAL); - (void)pv_hash(lock, node); -} - -/* - * Wait for l->locked to become clear and acquire the lock; - * halt the vcpu after a short spin. - * __pv_queued_spin_unlock() will wake us. - * - * The current value of the lock will be returned for additional processin= g. - */ -static u32 -pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node) -{ - struct qspinlock **lp =3D NULL; - int waitcnt =3D 0; - int loop; - - /* - * If pv_kick_node() already advanced our state, we don't need to - * insert ourselves into the hash table anymore. - */ - if (READ_ONCE(node->state) =3D=3D vcpu_hashed) - lp =3D (struct qspinlock **)1; - - /* - * Tracking # of slowpath locking operations - */ - lockevent_inc(lock_slowpath); - - for (;; waitcnt++) { - /* - * Set correct vCPU state to be used by queue node wait-early - * mechanism. - */ - WRITE_ONCE(node->state, vcpu_running); - - /* - * Set the pending bit in the active lock spinning loop to - * disable lock stealing before attempting to acquire the lock. - */ - set_pending(lock); - for (loop =3D SPIN_THRESHOLD; loop; loop--) { - if (trylock_clear_pending(lock)) - goto gotlock; - cpu_relax(); - } - clear_pending(lock); - - - if (!lp) { /* ONCE */ - lp =3D pv_hash(lock, node); - - /* - * We must hash before setting _Q_SLOW_VAL, such that - * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock() - * we'll be sure to be able to observe our hash entry. - * - * [S] [Rmw] l->locked =3D=3D _Q_SLOW_VAL - * MB RMB - * [RmW] l->locked =3D _Q_SLOW_VAL [L] - * - * Matches the smp_rmb() in __pv_queued_spin_unlock(). - */ - if (xchg(&lock->locked, _Q_SLOW_VAL) =3D=3D 0) { - /* - * The lock was free and now we own the lock. - * Change the lock value back to _Q_LOCKED_VAL - * and unhash the table. - */ - WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); - WRITE_ONCE(*lp, NULL); - goto gotlock; - } - } - WRITE_ONCE(node->state, vcpu_hashed); - lockevent_inc(pv_wait_head); - lockevent_cond_inc(pv_wait_again, waitcnt); - pv_wait(&lock->locked, _Q_SLOW_VAL); - - /* - * Because of lock stealing, the queue head vCPU may not be - * able to acquire the lock before it has to wait again. - */ - } - - /* - * The cmpxchg() or xchg() call before coming here provides the - * acquire semantics for locking. The dummy ORing of _Q_LOCKED_VAL - * here is to indicate to the compiler that the value will always - * be nozero to enable better code optimization. 
- */ -gotlock: - return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL); -} - -/* - * PV versions of the unlock fastpath and slowpath functions to be used - * instead of queued_spin_unlock(). - */ -__visible void -__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) -{ - struct qnode *node; - - if (unlikely(locked !=3D _Q_SLOW_VAL)) { - WARN(!debug_locks_silent, - "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n", - (unsigned long)lock, atomic_read(&lock->val)); - return; - } - - /* - * A failed cmpxchg doesn't provide any memory-ordering guarantees, - * so we need a barrier to order the read of the node data in - * pv_unhash *after* we've read the lock being _Q_SLOW_VAL. - * - * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL. - */ - smp_rmb(); - - /* - * Since the above failed to release, this must be the SLOW path. - * Therefore start by looking up the blocked node and unhashing it. - */ - node =3D pv_unhash(lock); - - /* - * Now that we have a reference to the (likely) blocked qnode, - * release the lock. - */ - smp_store_release(&lock->locked, 0); - - /* - * At this point the memory pointed at by lock can be freed/reused, - * however we can still use the qnode to kick the CPU. - * The other vCPU may not really be halted, but kicking an active - * vCPU is harmless other than the additional latency in completing - * the unlock. - */ - lockevent_inc(pv_kick_unlock); - pv_kick(node->cpu); -} - -/* - * Include the architecture specific callee-save thunk of the - * __pv_queued_spin_unlock(). This thunk is put together with - * __pv_queued_spin_unlock() to make the callee-save thunk and the real un= lock - * function close to each other sharing consecutive instruction cachelines. - * Alternatively, architecture specific version of __pv_queued_spin_unlock= () - * can be defined. - */ -#include - -#ifndef __pv_queued_spin_unlock -__visible void __pv_queued_spin_unlock(struct qspinlock *lock) -{ - u8 locked; - - /* - * We must not unlock if SLOW, because in that case we must first - * unhash. Otherwise it would be possible to have multiple @lock - * entries, which would be BAD. 
- */
-	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
-	if (likely(locked == _Q_LOCKED_VAL))
-		return;
-
-	__pv_queued_spin_unlock_slowpath(lock, locked);
-}
-#endif /* __pv_queued_spin_unlock */
-- 
2.35.1

From nobody Sun Apr 26 10:47:28 2026
From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 07/13] locking/qspinlock: remove arch qspinlock_paravirt.h includes
Date: Tue, 5 Jul 2022 00:38:14 +1000
Message-Id: <20220704143820.3071004-8-npiggin@gmail.com>
X-Mailer: git-send-email 2.35.1
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>
References: <20220704143820.3071004-1-npiggin@gmail.com>
MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/qspinlock_paravirt.h | 7 -- arch/x86/include/asm/qspinlock.h | 4 ++ arch/x86/include/asm/qspinlock_paravirt.h | 72 ------------------- arch/x86/kernel/paravirt-spinlocks.c | 71 ++++++++++++++++++ kernel/locking/qspinlock.c | 11 +-- 5 files changed, 76 insertions(+), 89 deletions(-) delete mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h delete mode 100644 arch/x86/include/asm/qspinlock_paravirt.h diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/i= nclude/asm/qspinlock_paravirt.h deleted file mode 100644 index 6b60e7736a47..000000000000 --- a/arch/powerpc/include/asm/qspinlock_paravirt.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H -#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H - -EXPORT_SYMBOL(__pv_queued_spin_unlock); - -#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */ diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinl= ock.h index d87451df480b..7f914fe7bc30 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -34,6 +34,10 @@ extern void __pv_queued_spin_lock_slowpath(struct qspinl= ock *lock, u32 val); extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lo= ck); extern bool nopvspin; =20 +#ifdef CONFIG_64BIT +#define __pv_queued_spin_unlock __pv_queued_spin_unlock +#endif + #define queued_spin_unlock queued_spin_unlock /** * queued_spin_unlock - release a queued spinlock diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/a= sm/qspinlock_paravirt.h deleted file mode 100644 index 892fd8c3a6f7..000000000000 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_QSPINLOCK_PARAVIRT_H -#define __ASM_QSPINLOCK_PARAVIRT_H - -#include - -/* - * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit - * registers. For i386, however, only 1 32-bit register needs to be saved - * and restored. So an optimized version of __pv_queued_spin_unlock() is - * hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit. 
- */ -#ifdef CONFIG_64BIT - -PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); -#define __pv_queued_spin_unlock __pv_queued_spin_unlock -#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" -#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slow= path" - -/* - * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock - * which combines the registers saving trunk and the body of the following - * C code: - * - * void __pv_queued_spin_unlock(struct qspinlock *lock) - * { - * u8 lockval =3D cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); - * - * if (likely(lockval =3D=3D _Q_LOCKED_VAL)) - * return; - * pv_queued_spin_unlock_slowpath(lock, lockval); - * } - * - * For x86-64, - * rdi =3D lock (first argument) - * rsi =3D lockval (second argument) - * rdx =3D internal variable (set to 0) - */ -asm (".pushsection .text;" - ".globl " PV_UNLOCK ";" - ".type " PV_UNLOCK ", @function;" - ".align 4,0x90;" - PV_UNLOCK ": " - ASM_ENDBR - FRAME_BEGIN - "push %rdx;" - "mov $0x1,%eax;" - "xor %edx,%edx;" - LOCK_PREFIX "cmpxchg %dl,(%rdi);" - "cmp $0x1,%al;" - "jne .slowpath;" - "pop %rdx;" - FRAME_END - ASM_RET - ".slowpath: " - "push %rsi;" - "movzbl %al,%esi;" - "call " PV_UNLOCK_SLOWPATH ";" - "pop %rsi;" - "pop %rdx;" - FRAME_END - ASM_RET - ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" - ".popsection"); - -#else /* CONFIG_64BIT */ - -extern void __pv_queued_spin_unlock(struct qspinlock *lock); -PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock); - -#endif /* CONFIG_64BIT */ -#endif diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravir= t-spinlocks.c index 9e1ea99ad9df..c6a107dfe20d 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -7,6 +7,7 @@ #include #include =20 +#include #include =20 __visible void __native_queued_spin_unlock(struct qspinlock *lock) @@ -15,6 +16,76 @@ __visible void __native_queued_spin_unlock(struct qspinl= ock *lock) } PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock); =20 +#ifdef CONFIG_PARAVIRT_SPINLOCKS +/* + * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit + * registers. For i386, however, only 1 32-bit register needs to be saved + * and restored. So an optimized version of __pv_queued_spin_unlock() is + * hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit. 
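[Aside, not from the patch: the asm below is the C fallback with a hand-minimized clobber footprint. Its observable behaviour matches this plain-C rendering using GCC/Clang builtins; the stub name is hypothetical:]

	/* hypothetical stand-in for __pv_queued_spin_unlock_slowpath() */
	void unlock_slowpath_stub(unsigned char *l, unsigned char observed);

	void pv_unlock_c_rendering(unsigned char *locked)
	{
		unsigned char expected = 1;	/* _Q_LOCKED_VAL */

		/* the asm: mov $1,%eax; xor %edx,%edx; lock cmpxchg %dl,(%rdi);
		 * lock cmpxchg is a full barrier, a superset of release */
		if (__atomic_compare_exchange_n(locked, &expected, 0, 0,
						__ATOMIC_RELEASE, __ATOMIC_RELAXED))
			return;			/* cmp $0x1,%al; jne .slowpath */

		/* movzbl %al,%esi: pass along the byte actually observed */
		unlock_slowpath_stub(locked, expected);
	}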
+ */ +#ifdef CONFIG_64BIT + +__visible void +__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked); + +PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); +#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" +#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slow= path" + +/* + * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock + * which combines the registers saving trunk and the body of the following + * C code: + * + * void __pv_queued_spin_unlock(struct qspinlock *lock) + * { + * u8 lockval =3D cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); + * + * if (likely(lockval =3D=3D _Q_LOCKED_VAL)) + * return; + * pv_queued_spin_unlock_slowpath(lock, lockval); + * } + * + * For x86-64, + * rdi =3D lock (first argument) + * rsi =3D lockval (second argument) + * rdx =3D internal variable (set to 0) + */ +asm (".pushsection .text;" + ".globl " PV_UNLOCK ";" + ".type " PV_UNLOCK ", @function;" + ".align 4,0x90;" + PV_UNLOCK ": " + ASM_ENDBR + FRAME_BEGIN + "push %rdx;" + "mov $0x1,%eax;" + "xor %edx,%edx;" + LOCK_PREFIX "cmpxchg %dl,(%rdi);" + "cmp $0x1,%al;" + "jne .slowpath;" + "pop %rdx;" + FRAME_END + ASM_RET + ".slowpath: " + "push %rsi;" + "movzbl %al,%esi;" + "call " PV_UNLOCK_SLOWPATH ";" + "pop %rsi;" + "pop %rdx;" + FRAME_END + ASM_RET + ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" + ".popsection"); + +#else /* CONFIG_64BIT */ + +extern void __pv_queued_spin_unlock(struct qspinlock *lock); +PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock); + +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + bool pv_is_native_spin_unlock(void) { return pv_ops.lock.queued_spin_unlock.func =3D=3D diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 9a235b0d98ca..4045b5683ecb 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -743,16 +743,6 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *loc= k, u8 locked) pv_kick(node->cpu); } =20 -/* - * Include the architecture specific callee-save thunk of the - * __pv_queued_spin_unlock(). This thunk is put together with - * __pv_queued_spin_unlock() to make the callee-save thunk and the real un= lock - * function close to each other sharing consecutive instruction cachelines. - * Alternatively, architecture specific version of __pv_queued_spin_unlock= () - * can be defined. 
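[Aside, not from the patch: the '#ifndef __pv_queued_spin_unlock' guard in the generic code works because of the kernel convention of an arch defining a symbol to itself to signal an override; the x86 hunk of this very patch adds exactly such a define. Schematic with hypothetical names:]

	/* arch header opts in by defining the name to itself */
	#ifdef ARCH_HAS_FAST_UNLOCK		/* stand-in for CONFIG_64BIT */
	#define pv_unlock pv_unlock		/* arch provides pv_unlock */
	void pv_unlock(unsigned char *locked);	/* e.g. an asm thunk */
	#endif

	/* generic code compiles its C fallback only without an override */
	#ifndef pv_unlock
	void pv_unlock(unsigned char *locked)
	{
		__atomic_store_n(locked, 0, __ATOMIC_RELEASE);
	}
	#endif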
- */
-#include <asm/qspinlock_paravirt.h>
-
 #ifndef __pv_queued_spin_unlock
 __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 {
@@ -769,6 +759,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 
 	__pv_queued_spin_unlock_slowpath(lock, locked);
 }
+EXPORT_SYMBOL(__pv_queued_spin_unlock);
 #endif
 
 #else /* CONFIG_PARAVIRT_SPINLOCKS */
-- 
2.35.1

From nobody Sun Apr 26 10:47:28 2026
From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 08/13] locking/qspinlock: stop renaming queued_spin_lock_slowpath to native_queued_spin_lock_slowpath
Date: Tue, 5 Jul 2022 00:38:15 +1000
Message-Id: <20220704143820.3071004-9-npiggin@gmail.com>
X-Mailer: git-send-email 2.35.1 In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com> References: <20220704143820.3071004-1-npiggin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" The native version can simply be queued_spin_lock_slowpath, and the paravirt version __pv_queued_spin_lock_slowpath, which is as they are named in the C code. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/qspinlock.h | 38 ++++++++++------------------ arch/x86/include/asm/qspinlock.h | 14 +++++++--- arch/x86/kernel/paravirt.c | 2 +- kernel/locking/qspinlock.c | 8 +----- 4 files changed, 26 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/as= m/qspinlock.h index b676c4fb90fd..dd231c756233 100644 --- a/arch/powerpc/include/asm/qspinlock.h +++ b/arch/powerpc/include/asm/qspinlock.h @@ -7,42 +7,32 @@ =20 #define _Q_PENDING_LOOPS (1 << 9) /* not tuned */ =20 -#ifdef CONFIG_PARAVIRT_SPINLOCKS -extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 v= al); -extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val= ); -extern void __pv_queued_spin_unlock(struct qspinlock *lock); +void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +void __pv_queued_spin_unlock(struct qspinlock *lock); =20 -static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lo= ck, u32 val) +static __always_inline void queued_spin_lock(struct qspinlock *lock) { - if (!is_shared_processor()) - native_queued_spin_lock_slowpath(lock, val); + u32 val =3D 0; + + if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL))) + return; + + if (!IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) || !is_shared_processor()) + queued_spin_lock_slowpath(lock, val); else __pv_queued_spin_lock_slowpath(lock, val); } +#define queued_spin_lock queued_spin_lock =20 -#define queued_spin_unlock queued_spin_unlock static inline void queued_spin_unlock(struct qspinlock *lock) { - if (!is_shared_processor()) + if (!IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) || !is_shared_processor()) smp_store_release(&lock->locked, 0); else __pv_queued_spin_unlock(lock); } - -#else -extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -#endif - -static __always_inline void queued_spin_lock(struct qspinlock *lock) -{ - u32 val =3D 0; - - if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL))) - return; - - queued_spin_lock_slowpath(lock, val); -} -#define queued_spin_lock queued_spin_lock +#define queued_spin_unlock queued_spin_unlock =20 #ifdef CONFIG_PARAVIRT_SPINLOCKS #define SPIN_THRESHOLD (1<<15) /* not tuned */ diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinl= ock.h index 7f914fe7bc30..603ad61e9dfe 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -28,7 +28,7 @@ static __always_inline u32 queued_fetch_set_pending_acqui= re(struct qspinlock *lo } =20 #ifdef CONFIG_PARAVIRT_SPINLOCKS -extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 v= al); +extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); extern void __pv_init_lock_hash(void); extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val= ); extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lo= ck); @@ -38,7 +38,6 @@ 
 extern bool nopvspin;
 #define __pv_queued_spin_unlock	__pv_queued_spin_unlock
 #endif

-#define queued_spin_unlock queued_spin_unlock
 /**
  * queued_spin_unlock - release a queued spinlock
  * @lock : Pointer to queued spinlock structure
@@ -50,22 +49,29 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock)
 	smp_store_release(&lock->locked, 0);
 }

-static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+static inline void queued_spin_lock(struct qspinlock *lock)
 {
+	int val = 0;
+
+	if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
+		return;
+
 	pv_queued_spin_lock_slowpath(lock, val);
 }
+#define queued_spin_lock queued_spin_lock

 static inline void queued_spin_unlock(struct qspinlock *lock)
 {
 	kcsan_release();
 	pv_queued_spin_unlock(lock);
 }
+#define queued_spin_unlock queued_spin_unlock

-#define vcpu_is_preempted vcpu_is_preempted
 static inline bool vcpu_is_preempted(long cpu)
 {
 	return pv_vcpu_is_preempted(cpu);
 }
+#define vcpu_is_preempted vcpu_is_preempted
 #endif

 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7ca2d46c08cc..f03e2962afa8 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -384,7 +384,7 @@ struct paravirt_patch_template pv_ops = {
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
 	/* Lock ops. */
 #ifdef CONFIG_SMP
-	.lock.queued_spin_lock_slowpath	= native_queued_spin_lock_slowpath,
+	.lock.queued_spin_lock_slowpath	= queued_spin_lock_slowpath,
 	.lock.queued_spin_unlock	= PV_CALLEE_SAVE(__native_queued_spin_unlock),

 	.lock.wait			= paravirt_nop,
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 4045b5683ecb..412b83040bac 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -295,8 +295,7 @@ static __always_inline void set_locked(struct qspinlock *lock)
 * pv_kick(cpu) -- wakes a suspended vcpu
 *
 * Using these we implement __pv_queued_spin_lock_slowpath() and
- * __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath() and
- * native_queued_spin_unlock().
+ * __pv_queued_spin_unlock().
 */

 #define _Q_SLOW_VAL	(3U << _Q_LOCKED_OFFSET)
@@ -986,10 +985,6 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool parav
 * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' :
 * queue     :          ^--'                        :
 */
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
-#endif
-
 void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
 	if (virt_spin_lock(lock))
@@ -1070,7 +1065,6 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 EXPORT_SYMBOL(queued_spin_lock_slowpath);

 #ifdef CONFIG_PARAVIRT_SPINLOCKS
-#undef queued_spin_lock_slowpath
 void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
 	queued_spin_lock_mcs_queue(lock, true);
--
2.35.1

From nobody Sun Apr 26 10:47:28 2026
From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 09/13] locking/qspinlock: rename __pv_init_lock_hash to pv_spinlocks_init
Date: Tue, 5 Jul 2022 00:38:16 +1000
Message-Id: <20220704143820.3071004-10-npiggin@gmail.com>
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>
References: <20220704143820.3071004-1-npiggin@gmail.com>

The caller should not have to be aware of what the implementation's
initialisation does.
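The generic header then carries the declaration and the !pv stub, so
every caller can simply invoke pv_spinlocks_init() (a sketch assembled
from the asm-generic/qspinlock.h hunk below):

	#ifdef CONFIG_PARAVIRT_SPINLOCKS
	void pv_spinlocks_init(void);
	#else
	static inline void pv_spinlocks_init(void) { }
	#endif

Hypervisor init code (Hyper-V, KVM, Xen) no longer needs to know that
the initialisation populates a lock hash table.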
Signed-off-by: Nicholas Piggin
---
 arch/powerpc/include/asm/qspinlock.h | 7 -------
 arch/powerpc/include/asm/spinlock.h  | 2 +-
 arch/x86/hyperv/hv_spinlock.c        | 2 +-
 arch/x86/include/asm/qspinlock.h     | 1 -
 arch/x86/kernel/kvm.c                | 2 +-
 arch/x86/xen/spinlock.c              | 2 +-
 include/asm-generic/qspinlock.h      | 6 ++++++
 kernel/locking/qspinlock.c           | 2 +-
 8 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index dd231c756233..39c1c7f80579 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -53,13 +53,6 @@ static __always_inline void pv_kick(int cpu)
 	prod_cpu(cpu);
 }

-extern void __pv_init_lock_hash(void);
-
-static inline void pv_spinlocks_init(void)
-{
-	__pv_init_lock_hash();
-}
-
 #endif

 /*
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index bd75872a6334..7dafca8e3f02 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -13,7 +13,7 @@
 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()	smp_mb()

-#ifndef CONFIG_PARAVIRT_SPINLOCKS
+#ifndef CONFIG_PPC_QUEUED_SPINLOCKS
 static inline void pv_spinlocks_init(void) { }
 #endif

diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c
index 91cfe698bde0..c7b5c3211c79 100644
--- a/arch/x86/hyperv/hv_spinlock.c
+++ b/arch/x86/hyperv/hv_spinlock.c
@@ -76,7 +76,7 @@ void __init hv_init_spinlocks(void)
 	}
 	pr_info("PV spinlocks enabled\n");

-	__pv_init_lock_hash();
+	pv_spinlocks_init();
 	pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
 	pv_ops.lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
 	pv_ops.lock.wait = hv_qlock_wait;
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 603ad61e9dfe..9a03fcc1b2b7 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -29,7 +29,6 @@ static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lo

 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_init_lock_hash(void);
 extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
 extern bool nopvspin;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1a3658f7e6d9..98a2c4d3e91d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -1106,7 +1106,7 @@ void __init kvm_spinlock_init(void)

 	pr_info("PV spinlocks enabled\n");

-	__pv_init_lock_hash();
+	pv_spinlocks_init();
 	pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
 	pv_ops.lock.queued_spin_unlock =
 		PV_CALLEE_SAVE(__pv_queued_spin_unlock);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 043c73dfd2c9..5145c4aec4ea 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -135,7 +135,7 @@ void __init xen_init_spinlocks(void)
 	}
 	printk(KERN_DEBUG "xen: PV spinlocks enabled\n");

-	__pv_init_lock_hash();
+	pv_spinlocks_init();
 	pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
 	pv_ops.lock.queued_spin_unlock =
 		PV_CALLEE_SAVE(__pv_queued_spin_unlock);
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 995513fa2690..e0fb29ee1adc 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -147,4 +147,10 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 #define arch_spin_trylock(l)	queued_spin_trylock(l)
 #define arch_spin_unlock(l)	queued_spin_unlock(l)

+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void pv_spinlocks_init(void);
+#else
+static inline void pv_spinlocks_init(void) { }
+#endif
+
 #endif /* __ASM_GENERIC_QSPINLOCK_H */
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 412b83040bac..1d5b3443772c 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -403,7 +403,7 @@ static unsigned int pv_lock_hash_bits __read_mostly;
 * This function should be called from the paravirt spinlock initialization
 * routine.
 */
-void __init __pv_init_lock_hash(void)
+void __init pv_spinlocks_init(void)
 {
 	int pv_hash_size = ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE);

--
2.35.1

From nobody Sun Apr 26 10:47:28 2026
From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 10/13] locking/qspinlock: paravirt use simple trylock in case idx overflows
Date: Tue, 5 Jul 2022 00:38:17 +1000
Message-Id: <20220704143820.3071004-11-npiggin@gmail.com>
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>
References: <20220704143820.3071004-1-npiggin@gmail.com>
The mcs node overflow fallback locking case does not have to use
pv_hybrid_queued_unfair_trylock as the trylock, which reduces the
differences between the pv and !pv cases. This was likely an artifact
of the function renaming making it use the pv trylock; it is not a bug,
but it is unexpected.
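The fallback then reads the same in both cases (a sketch assembled from
the hunk below; MAX_NODES is the number of per-CPU MCS nodes, nominally
one each for task, softirq, hardirq and NMI context):

	if (unlikely(idx >= MAX_NODES)) {
		/* No free MCS node: don't queue, spin on a plain trylock */
		lockevent_inc(lock_no_node);
		while (!queued_spin_trylock(lock))
			cpu_relax();
		goto release;
	}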
Signed-off-by: Nicholas Piggin
---
 kernel/locking/qspinlock.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 1d5b3443772c..cef0ca7d94e1 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -798,13 +798,8 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool parav
 	 */
 	if (unlikely(idx >= MAX_NODES)) {
 		lockevent_inc(lock_no_node);
-		if (paravirt) {
-			while (!pv_hybrid_queued_unfair_trylock(lock))
-				cpu_relax();
-		} else {
-			while (!queued_spin_trylock(lock))
-				cpu_relax();
-		}
+		while (!queued_spin_trylock(lock))
+			cpu_relax();
 		goto release;
 	}

--
2.35.1

From nobody Sun Apr 26 10:47:28 2026
From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 11/13] locking/qspinlock: Use queued_spin_trylock in pv_hybrid_queued_unfair_trylock
Date: Tue, 5 Jul 2022 00:38:18 +1000
Message-Id: <20220704143820.3071004-12-npiggin@gmail.com>
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>
References: <20220704143820.3071004-1-npiggin@gmail.com>

Rather than open-code the trylock, use queued_spin_trylock directly.
The open-coding was necessitated by the old function-renaming code
generation, which rendered queued_spin_trylock unavailable for use
here.
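With that, the lock-stealing test reads roughly as follows (a
simplified sketch; the full function keeps its retry loop while queued
waiters are present, and only the trylock expression changes in the
hunk below):

	int val = atomic_read(&lock->val);

	/* Steal the lock only while nobody holds it or waits in pending */
	if (!(val & _Q_LOCKED_PENDING_MASK) && queued_spin_trylock(lock)) {
		lockevent_inc(pv_lock_stealing);
		return true;
	}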
Signed-off-by: Nicholas Piggin
---
 kernel/locking/qspinlock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index cef0ca7d94e1..9db168753124 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -357,7 +357,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
 	int val = atomic_read(&lock->val);

 	if (!(val & _Q_LOCKED_PENDING_MASK) &&
-	   (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
+	    queued_spin_trylock(lock)) {
 		lockevent_inc(pv_lock_stealing);
 		return true;
 	}
--
2.35.1

From nobody Sun Apr 26 10:47:28 2026
From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 12/13] locking/qspinlock: separate pv_wait_node from the non-paravirt path
Date: Tue, 5 Jul 2022 00:38:19 +1000
Message-Id: <20220704143820.3071004-13-npiggin@gmail.com>
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>
References: <20220704143820.3071004-1-npiggin@gmail.com>

pv_wait_node waits until node->locked is non-zero, so there is no need
for the pv case to wait again by also executing the !pv code path.
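The wait then becomes an explicit either/or (a sketch of the resulting
control flow: pv_wait_node may yield the vCPU back to the hypervisor,
while the native path spins with acquire ordering):

	/* Wait for the MCS node lock to be released by the predecessor */
	if (paravirt)
		pv_wait_node(node, prev);
	else
		smp_cond_load_acquire(&node->locked, VAL);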
Signed-off-by: Nicholas Piggin
---
 kernel/locking/qspinlock.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 9db168753124..19e2f286be0a 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -862,10 +862,11 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool parav
 		/* Link @node into the waitqueue. */
 		WRITE_ONCE(prev->next, node);

+		/* Wait for mcs node lock to be released */
 		if (paravirt)
 			pv_wait_node(node, prev);
-		/* Wait for mcs node lock to be released */
-		smp_cond_load_acquire(&node->locked, VAL);
+		else
+			smp_cond_load_acquire(&node->locked, VAL);

 		/*
 		 * While waiting for the MCS lock, the next pointer may have
--
2.35.1

From nobody Sun Apr 26 10:47:28 2026
From: Nicholas Piggin
To: Peter Zijlstra
Cc: Nicholas Piggin, Ingo Molnar, Will Deacon, Waiman Long, Boqun Feng, linux-kernel@vger.kernel.org
Subject: [PATCH 13/13] locking/qspinlock: simplify pv_wait_head_or_lock calling scheme
Date: Tue, 5 Jul 2022 00:38:20 +1000
Message-Id: <20220704143820.3071004-14-npiggin@gmail.com>
In-Reply-To: <20220704143820.3071004-1-npiggin@gmail.com>
References: <20220704143820.3071004-1-npiggin@gmail.com>

pv_wait_head_or_lock returned the lock word value ORed with a constant,
which was done to achieve a constant-folding compiler optimisation when
the code was generated for both the pv and !pv cases. This is no longer
necessary with the explicit paravirt test, so make the calling
convention simpler.
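The call site then becomes (a sketch assembled from the final hunk
below):

	if (paravirt) {
		/* Returns with the lock held; re-read the lock word for
		 * the tail processing that follows. */
		pv_wait_head_or_lock(lock, node);
		val = atomic_read(&lock->val);
	} else {
		val = atomic_cond_read_acquire(&lock->val,
				!(VAL & _Q_LOCKED_PENDING_MASK));
	}

The old ORing of _Q_LOCKED_VAL existed only so the compiler could prove
the return value non-zero in the combined build; with separate pv and
!pv paths there is nothing left to fold.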
Signed-off-by: Nicholas Piggin
---
 kernel/locking/qspinlock.c | 37 +++++++++++++++----------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 19e2f286be0a..97f95bedfa66 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -609,8 +609,7 @@ static void pv_kick_node(struct qspinlock *lock, struct qnode *node)
 *
 * The current value of the lock will be returned for additional processing.
 */
-static u32
-pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node)
+static void pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node)
 {
 	struct qspinlock **lp = NULL;
 	int waitcnt = 0;
@@ -642,7 +641,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node)
 		set_pending(lock);
 		for (loop = SPIN_THRESHOLD; loop; loop--) {
 			if (trylock_clear_pending(lock))
-				goto gotlock;
+				return; /* got lock */
 			cpu_relax();
 		}
 		clear_pending(lock);
@@ -670,7 +669,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node)
 			 */
 			WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
 			WRITE_ONCE(*lp, NULL);
-			goto gotlock;
+			return; /* got lock */
 		}
 	}
 	WRITE_ONCE(node->state, vcpu_hashed);
@@ -686,12 +685,8 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct qnode *node)

 	/*
 	 * The cmpxchg() or xchg() call before coming here provides the
-	 * acquire semantics for locking. The dummy ORing of _Q_LOCKED_VAL
-	 * here is to indicate to the compiler that the value will always
-	 * be nozero to enable better code optimization.
+	 * acquire semantics for locking.
 	 */
-gotlock:
-	return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL);
 }

 /*
@@ -767,9 +762,8 @@ static __always_inline void pv_wait_node(struct qnode *node,
 					struct qnode *prev) { }
 static __always_inline void pv_kick_node(struct qspinlock *lock,
 					struct qnode *node) { }
-static __always_inline u32 pv_wait_head_or_lock(struct qspinlock *lock,
-					struct qnode *node)
-					{ return 0; }
+static __always_inline void pv_wait_head_or_lock(struct qspinlock *lock,
+					struct qnode *node) { }
 static __always_inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) { BUILD_BUG(); }
 #endif /* CONFIG_PARAVIRT_SPINLOCKS */

@@ -890,24 +884,23 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool parav
 	 * sequentiality; this is because the set_locked() function below
 	 * does not imply a full barrier.
 	 *
-	 * The PV pv_wait_head_or_lock function, if active, will acquire
-	 * the lock and return a non-zero value. So we have to skip the
-	 * atomic_cond_read_acquire() call. As the next PV queue head hasn't
-	 * been designated yet, there is no way for the locked value to become
-	 * _Q_SLOW_VAL. So both the set_locked() and the
+	 * The PV pv_wait_head_or_lock function will acquire the lock, so
+	 * skip the atomic_cond_read_acquire() call. As the next PV queue head
+	 * hasn't been designated yet, there is no way for the locked value to
+	 * become _Q_SLOW_VAL. So both the set_locked() and the
 	 * atomic_cmpxchg_relaxed() calls will be safe.
 	 *
 	 * If PV isn't active, 0 will be returned instead.
 	 *
 	 */
 	if (paravirt) {
-		if ((val = pv_wait_head_or_lock(lock, node)))
-			goto locked;
+		pv_wait_head_or_lock(lock, node);
+		val = atomic_read(&lock->val);
+	} else {
+		val = atomic_cond_read_acquire(&lock->val,
+				!(VAL & _Q_LOCKED_PENDING_MASK));
 	}

-	val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));
-
-locked:
 	/*
 	 * claim the lock:
 	 *
--
2.35.1