From nobody Sun Oct 5 21:59:57 2025 Received: from mail-pg1-f201.google.com (mail-pg1-f201.google.com [209.85.215.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6253A1FE45D for ; Tue, 29 Jul 2025 02:26:47 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.215.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756009; cv=none; b=TdnJS1gBVGXUtIqimtzGFDY7I7wVerGFgsniKU8qf/RmzXjXG3+FCk5IPTJtjfGwxtyq7g/PiLGV1U7sc2MEcsrTK1IJ2Xj4E/Zl8yWvs9f+RwC9WkqE49zouUCIBdEcXEgME9+0iYi7sjdgJuwOT3v9/VgyYwEO6+9Z1dLBd1k= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756009; c=relaxed/simple; bh=XsIbEo1QAt0OSwO87c3wfNo0TeJaG8VRy1MC8xgy3HE=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Content-Type; b=gNuLo83fcP04JxDRXpZFm92cZpY432hjTXW55Wwo2e/G6MyyNWe72rI+CfoUJLq4Ek5FJ2fI08jq+A7xQ17L1Rl1Jcux+Gj5CbJwOePHyEXb+dSPpE+a+YjGsEpirGVsUjwD17g/GyLHKbuQW+EuK+mdqTc5quwWZS/yxO66CkU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=TOsJjeAO; arc=none smtp.client-ip=209.85.215.201 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="TOsJjeAO" Received: by mail-pg1-f201.google.com with SMTP id 41be03b00d2f7-b34abbcdcf3so3839026a12.1 for ; Mon, 28 Jul 2025 19:26:47 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1753756007; x=1754360807; darn=vger.kernel.org; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :from:to:cc:subject:date:message-id:reply-to; bh=CpvVVuC6XWA9ztMyuKPQWMesncLZVwr6fWvq0ZdAuns=; b=TOsJjeAOIOtSUkoDOYMbzQHDB3nUPUoHIVEyvPFTVxgXBPN0TYgkJOKmiwAUz0aj66 +0ODGfMhxS915EAvQ7ILZE9ciGH/9RFwu+pgIkbJjKNkeYdkJjJ4Ibn0JtL4f8XprWFj TgQsqAiwHW/0EXioWrihzZM4zn34BuF+tUt1B+M19rgE+RvFiTFMn1GsOIT3gaDonDv4 iopwNCLxqTqkBT36mV7dEtCpiNXvtTz/KZdfO+7Dk9NWSHxdqbOxooHCGaVrMtD6u8jS oWL9LKSMuecKyJUsmTfeIDksgvzn9Te6EfSguXrNcrY4hbUczwC5FqmyzxyIVC3/84fW 5zKw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1753756007; x=1754360807; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=CpvVVuC6XWA9ztMyuKPQWMesncLZVwr6fWvq0ZdAuns=; b=EA8BV+P63UvqagcRDZTn8ph8WzPJkfaXL8p3WhKI0t1CyTd6Q9/spA03GODW0RjEFa xnFAJ9S55u6tartczXvwBuVJTLSOixQQzV95Vm3VcwqycloTdbqTWAOSnSIqgvzWeaDC odauV4vMjbpCizFHTNndJK3jAm7pjo4WFMxXBJOlGgccvI8kPQMbGSm2i0X5mtFNbnuz EGmIg1tZOS1PPmxdd75cirgdjPWorhQ0ZEcGSFihcggHk/Q/ygx9H3plBDjQRcGVqMY8 Suz/MrFof3+zCxlEvTpUpEnxMvDq2GQaRsqnNbp5QibuoQlsRWFhJ5t0moHroPx9Vj2F 5XFA== X-Forwarded-Encrypted: i=1; AJvYcCVXDxdeIIkBqclUztmbXzMcbCJT2nYEgjE5eMoH8gMz911Cl6oSSTp8u9LbsFoVNsOahhlVI+nnEbWthDY=@vger.kernel.org X-Gm-Message-State: AOJu0YzjvMLxVTgAOLZYpYLGBHaF7S5KMn89aRZeOrBLaJrXDR23CfLd pDAi24V6QuhBOO56dyiEwKFWFUZEpicxCpqKqeYwETAXnSlZhltEUPpfse3DklhtKtoiGf40RYJ Ym38kfA== X-Google-Smtp-Source: 
AGHT+IFt6+m168wx8T76kfzjdkF7E2tdg8yvH2KSyyNcVARAGAS5oWA8BSfGsd1blfwQzInw+l51kjZ6qRQ= X-Received: from pfqr16.prod.google.com ([2002:aa7:9ed0:0:b0:749:42ec:b4bd]) (user=yuzhuo job=prod-delivery.src-stubby-dispatcher) by 2002:a05:6a20:748e:b0:233:d85e:a698 with SMTP id adf61e73a8af0-23d701bdfb7mr20813996637.32.1753756006616; Mon, 28 Jul 2025 19:26:46 -0700 (PDT) Date: Mon, 28 Jul 2025 19:26:34 -0700 In-Reply-To: <20250729022640.3134066-1-yuzhuo@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250729022640.3134066-1-yuzhuo@google.com> X-Mailer: git-send-email 2.50.1.487.gc89ff58d15-goog Message-ID: <20250729022640.3134066-2-yuzhuo@google.com> Subject: [PATCH v1 1/7] tools: Import cmpxchg and xchg functions From: Yuzhuo Jing To: Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Namhyung Kim , Mark Rutland , Alexander Shishkin , Jiri Olsa , Ian Rogers , Adrian Hunter , Liang Kan , Yuzhuo Jing , Yuzhuo Jing , Andrea Parri , Palmer Dabbelt , Charlie Jenkins , Sebastian Andrzej Siewior , Kumar Kartikeya Dwivedi , Alexei Starovoitov , Barret Rhoden , Alexandre Ghiti , Guo Ren , linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Import necessary atomic functions used by qspinlock. Copied x86 implementation verbatim, and used compiler builtin for generic implementation. Signed-off-by: Yuzhuo Jing --- tools/arch/x86/include/asm/atomic.h | 14 +++ tools/arch/x86/include/asm/cmpxchg.h | 113 +++++++++++++++++++++++++ tools/include/asm-generic/atomic-gcc.h | 47 ++++++++++ tools/include/linux/atomic.h | 24 ++++++ tools/include/linux/compiler_types.h | 24 ++++++ 5 files changed, 222 insertions(+) diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/a= sm/atomic.h index 365cf182df12..a55ffd4eb5f1 100644 --- a/tools/arch/x86/include/asm/atomic.h +++ b/tools/arch/x86/include/asm/atomic.h @@ -71,6 +71,20 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, i= nt old, int new) return cmpxchg(&v->counter, old, new); } =20 +static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int = new) +{ + return try_cmpxchg(&v->counter, old, new); +} + +static __always_inline int atomic_fetch_or(int i, atomic_t *v) +{ + int val =3D atomic_read(v); + + do { } while (!atomic_try_cmpxchg(v, &val, val | i)); + + return val; +} + static inline int test_and_set_bit(long nr, unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, "Ir", nr, "%0", "c"); diff --git a/tools/arch/x86/include/asm/cmpxchg.h b/tools/arch/x86/include/= asm/cmpxchg.h index 0ed9ca2766ad..5372da8b27fc 100644 --- a/tools/arch/x86/include/asm/cmpxchg.h +++ b/tools/arch/x86/include/asm/cmpxchg.h @@ -8,6 +8,8 @@ * Non-existant functions to indicate usage errors at link time * (or compile-time if the compiler implements __compiletime_error(). */ +extern void __xchg_wrong_size(void) + __compiletime_error("Bad argument size for xchg"); extern void __cmpxchg_wrong_size(void) __compiletime_error("Bad argument size for cmpxchg"); =20 @@ -27,6 +29,49 @@ extern void __cmpxchg_wrong_size(void) #define __X86_CASE_Q -1 /* sizeof will never return -1 */ #endif =20 +/*=20 + * An exchange-type operation, which takes a value and a pointer, and + * returns the old value. 
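For illustration, the retry idiom that try_cmpxchg() enables looks like the following user-space sketch (hypothetical helper name, not part of this series); on failure the macro refreshes the caller's copy of the old value, so the loop needs no explicit reload, exactly as in the atomic_fetch_or() implementation above:

	/* Sketch: atomically OR bits into *v and return the previous value. */
	static inline int fetch_or_sketch(atomic_t *v, int bits)
	{
		int old = atomic_read(v);

		/* try_cmpxchg() updates 'old' with the observed value on failure. */
		while (!atomic_try_cmpxchg(v, &old, old | bits))
			;	/* retry with the refreshed 'old' */

		return old;
	}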
+ */
+#define __xchg_op(ptr, arg, op, lock)					\
+	({								\
+		__typeof__ (*(ptr)) __ret = (arg);			\
+		switch (sizeof(*(ptr))) {				\
+		case __X86_CASE_B:					\
+			asm_inline volatile (lock #op "b %b0, %1"	\
+				      : "+q" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_W:					\
+			asm_inline volatile (lock #op "w %w0, %1"	\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_L:					\
+			asm_inline volatile (lock #op "l %0, %1"	\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		case __X86_CASE_Q:					\
+			asm_inline volatile (lock #op "q %q0, %1"	\
+				      : "+r" (__ret), "+m" (*(ptr))	\
+				      : : "memory", "cc");		\
+			break;						\
+		default:						\
+			__ ## op ## _wrong_size();			\
+		}							\
+		__ret;							\
+	})
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
+ * Since this is generally used to protect other memory information, we
+ * use "asm volatile" and "memory" clobbers to prevent gcc from moving
+ * information around.
+ */
+#define xchg(ptr, v)	__xchg_op((ptr), (v), xchg, "")
+
 /*
  * Atomic compare and exchange. Compare OLD with MEM, if identical,
  * store NEW in MEM. Return the initial value in MEM. Success is
@@ -86,5 +131,73 @@ extern void __cmpxchg_wrong_size(void)
 #define cmpxchg(ptr, old, new) \
 	__cmpxchg(ptr, old, new, sizeof(*(ptr)))
 
+#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock)		\
+({									\
+	bool success;							\
+	__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold);		\
+	__typeof__(*(_ptr)) __old = *_old;				\
+	__typeof__(*(_ptr)) __new = (_new);				\
+	switch (size) {							\
+	case __X86_CASE_B:						\
+	{								\
+		volatile u8 *__ptr = (volatile u8 *)(_ptr);		\
+		asm_inline volatile(lock "cmpxchgb %[new], %[ptr]"	\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "q" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_W:						\
+	{								\
+		volatile u16 *__ptr = (volatile u16 *)(_ptr);		\
+		asm_inline volatile(lock "cmpxchgw %[new], %[ptr]"	\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "r" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_L:						\
+	{								\
+		volatile u32 *__ptr = (volatile u32 *)(_ptr);		\
+		asm_inline volatile(lock "cmpxchgl %[new], %[ptr]"	\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "r" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	case __X86_CASE_Q:						\
+	{								\
+		volatile u64 *__ptr = (volatile u64 *)(_ptr);		\
+		asm_inline volatile(lock "cmpxchgq %[new], %[ptr]"	\
+			     CC_SET(z)					\
+			     : CC_OUT(z) (success),			\
+			       [ptr] "+m" (*__ptr),			\
+			       [old] "+a" (__old)			\
+			     : [new] "r" (__new)			\
+			     : "memory");				\
+		break;							\
+	}								\
+	default:							\
+		__cmpxchg_wrong_size();					\
+	}								\
+	if (unlikely(!success))						\
+		*_old = __old;						\
+	likely(success);						\
+})
+
+#define __try_cmpxchg(ptr, pold, new, size)				\
+	__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
+
+#define try_cmpxchg(ptr, pold, new)					\
+	__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
 
 #endif /* TOOLS_ASM_X86_CMPXCHG_H */
diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h
index 9b3c528bab92..08b7b3b36873 100644
--- a/tools/include/asm-generic/atomic-gcc.h
+++ b/tools/include/asm-generic/atomic-gcc.h
@@ -62,6 +62,12 @@ static inline int atomic_dec_and_test(atomic_t *v)
 	return __sync_sub_and_fetch(&v->counter, 1) == 0;
 }
 
+#define xchg(ptr, v) \
+	__atomic_exchange_n(ptr, v, __ATOMIC_SEQ_CST)
+
+#define xchg_relaxed(ptr, v)
\ + __atomic_exchange_n(ptr, v, __ATOMIC_RELAXED) + #define cmpxchg(ptr, oldval, newval) \ __sync_val_compare_and_swap(ptr, oldval, newval) =20 @@ -70,6 +76,47 @@ static inline int atomic_cmpxchg(atomic_t *v, int oldval= , int newval) return cmpxchg(&(v)->counter, oldval, newval); } =20 +/** + * atomic_try_cmpxchg() - atomic compare and exchange with full ordering + * @v: pointer to atomic_t + * @old: pointer to int value to compare with + * @new: int value to assign + * + * If (@v =3D=3D @old), atomically updates @v to @new with full ordering. + * Otherwise, @v is not modified, @old is updated to the current value of = @v, + * and relaxed ordering is provided. + * + * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg() there. + * + * Return: @true if the exchange occured, @false otherwise. + */ +static __always_inline bool +atomic_try_cmpxchg(atomic_t *v, int *old, int new) +{ + int r, o =3D *old; + r =3D atomic_cmpxchg(v, o, new); + if (unlikely(r !=3D o)) + *old =3D r; + return likely(r =3D=3D o); +} + +/** + * atomic_fetch_or() - atomic bitwise OR with full ordering + * @i: int value + * @v: pointer to atomic_t + * + * Atomically updates @v to (@v | @i) with full ordering. + * + * Unsafe to use in noinstr code; use raw_atomic_fetch_or() there. + * + * Return: The original value of @v. + */ +static __always_inline int +atomic_fetch_or(int i, atomic_t *v) +{ + return __sync_fetch_and_or(&v->counter, i); +} + static inline int test_and_set_bit(long nr, unsigned long *addr) { unsigned long mask =3D BIT_MASK(nr); diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h index 01907b33537e..332a34177995 100644 --- a/tools/include/linux/atomic.h +++ b/tools/include/linux/atomic.h @@ -12,4 +12,28 @@ void atomic_long_set(atomic_long_t *v, long i); #define atomic_cmpxchg_release atomic_cmpxchg #endif /* atomic_cmpxchg_relaxed */ =20 +#ifndef atomic_cmpxchg_acquire +#define atomic_cmpxchg_acquire atomic_cmpxchg +#endif + +#ifndef atomic_try_cmpxchg_acquire +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg +#endif + +#ifndef atomic_try_cmpxchg_relaxed +#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg +#endif + +#ifndef atomic_fetch_or_acquire +#define atomic_fetch_or_acquire atomic_fetch_or +#endif + +#ifndef xchg_relaxed +#define xchg_relaxed xchg +#endif + +#ifndef cmpxchg_release +#define cmpxchg_release cmpxchg +#endif + #endif /* __TOOLS_LINUX_ATOMIC_H */ diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/com= piler_types.h index d09f9dc172a4..9a2a2f8d7b6c 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -31,6 +31,28 @@ # define __cond_lock(x,c) (c) #endif /* __CHECKER__ */ =20 +/* + * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving + * non-scalar types unchanged. + */ +/* + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'c= har' + * is not type-compatible with 'signed char', and we define a separate cas= e. + */ +#define __scalar_type_to_expr_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (signed type)0 + +#define __unqual_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (char)0, \ + __scalar_type_to_expr_cases(char), \ + __scalar_type_to_expr_cases(short), \ + __scalar_type_to_expr_cases(int), \ + __scalar_type_to_expr_cases(long), \ + __scalar_type_to_expr_cases(long long), \ + default: (x))) + /* Compiler specific macros. */ #ifdef __GNUC__ #include @@ -40,4 +62,6 @@ #define asm_goto_output(x...) 
asm goto(x) #endif =20 +#define asm_inline asm + #endif /* __LINUX_COMPILER_TYPES_H */ --=20 2.50.1.487.gc89ff58d15-goog From nobody Sun Oct 5 21:59:57 2025 Received: from mail-pl1-f201.google.com (mail-pl1-f201.google.com [209.85.214.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C006020296E for ; Tue, 29 Jul 2025 02:26:48 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.214.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756010; cv=none; b=G5MfhvsdlhJO0LXLytIG9lMI0z/AXeSsa3PUAs9lr9bzJ7CqcBNvDrkEtz9sEEjOxbyGkqBkUoPCOsJbGs5zyi3tnq1HqqwflMHRO+LC7aJ8086hjWs1p72PnJf10npm7koyrNrvwTNjamFW5IQBckQNx1DVIKwJ6zLmWwAL4h8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756010; c=relaxed/simple; bh=4ISGW0qCLPw0BM53+WZj1UO7Eu/DiHDQi040hwtDTro=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Content-Type; b=d2o2a1QtuktGsGSCwbquI5OcpDtmh92Ho6hKVXVq6ASYYkc9GyQoz3iGyQtPsUNwEY++hRFLsLy8nLmK+qShc1T5PuVkKKRcyEJBT2chk6piShKspJtS0VfVz2SMi4FY7/DSuaxrwbNok6+b9bRZXaHcFFsQpukzKdy2NenJnDM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=yvy3uJGj; arc=none smtp.client-ip=209.85.214.201 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="yvy3uJGj" Received: by mail-pl1-f201.google.com with SMTP id d9443c01a7336-2403e4c82dbso12415315ad.0 for ; Mon, 28 Jul 2025 19:26:48 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1753756008; x=1754360808; darn=vger.kernel.org; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :from:to:cc:subject:date:message-id:reply-to; bh=MZDFdixHdB12wePHGCdlmdShiJsvmZbe+2EaeGSxRAI=; b=yvy3uJGjVeSFxfka0z1c0zQiLzmtMvwka3ld+dW61JGQdNKt+rPtw2RRh0s5zkj7vB TBiB2Wkdp7NlJv0AwYrY8jby+27RbFo6+QsLLaBQu2BL4Sr3axPH6F8WuG2vyEZr2Ze6 leFFoYnXCNCcMUe4j1dSPmZYx+qtDvYMn0UQepywD+Dt0zI2BJROOGC1dUUGuT36GnKN 9xyTVl0q3pTGTjg+W8ODZbLVUJVYPDBYqipjkCJisMLnl66wlT9L7GLcjXgS5uVavXQV JggyLiBGICQHWGtSPSmyAA3wKYcCc0ri9yaSW3McrCdgmZK8JFd6PDFGkEMky/MkxKSY oB5A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1753756008; x=1754360808; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=MZDFdixHdB12wePHGCdlmdShiJsvmZbe+2EaeGSxRAI=; b=OjYphjrxnZschElXCBf1wnN+GY1rqGGNkcLxQs5mcNzoGJuQORrFl0lZwsf3Wi8vrT z5ZnSNW3HMITeGGUoA2fAdty6ksO3zwnINaX6Jc8trTFKxoLF4kXq+iG5EnrzdWoyNxS e3bw4syn9Z1C+MDmsR8qc9e1oW/AW9gvXvJIY36OfO8eq8l1ad4FyKezVU7kCnNREPqY EZ7LJLxtewzyrNhftwF69KKc0ES7jFft8RURGVkdF/IIhPUD9xMuWRdy0Q3bkjnAFgab xSvjTDqcUFwOxNmSy8A2BH4cGgaqfoD+KhrX41Mp2JzQhdD4zIdOCYt9bhQ67nGMPoOh I02A== X-Forwarded-Encrypted: i=1; AJvYcCXFayxMFytRb9ZOdXiDhH+YWdZbwlGBSM+KQuIzqlltViefAi2CV1EZm9G7CLEykgMz1DFWowPhw0/o4fo=@vger.kernel.org X-Gm-Message-State: AOJu0Yw4Vl8w/SkK3KX+wvCHe64CkP0Ju7uo09jL/bDaQC3SxCyP5baM 
0NU9Tu54CQcRSnSlI5JHUpt7oLHucCCfAktxGaaOQG49cxWmyZNsnTCHL5ZIAAixH8kuyrM/7gL zIXR5Yw== X-Google-Smtp-Source: AGHT+IGT/z+KJFAljNSW6NANlfZq/RIKeylnOweKpxXQz/9KYWN22xeESvB+lMHSNFQTjy3lnaZ3WI8wSiY= X-Received: from plbme16.prod.google.com ([2002:a17:902:fc50:b0:23f:8c3f:e0fc]) (user=yuzhuo job=prod-delivery.src-stubby-dispatcher) by 2002:a17:902:f54c:b0:240:71a5:f30 with SMTP id d9443c01a7336-24071a51d35mr8329275ad.22.1753756008099; Mon, 28 Jul 2025 19:26:48 -0700 (PDT) Date: Mon, 28 Jul 2025 19:26:35 -0700 In-Reply-To: <20250729022640.3134066-1-yuzhuo@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250729022640.3134066-1-yuzhuo@google.com> X-Mailer: git-send-email 2.50.1.487.gc89ff58d15-goog Message-ID: <20250729022640.3134066-3-yuzhuo@google.com> Subject: [PATCH v1 2/7] tools: Import smp_cond_load and atomic_cond_read From: Yuzhuo Jing To: Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Namhyung Kim , Mark Rutland , Alexander Shishkin , Jiri Olsa , Ian Rogers , Adrian Hunter , Liang Kan , Yuzhuo Jing , Yuzhuo Jing , Andrea Parri , Palmer Dabbelt , Charlie Jenkins , Sebastian Andrzej Siewior , Kumar Kartikeya Dwivedi , Alexei Starovoitov , Barret Rhoden , Alexandre Ghiti , Guo Ren , linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Import generic barrier implementation of smp_cond_load_{acquire,relaxed} and import macro definitions of atomic_cond_read_{acquire,relaxed}. Signed-off-by: Yuzhuo Jing --- tools/include/asm/barrier.h | 58 ++++++++++++++++++++++++++++++++++++ tools/include/linux/atomic.h | 3 ++ 2 files changed, 61 insertions(+) diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h index 0c21678ac5e6..5150c955c1c9 100644 --- a/tools/include/asm/barrier.h +++ b/tools/include/asm/barrier.h @@ -63,3 +63,61 @@ do { \ ___p1; \ }) #endif + +#ifndef cpu_relax +#define cpu_relax() ({}) +#endif + +/** + * smp_acquire__after_ctrl_dep() - Provide ACQUIRE ordering after a contro= l dependency + * + * A control dependency provides a LOAD->STORE order, the additional RMB + * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} ord= er, + * aka. (load)-ACQUIRE. + * + * Architectures that do not do load speculation can have this be barrier(= ). + */ +#ifndef smp_acquire__after_ctrl_dep +#define smp_acquire__after_ctrl_dep() smp_rmb() +#endif + +/** + * smp_cond_load_relaxed() - (Spin) wait for cond with no ordering guarant= ees + * @ptr: pointer to the variable to wait on + * @cond: boolean expression to wait for + * + * Equivalent to using READ_ONCE() on the condition variable. + * + * Due to C lacking lambda expressions we load the value of *ptr into a + * pre-named variable @VAL to be used in @cond. + */ +#ifndef smp_cond_load_relaxed +#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ + typeof(ptr) __PTR =3D (ptr); \ + __unqual_scalar_typeof(*ptr) VAL; \ + for (;;) { \ + VAL =3D READ_ONCE(*__PTR); \ + if (cond_expr) \ + break; \ + cpu_relax(); \ + } \ + (typeof(*ptr))VAL; \ +}) +#endif + +/** + * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering + * @ptr: pointer to the variable to wait on + * @cond: boolean expression to wait for + * + * Equivalent to using smp_load_acquire() on the condition variable but em= ploys + * the control dependency of the wait to reduce the barrier on many platfo= rms. 
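As a usage illustration (hypothetical structure, not part of this series): a consumer can spin until a producer publishes a flag with smp_store_release(), and the ACQUIRE ordering then makes the accompanying data visible:

	struct item {
		int ready;	/* producer side: smp_store_release(&it->ready, 1) */
		int payload;
	};

	static int consume(struct item *it)
	{
		/* Spin until 'ready' is non-zero; VAL names the loaded value. */
		smp_cond_load_acquire(&it->ready, VAL != 0);
		return it->payload;	/* ordered after the flag load */
	}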
+ */ +#ifndef smp_cond_load_acquire +#define smp_cond_load_acquire(ptr, cond_expr) ({ \ + __unqual_scalar_typeof(*ptr) _val; \ + _val =3D smp_cond_load_relaxed(ptr, cond_expr); \ + smp_acquire__after_ctrl_dep(); \ + (typeof(*ptr))_val; \ +}) +#endif diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h index 332a34177995..6baee2c41b55 100644 --- a/tools/include/linux/atomic.h +++ b/tools/include/linux/atomic.h @@ -36,4 +36,7 @@ void atomic_long_set(atomic_long_t *v, long i); #define cmpxchg_release cmpxchg #endif =20 +#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter= , (c)) +#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter= , (c)) + #endif /* __TOOLS_LINUX_ATOMIC_H */ --=20 2.50.1.487.gc89ff58d15-goog From nobody Sun Oct 5 21:59:57 2025 Received: from mail-pl1-f201.google.com (mail-pl1-f201.google.com [209.85.214.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A343520C48A for ; Tue, 29 Jul 2025 02:26:50 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.214.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756012; cv=none; b=bP26xkXG5tlyEuv1yNum3nobyN0OemLhlGc5eZkJHoAPcKurzUZMQtwNHrn81+M7eegTZYJ/5l4PaewgHw1zuoXAdS3YuiEeENhcpdt3V19jIprXdgBG/E0hesXO+NmDdcdeAnk0ioeT4LiM7BlxL6oP23U8lfe87KJD2KWtHMM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756012; c=relaxed/simple; bh=bpPKQzVvF2Y3jUpkSIAzRUU7htVMnv8MWQb3f1guygM=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Content-Type; b=D3FYGif7m2UX/jYWAcybD5M9rvblbcT5PEKHsM7ppGHOFJU9WXKA+g2DYr1BsyCpX005qrXRADxvwUGRfDRxaDfiYNx284zPw675xHIqudNkLx9bRrhUeMcnJc6xCDLy1TxLTTDEptxs46j9qu9hQErEWr6pPKjX8i1FEDfUS+A= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=sNxNF3r+; arc=none smtp.client-ip=209.85.214.201 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="sNxNF3r+" Received: by mail-pl1-f201.google.com with SMTP id d9443c01a7336-2369dd58602so52165935ad.1 for ; Mon, 28 Jul 2025 19:26:50 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1753756010; x=1754360810; darn=vger.kernel.org; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :from:to:cc:subject:date:message-id:reply-to; bh=NPDuevloimGKVsWil7Slcd/Yj9vleIb69P9iimghSK4=; b=sNxNF3r+5VVjyrbiG1R8TKESohNemm/A6syHOxVpFiwYedSwnM1lIeV4GjuvB07F3v j4x3nmF3YQSfP98uJT/cQBi37gIc4J30k1Mv0GZGJaIqvmi1xQk0YiTpeVW05fteCMyz S6P/JUyVu5bkqHs6gfGhdgIxPCQPPa9J4qEpa0cj8CgFb0Kz9YJhJJImkXTlnOelslb3 OU99M/O3epFnAjMqAvClXaXPvS2FbGS7JiU5iYGmJaM9VTlzuBK3O8FiYU33tzMQfZ0T DEQqeV38AiF2dUa7vkd4S31D390cA9pqdATadBP76pxmPLiuYW75vKhUHjAIq8XqUYsB 5SeQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1753756010; x=1754360810; h=to:from:subject:message-id:references:mime-version:in-reply-to:date 
:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=NPDuevloimGKVsWil7Slcd/Yj9vleIb69P9iimghSK4=; b=KuyuJ4+8SJnswNq9HTDFXRTI/UCuM6iv98jsPpK1lUGi5YXBi6/9QYda81xWp2kyaB 1SPUcACit+jvKgcwM4xxzVTa0KtAQ/+FbuHbHaVGjRCE98yzTqTDoghbewvh6PN9Y3kt rJlbMO//1/sNreROLF6iqKKZHLRm9i/FG/cfHnWTu+v9/EAgLlNtoYhwq2WjcpBGVldw zspgVjPXS0fbR4fBt+xKpgJWP2bOfXvMpewrGmgY75xJj1/vnknSr8Bq7M08w2/9iTbq RfGa7TA6oNQrwhjg4O+dd4ZZbwNKXjsPq2BNY1Ujj2rCcygnlzJ6fogP0W6EKZUaA/za Dvmw== X-Forwarded-Encrypted: i=1; AJvYcCVEFJaSuIGFydbhH+qgUF2kdoTd6zA6eQmXTDbAE88y59rO9rK5PV1J/Gxs4gST0/i9xFhhcXpqoCxnbiE=@vger.kernel.org X-Gm-Message-State: AOJu0YxxDkwEIXGB2PC13y/GySfiC+2ymZhjjicxGCmxTLOogZ4gELPU vIcU5ZoG3lOR7lq8FqShfLsVdaMmNzUb0Zn3GGwiUgQIj77F12emk6rxezNkrleAxiJCgyHZrM0 WI8WMLg== X-Google-Smtp-Source: AGHT+IGyyM3fDnm9QBVau2QFKbJOjQeX2Y82KWdawzVLWv6ahiPgYmXl8LvmKTgkRHyMXJzIeDPEu2d5dAI= X-Received: from pjbsn7.prod.google.com ([2002:a17:90b:2e87:b0:313:2d44:397b]) (user=yuzhuo job=prod-delivery.src-stubby-dispatcher) by 2002:a17:903:228e:b0:240:1f19:d35c with SMTP id d9443c01a7336-2401f19d5acmr92061085ad.39.1753756009846; Mon, 28 Jul 2025 19:26:49 -0700 (PDT) Date: Mon, 28 Jul 2025 19:26:36 -0700 In-Reply-To: <20250729022640.3134066-1-yuzhuo@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250729022640.3134066-1-yuzhuo@google.com> X-Mailer: git-send-email 2.50.1.487.gc89ff58d15-goog Message-ID: <20250729022640.3134066-4-yuzhuo@google.com> Subject: [PATCH v1 3/7] tools: Partial import of prefetch.h From: Yuzhuo Jing To: Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Namhyung Kim , Mark Rutland , Alexander Shishkin , Jiri Olsa , Ian Rogers , Adrian Hunter , Liang Kan , Yuzhuo Jing , Yuzhuo Jing , Andrea Parri , Palmer Dabbelt , Charlie Jenkins , Sebastian Andrzej Siewior , Kumar Kartikeya Dwivedi , Alexei Starovoitov , Barret Rhoden , Alexandre Ghiti , Guo Ren , linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Import only prefetch and prefetchw but not page and range related methods. Signed-off-by: Yuzhuo Jing --- tools/include/linux/prefetch.h | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tools/include/linux/prefetch.h diff --git a/tools/include/linux/prefetch.h b/tools/include/linux/prefetch.h new file mode 100644 index 000000000000..1ed8678f4824 --- /dev/null +++ b/tools/include/linux/prefetch.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Generic cache management functions. Everything is arch-specific, =20 + * but this header exists to make sure the defines/functions can be + * used in a generic way. + * + * 2000-11-13 Arjan van de Ven + * + */ + +#ifndef _LINUX_PREFETCH_H +#define _LINUX_PREFETCH_H + +/* + prefetch(x) attempts to pre-emptively get the memory pointed to + by address "x" into the CPU L1 cache.=20 + prefetch(x) should not cause any kind of exception, prefetch(0) is + specifically ok. + + prefetch() should be defined by the architecture, if not, the=20 + #define below provides a no-op define.=09 +=09 + There are 2 prefetch() macros: +=09 + prefetch(x) - prefetches the cacheline at "x" for read + prefetchw(x) - prefetches the cacheline at "x" for write +=09 + there is also PREFETCH_STRIDE which is the architecure-preferred=20 + "lookahead" size for prefetching streamed operations. 
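A typical use (illustrative sketch, not part of this series) is prefetching the next element of a linked list while the current one is processed:

	struct node {
		struct node *next;
		long value;
	};

	static long sum_list(struct node *n)
	{
		long sum = 0;

		while (n) {
			prefetch(n->next);	/* start pulling in the next node early */
			sum += n->value;
			n = n->next;
		}
		return sum;
	}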
+=09 +*/ + +#ifndef ARCH_HAS_PREFETCH +#define prefetch(x) __builtin_prefetch(x) +#endif + +#ifndef ARCH_HAS_PREFETCHW +#define prefetchw(x) __builtin_prefetch(x,1) +#endif + +#endif --=20 2.50.1.487.gc89ff58d15-goog From nobody Sun Oct 5 21:59:57 2025 Received: from mail-pg1-f201.google.com (mail-pg1-f201.google.com [209.85.215.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6E81E2147E5 for ; Tue, 29 Jul 2025 02:26:52 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.215.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756014; cv=none; b=feLxyRtQSmfuCNyp+nGC8X+Znxd+HQco4LVres0Ed2zbk15IzrdiXRy7l4Yf2RthV04Wre86srTmN3VLDBzHb1g2zMVpsO+aWd4NYsMVW091ZZiqfHGUsBsIDEgGHF0PU+uHF0xwacX0uy7NwGLMNhJk7qsKTrxMa+jUESrtg/Q= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756014; c=relaxed/simple; bh=5uumQFEakBzCNefgnK5KQH0r1rdGvF5P6NYOneXfUyE=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Content-Type; b=M8Az6Dv2b+8ImXD/fDIx/T6GdxW5YZT0gCkvYp7v6+IXg3OydLtXfL8rr/mxHG6n3gd/E9ZMbP5TVNmTQwnZayXm1cSiuFJEYwCE++w9qqKURBwgEWFrCnoHrJFwD5jxaxd75X245yaLdErzoBTJwQPKl7B1cEmZZGVX5lmfJ/M= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=Een4VQzq; arc=none smtp.client-ip=209.85.215.201 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="Een4VQzq" Received: by mail-pg1-f201.google.com with SMTP id 41be03b00d2f7-b41f56fdab3so1593889a12.0 for ; Mon, 28 Jul 2025 19:26:52 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1753756012; x=1754360812; darn=vger.kernel.org; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :from:to:cc:subject:date:message-id:reply-to; bh=nR+qnYIXhky/aP885u9agL4XJhjaPZr+Fl5Gin8Crt0=; b=Een4VQzqbZ7JY+H+sBZhtsSbJZNoRQuxZyHaJGHoFW7XQ90mp6N/X3Ijibf7Bdtrrz Sb2aP0LLU2J8Ada6Xxjhir5kInaJyO+CqxKbnnPQ00oFe15jbyDOYG0BXnP7Jnh/uR3u fJsfJ1x5Fn94qiPSVyb2JaUeUeP6qasSbz5Mbqk7ncGB+oRU97z/rSoI6WFi/rtfQRks KMK139+peZ21r2X+uTDYyV4Y/gQcLmx0LAgENqRFLas6/KrcZM2dbYGt0da8bq2uXhQG ivlt536Tx3sIeLPzBc5ViVyToQQpCJ8ZjoxM5z02y/b9UX+v9WAz0A0d2XwLJSqEUyng enRQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1753756012; x=1754360812; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=nR+qnYIXhky/aP885u9agL4XJhjaPZr+Fl5Gin8Crt0=; b=Qk+UPUuo8e3yI4AQ4gLrWKOAs+7wUHK7TgSYdCu1T0YSfkAMwv6fgtBAjgB3ryhqdx Cw74HF6C7hEFO5pO0dHxQHm9aSsR+DhVPRZLOJHgkU+VFCp5C5MtJWOMmDFHsBaG9poy rxGDgqaENQkLc3MLl9ut+G2tnSpNPx53qZdnYZgXAU1KJA1JC2M847RkDECvkKaj4cBF hq1+hRGA7IB6dUgfXnjJq/NWx3ke/z8o4oTNAn4t8O5JuWUMkbPL5GLbU49AMrXWr5pQ mG4E7O21aFuWRz6VW7MmAOz0r8WS6h+FEhAHOH6BC1E+OfWsp9Da2U434L3VV0pcFVUD XO5g== X-Forwarded-Encrypted: i=1; AJvYcCU1kukt9yV2l0SYYLUxQvNnRcDSmlXkLbpPKrmZI9mW+80HWb/djlpssjRtlvsSnDZ9wI0rSFKgjA4Yy/4=@vger.kernel.org X-Gm-Message-State: 
AOJu0YxqruHVCMuGOzsi6qm0PzfAgI6Fx/+Zzhnrh6aMfUHBJ+SkY2Im TeFVH57h6jcwTijl5l19aB74vup4imaG2hUPZg/e3lyxZHOOhOu/LE+TLYYcmwR4fGZGMQkhG6V Wf4J0uQ== X-Google-Smtp-Source: AGHT+IGaz+zt9fkjFW/IQ8BgR/CTX8RaQPImDuqvY7R9r+tO/y2WcXT503K699hl7CqHOeIz+AfKnHSrNZE= X-Received: from pfjj11.prod.google.com ([2002:a05:6a00:234b:b0:746:30f0:9b33]) (user=yuzhuo job=prod-delivery.src-stubby-dispatcher) by 2002:a05:6a20:938c:b0:220:33e9:15da with SMTP id adf61e73a8af0-23d6ffe49aamr22500947637.2.1753756011644; Mon, 28 Jul 2025 19:26:51 -0700 (PDT) Date: Mon, 28 Jul 2025 19:26:37 -0700 In-Reply-To: <20250729022640.3134066-1-yuzhuo@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250729022640.3134066-1-yuzhuo@google.com> X-Mailer: git-send-email 2.50.1.487.gc89ff58d15-goog Message-ID: <20250729022640.3134066-5-yuzhuo@google.com> Subject: [PATCH v1 4/7] tools: Implement userspace per-cpu From: Yuzhuo Jing To: Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Namhyung Kim , Mark Rutland , Alexander Shishkin , Jiri Olsa , Ian Rogers , Adrian Hunter , Liang Kan , Yuzhuo Jing , Yuzhuo Jing , Andrea Parri , Palmer Dabbelt , Charlie Jenkins , Sebastian Andrzej Siewior , Kumar Kartikeya Dwivedi , Alexei Starovoitov , Barret Rhoden , Alexandre Ghiti , Guo Ren , linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Implement userspace per-cpu for imported kernel code. Compared with simple thread-local definition, the kernel per-cpu provides 1) a guarantee of static lifetime even when thread exits, and 2) the ability to access other CPU's per-cpu data. This patch adds an alternative implementation and interface for userspace per-cpu. The kernel implementation uses special ELF sections and offset calculation. For simplicity, this version defines a PERCPU_MAX length global array for each per-cpu data, and uses a thread-local cpu id for indexing. Signed-off-by: Yuzhuo Jing --- tools/include/linux/compiler_types.h | 3 + tools/include/linux/percpu-simulate.h | 128 ++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 tools/include/linux/percpu-simulate.h diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/com= piler_types.h index 9a2a2f8d7b6c..46550c500b8c 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -31,6 +31,9 @@ # define __cond_lock(x,c) (c) #endif /* __CHECKER__ */ =20 +/* Per-cpu checker flag does not use address space attribute in userspace = */ +#define __percpu + /* * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving * non-scalar types unchanged. diff --git a/tools/include/linux/percpu-simulate.h b/tools/include/linux/pe= rcpu-simulate.h new file mode 100644 index 000000000000..a6af2f2211eb --- /dev/null +++ b/tools/include/linux/percpu-simulate.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Userspace implementation of per_cpu_ptr for adapted kernel code. + * + * Userspace code does not have and does not need a per-cpu concept, but + * instead can declare variables as thread-local. However, the kernel per= -cpu + * further provides 1) the guarantee of static lifetime when thread exits,= and + * 2) the ability to access other CPU's per-cpu data. This file provides a + * simple implementation of such functionality, but with slightly different + * APIs and without linker script changes. 
+ * + * 2025 Yuzhuo Jing + */ +#ifndef __PERCPU_SIMULATE_H__ +#define __PERCPU_SIMULATE_H__ + +#include + +#include +#include + +/* + * The maximum supported number of CPUs. Per-cpu variables are defined as= a + * PERCPU_MAX length array, indexed by a thread-local cpu id. + */ +#define PERCPU_MAX 4096 + +#ifdef ASSERT_PERCPU +#define __check_cpu_id(cpu) \ +({ \ + u32 cpuid =3D (cpu); \ + assert(cpuid < PERCPU_MAX); \ + cpuid; \ +}) +#else +#define __check_cpu_id(cpu) (cpu) +#endif + +/* + * Use weak symbol: only define __thread_per_cpu_id variable if any perf t= ool + * includes this header file. + */ +_Thread_local u32 __thread_per_cpu_id __weak; + +static inline u32 get_this_cpu_id(void) +{ + return __thread_per_cpu_id; +} + +/* + * The user code must call this function inside of each thread that uses + * per-cpu data structures. The user code can choose an id of their choic= e, + * but must ensure each thread uses a different id. + * + * Safety: asserts CPU id smaller than PERCPU_MAX if ASSERT_PERCPU is defi= ned. + */ +static inline void set_this_cpu_id(u32 id) +{ + __thread_per_cpu_id =3D __check_cpu_id(id); +} + +/* + * Declare a per-cpu data structure. This only declares the data type and + * array length. Different per-cpu data are differentiated by a key (ident= ifer). + * + * Different from the kernel version, this API must be called before the a= ctual + * definition (i.e. DEFINE_PER_CPU_ALIGNED). + * + * Note that this implementation does not support prepending static qualif= ier, + * or appending assignment expressions. + */ +#define DECLARE_PER_CPU_ALIGNED(key, type, data) \ + extern struct __percpu_type_##key { \ + type data; \ + } __percpu_data_##key[PERCPU_MAX] + +/* + * Define the per-cpu data storage for a given key. This uses a previously + * defined data type in DECLARE_PER_CPU_ALIGNED. + * + * Different from the kernel version, this API only accepts a key name. + */ +#define DEFINE_PER_CPU_ALIGNED(key) \ + struct __percpu_type_##key __percpu_data_##key[PERCPU_MAX] + +#define __raw_per_cpu_value(key, field, cpu) \ + (__percpu_data_##key[cpu].field) + +/* + * Get a pointer of per-cpu data for a given key. + * + * Different from the kernel version, users of this API don't need to pass= the + * address of the base variable (through `&varname'). + * + * Safety: asserts CPU id smaller than PERCPU_MAX if ASSERT_PERCPU is defi= ned. + */ +#define per_cpu_ptr(key, field, cpu) (&per_cpu_value(key, field, cpu)) +#define this_cpu_ptr(key, field) (&this_cpu_value(key, field)) + +/* + * Additional APIs for direct value access. Effectively, `*per_cpu_ptr(..= .)'. + * + * Safety: asserts CPU id smaller than PERCPU_MAX if ASSERT_PERCPU is defi= ned. + */ +#define per_cpu_value(key, field, cpu) \ + (__raw_per_cpu_value(key, field, __check_cpu_id(cpu))) +#define this_cpu_value(key, field) \ + (__raw_per_cpu_value(key, field, __thread_per_cpu_id)) + +/* + * Helper functions of simple per-cpu operations. + * + * The kernel version differentiates __this_cpu_* from this_cpu_* for + * preemption/interrupt-safe contexts, but the userspace version defines t= hem + * as the same. 
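To make the key-based API concrete, here is a minimal usage sketch (the key 'bench_stats', the field 'ops' and the worker function are hypothetical): every thread registers a unique id once, then uses the accessors from this header instead of the kernel's &variable style:

	DECLARE_PER_CPU_ALIGNED(bench_stats, u64, ops);
	DEFINE_PER_CPU_ALIGNED(bench_stats);

	static void *worker(void *arg)
	{
		set_this_cpu_id((u32)(unsigned long)arg);	/* unique per thread */

		for (int i = 0; i < 1000; i++)
			this_cpu_inc(bench_stats, ops);		/* helper defined just below */
		return NULL;
	}

	/* Any thread may later inspect another "CPU": per_cpu_value(bench_stats, ops, 3) */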
+ */ + +#define __this_cpu_add(key, field, val) (this_cpu_value(key, field) +=3D (= val)) +#define __this_cpu_sub(key, field, val) (this_cpu_value(key, field) -=3D (= val)) +#define __this_cpu_inc(key, field) (++this_cpu_value(key, field)) +#define __this_cpu_dec(key, field) (--this_cpu_value(key, field)) + +#define this_cpu_add __this_cpu_add +#define this_cpu_sub __this_cpu_sub +#define this_cpu_inc __this_cpu_inc +#define this_cpu_dec __this_cpu_dec + +#endif /* __PERCPU_SIMULATE_H__ */ --=20 2.50.1.487.gc89ff58d15-goog From nobody Sun Oct 5 21:59:57 2025 Received: from mail-pg1-f202.google.com (mail-pg1-f202.google.com [209.85.215.202]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 04F6621B905 for ; Tue, 29 Jul 2025 02:26:53 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.215.202 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756016; cv=none; b=HT2l8hVy96cK5ek4cxr3GhbFovGKQVBq664zWR5n/3E7GTk2m05Ec2NnoiV7O1J5UilRzkKhwOrGR4IASS9sxOMZ6o5aKTx7y+DNwECBrFv2DUhfHX7pZEfCLVK8V8BXMCfz5KDh1Zm13S5Agl08MB61VrR8hWl8xsnsY56VmYo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756016; c=relaxed/simple; bh=aTizRNpiVqfl4GjPCDh5g3qb75q8oeXtZoPQIbo24no=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Content-Type; b=Y7QHhKfErN9b9gebHNk3v53rbIepw7DS1nNYFbppYnO+WUoK9kQXu5CLMP0lYmbND/a6fuvc5MkARBUfLdXLdYhOl8UBnQlHqugcXOp576rLBiW0uZU3+tL5TAgPFFHi80ZNC6/Onn3NcHT2GlzpfFEtc+I+anm7b6T2qEH95TY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=wUUR5aLN; arc=none smtp.client-ip=209.85.215.202 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="wUUR5aLN" Received: by mail-pg1-f202.google.com with SMTP id 41be03b00d2f7-b2fa1a84566so4181538a12.1 for ; Mon, 28 Jul 2025 19:26:53 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1753756013; x=1754360813; darn=vger.kernel.org; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :from:to:cc:subject:date:message-id:reply-to; bh=2Spz5It1QJWoWrn1v3EPBOJi/HDbSwdDv1z5ubqFy+w=; b=wUUR5aLNPRt1A0D4viMdu76XxripHbF042/33c3Gg2HeAJhlu6kCWgCbv43MBYW9F3 qluQY0VUYJ5YdhbDzxRQhQdpF5KXgUD6hVmTNpyQJuLWOOIUCfnzmG0RDxdCJRrsIdzs 7XlT/L/vJMbHI+9SbsVDay9qsddU6CT9K8dv/1u5sn7L10wk4qAbktGKqVET3bOAezM6 V57Qr3yHOtF51tBigDNo6TDTmEguEosIEb8x/Izh2dmkyawPEbKDT+5jf4rrnlO++h2e 5jK0qpOR84jk2lxEgOdJpdMeosdItslgzwH1Gsf6Kwcg+cHa4DCapT9GmJHetoJoPtJU jXXg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1753756013; x=1754360813; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=2Spz5It1QJWoWrn1v3EPBOJi/HDbSwdDv1z5ubqFy+w=; b=vIhMBFM8slIau79win0GdJpBsK084UeC+171UzgNOPFnCa9b+oB6cqVMgq45iPubwm zNpn59q6KmKXHM9WWUlIkDL/qOaz4IXwsDMwpISF56BVfNqxENNXJ4AeUk9sjYxpYbqd 
HJ43VYhnwkYk/NkYh4amH6yET79gsIBBYo4Qqm5ykUfYjjEaooEOuqq/9562O1Q8Yb0u 4OOxvDLXraarDU5qRT1HLM9X75qDU+gq/0rJbEzTXBBaBvmwr8TIH/h1KoVUDlDX7nw3 F9bGT/ES9oCHuzAwjYdHiNI9HeCIf60l9zLiTeTNZfKvMr+ahzcrRsAyKgXRXNMPtrPl BvvA== X-Forwarded-Encrypted: i=1; AJvYcCXWmWI9/xGPCFcGf33xcgv/E/nLqH9988A+d3pGSKWXYeTsZQ6S2Y8Lqg8cXLFe8/pnxVDzSr9kIvDA0XE=@vger.kernel.org X-Gm-Message-State: AOJu0YwcA2gKkAyJr5rb8XIIt6Uma8c5z8sqp+RyDZsN8MMvnJtKOBhe 0a4Snv9Lgreb0VBF4MfWk+rZhjPPJaHO5ifVBQtbzW29eb4zU08/PUlqGzzeNZryv66w6dUOMsL SwRX74g== X-Google-Smtp-Source: AGHT+IEdGaAV1jTffTDfV72LV49nNGl5UBLoYE649qAQR8v15K/pM5MNc8kjrrTcjum/HzqqoHbYQSbR4Hk= X-Received: from pfbmy24-n2.prod.google.com ([2002:a05:6a00:6d58:20b0:748:df52:fdc5]) (user=yuzhuo job=prod-delivery.src-stubby-dispatcher) by 2002:a05:6a20:a104:b0:1f5:7ba7:69d8 with SMTP id adf61e73a8af0-23d700480c8mr23190933637.15.1753756013401; Mon, 28 Jul 2025 19:26:53 -0700 (PDT) Date: Mon, 28 Jul 2025 19:26:38 -0700 In-Reply-To: <20250729022640.3134066-1-yuzhuo@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250729022640.3134066-1-yuzhuo@google.com> X-Mailer: git-send-email 2.50.1.487.gc89ff58d15-goog Message-ID: <20250729022640.3134066-6-yuzhuo@google.com> Subject: [PATCH v1 5/7] perf bench: Import qspinlock from kernel From: Yuzhuo Jing To: Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Namhyung Kim , Mark Rutland , Alexander Shishkin , Jiri Olsa , Ian Rogers , Adrian Hunter , Liang Kan , Yuzhuo Jing , Yuzhuo Jing , Andrea Parri , Palmer Dabbelt , Charlie Jenkins , Sebastian Andrzej Siewior , Kumar Kartikeya Dwivedi , Alexei Starovoitov , Barret Rhoden , Alexandre Ghiti , Guo Ren , linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Import the qspinlock implementation from kernel with userland-specific adaption. Updated tools/perf/check-headers.sh file to detect kernel file changes in the future. Signed-off-by: Yuzhuo Jing --- tools/include/linux/compiler_types.h | 3 + .../perf/bench/include/mcs_spinlock-private.h | 115 +++++ tools/perf/bench/include/mcs_spinlock.h | 19 + tools/perf/bench/include/qspinlock-private.h | 204 +++++++++ tools/perf/bench/include/qspinlock.h | 153 +++++++ tools/perf/bench/include/qspinlock_types.h | 98 +++++ tools/perf/bench/qspinlock.c | 411 ++++++++++++++++++ tools/perf/check-headers.sh | 32 ++ 8 files changed, 1035 insertions(+) create mode 100644 tools/perf/bench/include/mcs_spinlock-private.h create mode 100644 tools/perf/bench/include/mcs_spinlock.h create mode 100644 tools/perf/bench/include/qspinlock-private.h create mode 100644 tools/perf/bench/include/qspinlock.h create mode 100644 tools/perf/bench/include/qspinlock_types.h create mode 100644 tools/perf/bench/qspinlock.c diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/com= piler_types.h index 46550c500b8c..261a508ef5bd 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -34,6 +34,9 @@ /* Per-cpu checker flag does not use address space attribute in userspace = */ #define __percpu =20 +/* Do not change lock sections in user space */ +#define __lockfunc + /* * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving * non-scalar types unchanged. 
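Taken together with the earlier atomic and per-cpu imports, the expected user-space usage is roughly the following sketch (hypothetical worker function; the benchmark harness itself is not shown in this excerpt):

	#include "include/qspinlock.h"

	static struct qspinlock lock = __ARCH_SPIN_LOCK_UNLOCKED;
	static unsigned long shared_counter;

	static void *worker(void *arg)
	{
		/* The MCS tail encodes a CPU number, so each thread must
		 * register a unique id before taking the lock. */
		set_this_cpu_id((u32)(unsigned long)arg);

		for (int i = 0; i < 100000; i++) {
			queued_spin_lock(&lock);
			shared_counter++;
			queued_spin_unlock(&lock);
		}
		return NULL;
	}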
diff --git a/tools/perf/bench/include/mcs_spinlock-private.h b/tools/perf/b= ench/include/mcs_spinlock-private.h new file mode 100644 index 000000000000..f9e4bab804db --- /dev/null +++ b/tools/perf/bench/include/mcs_spinlock-private.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * MCS lock defines + * + * This file contains the main data structure and API definitions of MCS l= ock. + * + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lo= ck + * with the desirable properties of being fair, and with each cpu trying + * to acquire the lock spinning on a local variable. + * It avoids expensive cache bounces that common test-and-set spin-lock + * implementations incur. + */ +#ifndef __LINUX_MCS_SPINLOCK_H +#define __LINUX_MCS_SPINLOCK_H + +#include +#include +#include "mcs_spinlock.h" + +#ifndef arch_mcs_spin_lock_contended +/* + * Using smp_cond_load_acquire() provides the acquire semantics + * required so that subsequent operations happen after the + * lock is acquired. Additionally, some architectures such as + * ARM64 would like to do spin-waiting instead of purely + * spinning, and smp_cond_load_acquire() provides that behavior. + */ +#define arch_mcs_spin_lock_contended(l) \ + smp_cond_load_acquire(l, VAL) +#endif + +#ifndef arch_mcs_spin_unlock_contended +/* + * smp_store_release() provides a memory barrier to ensure all + * operations in the critical section has been completed before + * unlocking. + */ +#define arch_mcs_spin_unlock_contended(l) \ + smp_store_release((l), 1) +#endif + +/* + * Note: the smp_load_acquire/smp_store_release pair is not + * sufficient to form a full memory barrier across + * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. + * For applications that need a full barrier across multiple cpus + * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be + * used after mcs_lock. + */ + +/* + * In order to acquire the lock, the caller should declare a local node and + * pass a reference of the node to this function in addition to the lock. + * If the lock has already been acquired, then this will proceed to spin + * on this node->locked until the previous lock holder sets the node->lock= ed + * in mcs_spin_unlock(). + */ +static inline +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *prev; + + /* Init node */ + node->locked =3D 0; + node->next =3D NULL; + + /* + * We rely on the full barrier with global transitivity implied by the + * below xchg() to order the initialization stores above against any + * observation of @node. And to provide the ACQUIRE ordering associated + * with a LOCK primitive. + */ + prev =3D xchg(lock, node); + if (likely(prev =3D=3D NULL)) { + /* + * Lock acquired, don't need to set node->locked to 1. Threads + * only spin on its own node->locked value for lock acquisition. + * However, since this thread can immediately acquire the lock + * and does not proceed to spin on its own node->locked, this + * value won't be used. If a debug mode is needed to + * audit lock status, then set node->locked value here. + */ + return; + } + WRITE_ONCE(prev->next, node); + + /* Wait until the lock holder passes the lock down. */ + arch_mcs_spin_lock_contended(&node->locked); +} + +/* + * Releases the lock. The caller should pass in the corresponding node that + * was used to acquire the lock. 
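For reference, the pair is used with a caller-provided node, typically on the stack (illustrative sketch, not part of the patch):

	static struct mcs_spinlock *list_lock;	/* NULL when unlocked */

	static void update_shared(void)
	{
		struct mcs_spinlock node;	/* each waiter spins on its own node */

		mcs_spin_lock(&list_lock, &node);
		/* ... critical section ... */
		mcs_spin_unlock(&list_lock, &node);
	}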
+ */ +static inline +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *next =3D READ_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (likely(cmpxchg_release(lock, node, NULL) =3D=3D node)) + return; + /* Wait until the next pointer is set */ + while (!(next =3D READ_ONCE(node->next))) + cpu_relax(); + } + + /* Pass lock to next waiter. */ + arch_mcs_spin_unlock_contended(&next->locked); +} + +#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/tools/perf/bench/include/mcs_spinlock.h b/tools/perf/bench/inc= lude/mcs_spinlock.h new file mode 100644 index 000000000000..39c94012b88a --- /dev/null +++ b/tools/perf/bench/include/mcs_spinlock.h @@ -0,0 +1,19 @@ +#ifndef __ASM_MCS_SPINLOCK_H +#define __ASM_MCS_SPINLOCK_H + +struct mcs_spinlock { + struct mcs_spinlock *next; + int locked; /* 1 if lock acquired */ + int count; /* nesting count, see qspinlock.c */ +}; + +/* + * Architectures can define their own: + * + * arch_mcs_spin_lock_contended(l) + * arch_mcs_spin_unlock_contended(l) + * + * See kernel/locking/mcs_spinlock.c. + */ + +#endif /* __ASM_MCS_SPINLOCK_H */ diff --git a/tools/perf/bench/include/qspinlock-private.h b/tools/perf/benc= h/include/qspinlock-private.h new file mode 100644 index 000000000000..699f70bac980 --- /dev/null +++ b/tools/perf/bench/include/qspinlock-private.h @@ -0,0 +1,204 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Queued spinlock defines + * + * This file contains macro definitions and functions shared between diffe= rent + * qspinlock slow path implementations. + */ +#ifndef __LINUX_QSPINLOCK_H +#define __LINUX_QSPINLOCK_H + +#include +#include +#include +#include +#include "qspinlock_types.h" +#include "mcs_spinlock.h" + +#define _Q_MAX_NODES 4 + +/* + * The pending bit spinning loop count. + * This heuristic is used to limit the number of lockword accesses + * made by atomic_cond_read_relaxed when waiting for the lock to + * transition out of the "=3D=3D _Q_PENDING_VAL" state. We don't spin + * indefinitely because there's no guarantee that we'll make forward + * progress. + */ +#ifndef _Q_PENDING_LOOPS +#define _Q_PENDING_LOOPS 1 +#endif + +/* + * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in + * size and four of them will fit nicely in one 64-byte cacheline. For + * pvqspinlock, however, we need more space for extra data. To accommodate + * that, we insert two more long words to pad it up to 32 bytes. IOW, only + * two of them can fit in a cacheline in this case. That is OK as it is ra= re + * to have more than 2 levels of slowpath nesting in actual use. We don't + * want to penalize pvqspinlocks to optimize for a rare case in native + * qspinlocks. + */ +struct qnode { + struct mcs_spinlock mcs; +#ifdef CONFIG_PARAVIRT_SPINLOCKS + long reserved[2]; +#endif +}; + +DECLARE_PER_CPU_ALIGNED(qnodes, struct qnode, qnodes[_Q_MAX_NODES]); + +/* + * We must be able to distinguish between no-tail and the tail at 0:0, + * therefore increment the cpu number by one. 
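Worked example (values assume the _Q_PENDING_BITS == 8 layout below, i.e. _Q_TAIL_IDX_OFFSET == 16 and _Q_TAIL_CPU_OFFSET == 18): encode_tail(0, 0) == (0 + 1) << 18 == 0x40000, which is non-zero, so "CPU 0, node index 0" can never be mistaken for the empty-queue value tail == 0.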
+ */ + +static inline __pure u32 encode_tail(int cpu, int idx) +{ + u32 tail; + + tail =3D (cpu + 1) << _Q_TAIL_CPU_OFFSET; + tail |=3D idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */ + + return tail; +} + +static inline __pure struct mcs_spinlock *decode_tail(u32 tail) +{ + int cpu =3D (tail >> _Q_TAIL_CPU_OFFSET) - 1; + int idx =3D (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; + + return per_cpu_ptr(qnodes, qnodes[idx].mcs, cpu); +} + +static inline __pure +struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx) +{ + return &((struct qnode *)base + idx)->mcs; +} + +#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) + +#if _Q_PENDING_BITS =3D=3D 8 +/** + * clear_pending - clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,* -> *,0,* + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + WRITE_ONCE(lock->pending, 0); +} + +/** + * clear_pending_set_locked - take ownership and clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,0 -> *,0,1 + * + * Lock stealing is not allowed if this function is used. + */ +static __always_inline void clear_pending_set_locked(struct qspinlock *loc= k) +{ + WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL); +} + +/* + * xchg_tail - Put in the new queue tail code word & retrieve previous one + * @lock : Pointer to queued spinlock structure + * @tail : The new queue tail code word + * Return: The previous queue tail code word + * + * xchg(lock, tail), which heads an address dependency + * + * p,*,* -> n,*,* ; prev =3D xchg(lock, node) + */ +static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) +{ + /* + * We can use relaxed semantics since the caller ensures that the + * MCS node is properly initialized before updating the tail. + */ + return (u32)xchg_relaxed(&lock->tail, + tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; +} + +#else /* _Q_PENDING_BITS =3D=3D 8 */ + +/** + * clear_pending - clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,* -> *,0,* + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + atomic_andnot(_Q_PENDING_VAL, &lock->val); +} + +/** + * clear_pending_set_locked - take ownership and clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,0 -> *,0,1 + */ +static __always_inline void clear_pending_set_locked(struct qspinlock *loc= k) +{ + atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val); +} + +/** + * xchg_tail - Put in the new queue tail code word & retrieve previous one + * @lock : Pointer to queued spinlock structure + * @tail : The new queue tail code word + * Return: The previous queue tail code word + * + * xchg(lock, tail) + * + * p,*,* -> n,*,* ; prev =3D xchg(lock, node) + */ +static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) +{ + u32 old, new; + + old =3D atomic_read(&lock->val); + do { + new =3D (old & _Q_LOCKED_PENDING_MASK) | tail; + /* + * We can use relaxed semantics since the caller ensures that + * the MCS node is properly initialized before updating the + * tail. 
+ */ + } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new)); + + return old; +} +#endif /* _Q_PENDING_BITS =3D=3D 8 */ + +/** + * queued_fetch_set_pending_acquire - fetch the whole lock value and set p= ending + * @lock : Pointer to queued spinlock structure + * Return: The previous lock value + * + * *,*,* -> *,1,* + */ +#ifndef queued_fetch_set_pending_acquire +static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlo= ck *lock) +{ + return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); +} +#endif + +/** + * set_locked - Set the lock bit and own the lock + * @lock: Pointer to queued spinlock structure + * + * *,*,0 -> *,0,1 + */ +static __always_inline void set_locked(struct qspinlock *lock) +{ + WRITE_ONCE(lock->locked, _Q_LOCKED_VAL); +} + +#endif /* __LINUX_QSPINLOCK_H */ diff --git a/tools/perf/bench/include/qspinlock.h b/tools/perf/bench/includ= e/qspinlock.h new file mode 100644 index 000000000000..2c5b00121929 --- /dev/null +++ b/tools/perf/bench/include/qspinlock.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Queued spinlock + * + * A 'generic' spinlock implementation that is based on MCS locks. For an + * architecture that's looking for a 'generic' spinlock, please first cons= ider + * ticket-lock.h and only come looking here when you've considered all the + * constraints below and can show your hardware does actually perform bett= er + * with qspinlock. + * + * qspinlock relies on atomic_*_release()/atomic_*_acquire() to be RCsc (o= r no + * weaker than RCtso if you're power), where regular code only expects ato= mic_t + * to be RCpc. + * + * qspinlock relies on a far greater (compared to asm-generic/spinlock.h) = set + * of atomic operations to behave well together, please audit them careful= ly to + * ensure they all have forward progress. Many atomic operations may defau= lt to + * cmpxchg() loops which will not have good forward progress properties on + * LL/SC architectures. + * + * One notable example is atomic_fetch_or_acquire(), which x86 cannot (che= aply) + * do. Carefully read the patches that introduced + * queued_fetch_set_pending_acquire(). + * + * qspinlock also heavily relies on mixed size atomic operations, in speci= fic + * it requires architectures to have xchg16; something which many LL/SC + * architectures need to implement as a 32bit and+or in order to satisfy t= he + * forward progress guarantees mentioned above. + * + * Further reading on mixed size atomics that might be relevant: + * + * http://www.cl.cam.ac.uk/~pes20/popl17/mixed-size.pdf + * + * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. + * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP + * + * Authors: Waiman Long + */ +#ifndef __ASM_GENERIC_QSPINLOCK_H +#define __ASM_GENERIC_QSPINLOCK_H + +#include "qspinlock_types.h" +#include +#include + +#ifndef queued_spin_is_locked +/** + * queued_spin_is_locked - is the spinlock locked? + * @lock: Pointer to queued spinlock structure + * Return: 1 if it is locked, 0 otherwise + */ +static __always_inline int queued_spin_is_locked(struct qspinlock *lock) +{ + /* + * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL + * isn't immediately observable. + */ + return atomic_read(&lock->val); +} +#endif + +/** + * queued_spin_value_unlocked - is the spinlock structure unlocked? + * @lock: queued spinlock structure + * Return: 1 if it is unlocked, 0 otherwise + * + * N.B. 
Whenever there are tasks waiting for the lock, it is considered + * locked wrt the lockref code to avoid lock stealing by the lockref + * code and change things underneath the lock. This also allows some + * optimizations to be applied without conflict with lockref. + */ +static __always_inline int queued_spin_value_unlocked(struct qspinlock loc= k) +{ + return !lock.val.counter; +} + +/** + * queued_spin_is_contended - check if the lock is contended + * @lock : Pointer to queued spinlock structure + * Return: 1 if lock contended, 0 otherwise + */ +static __always_inline int queued_spin_is_contended(struct qspinlock *lock) +{ + return atomic_read(&lock->val) & ~_Q_LOCKED_MASK; +} +/** + * queued_spin_trylock - try to acquire the queued spinlock + * @lock : Pointer to queued spinlock structure + * Return: 1 if lock acquired, 0 if failed + */ +static __always_inline int queued_spin_trylock(struct qspinlock *lock) +{ + int val =3D atomic_read(&lock->val); + + if (unlikely(val)) + return 0; + + return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)= ); +} + +extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); + +#ifndef queued_spin_lock +/** + * queued_spin_lock - acquire a queued spinlock + * @lock: Pointer to queued spinlock structure + */ +static __always_inline void queued_spin_lock(struct qspinlock *lock) +{ + int val =3D 0; + + if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) + return; + + queued_spin_lock_slowpath(lock, val); +} +#endif + +#ifndef queued_spin_unlock +/** + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + */ +static __always_inline void queued_spin_unlock(struct qspinlock *lock) +{ + /* + * unlock() needs release semantics: + */ + smp_store_release(&lock->locked, 0); +} +#endif + +#ifndef virt_spin_lock +static __always_inline bool virt_spin_lock(struct qspinlock *lock __maybe_= unused) +{ + return false; +} +#endif + +#ifndef __no_arch_spinlock_redefine +/* + * Remapping spinlock architecture specific functions to the corresponding + * queued spinlock functions. + */ +#define arch_spin_is_locked(l) queued_spin_is_locked(l) +#define arch_spin_is_contended(l) queued_spin_is_contended(l) +#define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l) +#define arch_spin_lock(l) queued_spin_lock(l) +#define arch_spin_trylock(l) queued_spin_trylock(l) +#define arch_spin_unlock(l) queued_spin_unlock(l) +#endif + +#endif /* __ASM_GENERIC_QSPINLOCK_H */ diff --git a/tools/perf/bench/include/qspinlock_types.h b/tools/perf/bench/= include/qspinlock_types.h new file mode 100644 index 000000000000..93a959689070 --- /dev/null +++ b/tools/perf/bench/include/qspinlock_types.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Queued spinlock + * + * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. + * + * Authors: Waiman Long + */ +#ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H +#define __ASM_GENERIC_QSPINLOCK_TYPES_H + +#include +#include + +#define CONFIG_NR_CPUS PERCPU_MAX + +typedef struct qspinlock { + union { + atomic_t val; + + /* + * By using the whole 2nd least significant byte for the + * pending bit, we can allow better optimization of the lock + * acquisition for the pending bit holder. 
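+		 * On little-endian, 'locked' is byte 0 and 'pending' byte 1,
+		 * so the 16-bit 'locked_pending' field aliases exactly those
+		 * two bytes; the big-endian variant below mirrors this.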
+ */ +#ifdef __LITTLE_ENDIAN + struct { + u8 locked; + u8 pending; + }; + struct { + u16 locked_pending; + u16 tail; + }; +#else + struct { + u16 tail; + u16 locked_pending; + }; + struct { + u8 reserved[2]; + u8 pending; + u8 locked; + }; +#endif + }; +} arch_spinlock_t; + +/* + * Initializier + */ +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val =3D ATOMIC_INIT(0) } } + +/* + * Bitfields in the atomic value: + * + * When NR_CPUS < 16K + * 0- 7: locked byte + * 8: pending + * 9-15: not used + * 16-17: tail index + * 18-31: tail cpu (+1) + * + * When NR_CPUS >=3D 16K + * 0- 7: locked byte + * 8: pending + * 9-10: tail index + * 11-31: tail cpu (+1) + */ +#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ + << _Q_ ## type ## _OFFSET) +#define _Q_LOCKED_OFFSET 0 +#define _Q_LOCKED_BITS 8 +#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED) + +#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS) +#if CONFIG_NR_CPUS < (1U << 14) +#define _Q_PENDING_BITS 8 +#else +#define _Q_PENDING_BITS 1 +#endif +#define _Q_PENDING_MASK _Q_SET_MASK(PENDING) + +#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS) +#define _Q_TAIL_IDX_BITS 2 +#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) + +#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS) +#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET) +#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) + +#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET +#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK) + +#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) +#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET) + +#endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */ diff --git a/tools/perf/bench/qspinlock.c b/tools/perf/bench/qspinlock.c new file mode 100644 index 000000000000..b678dd16b059 --- /dev/null +++ b/tools/perf/bench/qspinlock.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Queued spinlock + * + * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. + * (C) Copyright 2013-2014,2018 Red Hat, Inc. + * (C) Copyright 2015 Intel Corp. + * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP + * + * Authors: Waiman Long + * Peter Zijlstra + */ + +#ifndef _GEN_PV_LOCK_SLOWPATH + +#include +#include +#include +#include +#include +#include "include/qspinlock.h" + +#define lockevent_inc(x) ({}) +#define lockevent_cond_inc(x, y) ({}) +#define trace_contention_begin(x, y) ({}) +#define trace_contention_end(x, y) ({}) + +#define smp_processor_id get_this_cpu_id + +/* + * Include queued spinlock definitions and statistics code + */ +#include "include/qspinlock-private.h" + +/* + * The basic principle of a queue-based spinlock can best be understood + * by studying a classic queue-based spinlock implementation called the + * MCS lock. A copy of the original MCS lock paper ("Algorithms for Scalab= le + * Synchronization on Shared-Memory Multiprocessors by Mellor-Crummey and + * Scott") is available at + * + * https://bugzilla.kernel.org/show_bug.cgi?id=3D206115 + * + * This queued spinlock implementation is based on the MCS lock, however to + * make it fit the 4 bytes we assume spinlock_t to be, and preserve its + * existing API, we must modify it somehow. + * + * In particular; where the traditional MCS lock consists of a tail pointer + * (8 bytes) and needs the next pointer (another 8 bytes) of its own node = to + * unlock the next pending (next->locked), we compress both these: {tail, + * next->locked} into a single u32 value. 
+ * + * Since a spinlock disables recursion of its own context and there is a l= imit + * to the contexts that can nest; namely: task, softirq, hardirq, nmi. As = there + * are at most 4 nesting levels, it can be encoded by a 2-bit number. Now + * we can encode the tail by combining the 2-bit nesting level with the cpu + * number. With one byte for the lock value and 3 bytes for the tail, only= a + * 32-bit word is now needed. Even though we only need 1 bit for the lock, + * we extend it to a full byte to achieve better performance for architect= ures + * that support atomic byte write. + * + * We also change the first spinner to spin on the lock bit instead of its + * node; whereby avoiding the need to carry a node from lock to unlock, and + * preserving existing lock API. This also makes the unlock code simpler a= nd + * faster. + * + * N.B. The current implementation only supports architectures that allow + * atomic operations on smaller 8-bit and 16-bit data types. + * + */ + +#include "include/mcs_spinlock-private.h" + +/* + * Per-CPU queue node structures; we can never have more than 4 nested + * contexts: task, softirq, hardirq, nmi. + * + * Exactly fits one 64-byte cacheline on a 64-bit architecture. + * + * PV doubles the storage and uses the second cacheline for PV state. + */ +DEFINE_PER_CPU_ALIGNED(qnodes); + +/* + * Generate the native code for queued_spin_unlock_slowpath(); provide NOP= s for + * all the PV callbacks. + */ + +static __always_inline void __pv_init_node(struct mcs_spinlock *node __may= be_unused) { } +static __always_inline void __pv_wait_node(struct mcs_spinlock *node __may= be_unused, + struct mcs_spinlock *prev __maybe_unused) { } +static __always_inline void __pv_kick_node(struct qspinlock *lock __maybe_= unused, + struct mcs_spinlock *node __maybe_unused) { } +static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock = __maybe_unused, + struct mcs_spinlock *node __maybe_unused) + { return 0; } + +#define pv_enabled() false + +#define pv_init_node __pv_init_node +#define pv_wait_node __pv_wait_node +#define pv_kick_node __pv_kick_node +#define pv_wait_head_or_lock __pv_wait_head_or_lock + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath +#endif + +#endif /* _GEN_PV_LOCK_SLOWPATH */ + +/** + * queued_spin_lock_slowpath - acquire the queued spinlock + * @lock: Pointer to queued spinlock structure + * @val: Current value of the queued spinlock 32-bit word + * + * (queue tail, pending bit, lock value) + * + * fast : slow : u= nlock + * : : + * uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (= *,*,0) + * : | ^--------.------. / : + * : v \ \ | : + * pending : (0,1,1) +--> (0,1,0) \ | : + * : | ^--' | | : + * : v | | : + * uncontended : (n,x,y) +--> (n,0,0) --' | : + * queue : | ^--' | : + * : v | : + * contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' : + * queue : ^--' : + */ +void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + struct mcs_spinlock *prev, *next, *node; + u32 old, tail; + int idx; + + BUILD_BUG_ON(CONFIG_NR_CPUS >=3D (1U << _Q_TAIL_CPU_BITS)); + + if (pv_enabled()) + goto pv_queue; + + if (virt_spin_lock(lock)) + return; + + /* + * Wait for in-progress pending->locked hand-overs with a bounded + * number of spins so that we guarantee forward progress. 
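+	 * The bound is _Q_PENDING_LOOPS iterations: the 'cnt' counter in the
+	 * atomic_cond_read_relaxed() condition below terminates the wait even
+	 * if the hand-over has not completed.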
+ * + * 0,1,0 -> 0,0,1 + */ + if (val =3D=3D _Q_PENDING_VAL) { + int cnt =3D _Q_PENDING_LOOPS; + val =3D atomic_cond_read_relaxed(&lock->val, + (VAL !=3D _Q_PENDING_VAL) || !cnt--); + } + + /* + * If we observe any contention; queue. + */ + if (val & ~_Q_LOCKED_MASK) + goto queue; + + /* + * trylock || pending + * + * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock + */ + val =3D queued_fetch_set_pending_acquire(lock); + + /* + * If we observe contention, there is a concurrent locker. + * + * Undo and queue; our setting of PENDING might have made the + * n,0,0 -> 0,0,0 transition fail and it will now be waiting + * on @next to become !NULL. + */ + if (unlikely(val & ~_Q_LOCKED_MASK)) { + + /* Undo PENDING if we set it. */ + if (!(val & _Q_PENDING_MASK)) + clear_pending(lock); + + goto queue; + } + + /* + * We're pending, wait for the owner to go away. + * + * 0,1,1 -> *,1,0 + * + * this wait loop must be a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because not all + * clear_pending_set_locked() implementations imply full + * barriers. + */ + if (val & _Q_LOCKED_MASK) + smp_cond_load_acquire(&lock->locked, !VAL); + + /* + * take ownership and clear the pending bit. + * + * 0,1,0 -> 0,0,1 + */ + clear_pending_set_locked(lock); + lockevent_inc(lock_pending); + return; + + /* + * End of pending bit optimistic spinning and beginning of MCS + * queuing. + */ +queue: + lockevent_inc(lock_slowpath); +pv_queue: + node =3D this_cpu_ptr(qnodes, qnodes[0].mcs); + idx =3D node->count++; + tail =3D encode_tail(smp_processor_id(), idx); + + trace_contention_begin(lock, LCB_F_SPIN); + + /* + * 4 nodes are allocated based on the assumption that there will + * not be nested NMIs taking spinlocks. That may not be true in + * some architectures even though the chance of needing more than + * 4 nodes will still be extremely unlikely. When that happens, + * we fall back to spinning on the lock directly without using + * any MCS node. This is not the most elegant solution, but is + * simple enough. + */ + if (unlikely(idx >=3D _Q_MAX_NODES)) { + lockevent_inc(lock_no_node); + while (!queued_spin_trylock(lock)) + cpu_relax(); + goto release; + } + + node =3D grab_mcs_node(node, idx); + + /* + * Keep counts of non-zero index values: + */ + lockevent_cond_inc(lock_use_node2 + idx - 1, idx); + + /* + * Ensure that we increment the head node->count before initialising + * the actual node. If the compiler is kind enough to reorder these + * stores, then an IRQ could overwrite our assignments. + */ + barrier(); + + node->locked =3D 0; + node->next =3D NULL; + pv_init_node(node); + + /* + * We touched a (possibly) cold cacheline in the per-cpu queue node; + * attempt the trylock once more in the hope someone let go while we + * weren't watching. + */ + if (queued_spin_trylock(lock)) + goto release; + + /* + * Ensure that the initialisation of @node is complete before we + * publish the updated tail via xchg_tail() and potentially link + * @node into the waitqueue via WRITE_ONCE(prev->next, node) below. + */ + smp_wmb(); + + /* + * Publish the updated tail. + * We have already touched the queueing cacheline; don't bother with + * pending stuff. + * + * p,*,* -> n,*,* + */ + old =3D xchg_tail(lock, tail); + next =3D NULL; + + /* + * if there was a previous node; link it and wait until reaching the + * head of the waitqueue. + */ + if (old & _Q_TAIL_MASK) { + prev =3D decode_tail(old); + + /* Link @node into the waitqueue. 
*/ + WRITE_ONCE(prev->next, node); + + pv_wait_node(node, prev); + arch_mcs_spin_lock_contended(&node->locked); + + /* + * While waiting for the MCS lock, the next pointer may have + * been set by another lock waiter. We optimistically load + * the next pointer & prefetch the cacheline for writing + * to reduce latency in the upcoming MCS unlock operation. + */ + next =3D READ_ONCE(node->next); + if (next) + prefetchw(next); + } + + /* + * we're at the head of the waitqueue, wait for the owner & pending to + * go away. + * + * *,x,y -> *,0,0 + * + * this wait loop must use a load-acquire such that we match the + * store-release that clears the locked bit and create lock + * sequentiality; this is because the set_locked() function below + * does not imply a full barrier. + * + * The PV pv_wait_head_or_lock function, if active, will acquire + * the lock and return a non-zero value. So we have to skip the + * atomic_cond_read_acquire() call. As the next PV queue head hasn't + * been designated yet, there is no way for the locked value to become + * _Q_SLOW_VAL. So both the set_locked() and the + * atomic_cmpxchg_relaxed() calls will be safe. + * + * If PV isn't active, 0 will be returned instead. + * + */ + if ((val =3D pv_wait_head_or_lock(lock, node))) + goto locked; + + val =3D atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MA= SK)); + +locked: + /* + * claim the lock: + * + * n,0,0 -> 0,0,1 : lock, uncontended + * *,*,0 -> *,*,1 : lock, contended + * + * If the queue head is the only one in the queue (lock value =3D=3D tail) + * and nobody is pending, clear the tail code and grab the lock. + * Otherwise, we only need to grab the lock. + */ + + /* + * In the PV case we might already have _Q_LOCKED_VAL set, because + * of lock stealing; therefore we must also allow: + * + * n,0,1 -> 0,0,1 + * + * Note: at this point: (val & _Q_PENDING_MASK) =3D=3D 0, because of the + * above wait condition, therefore any concurrent setting of + * PENDING will make the uncontended transition fail. + */ + if ((val & _Q_TAIL_MASK) =3D=3D tail) { + if (atomic_try_cmpxchg_relaxed(&lock->val, (int *)&val, _Q_LOCKED_VAL)) + goto release; /* No contention */ + } + + /* + * Either somebody is queued behind us or _Q_PENDING_VAL got set + * which will then detect the remaining tail and queue behind us + * ensuring we'll see a @next. + */ + set_locked(lock); + + /* + * contended path; wait for next if not observed yet, release. + */ + if (!next) + next =3D smp_cond_load_relaxed(&node->next, (VAL)); + + arch_mcs_spin_unlock_contended(&next->locked); + pv_kick_node(lock, next); + +release: + trace_contention_end(lock, 0); + + /* + * release the node + */ + __this_cpu_dec(qnodes, qnodes[0].mcs.count); +} + +/* + * Generate the paravirt code for queued_spin_unlock_slowpath(). 
+ */ +#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS) +#define _GEN_PV_LOCK_SLOWPATH + +#undef pv_enabled +#define pv_enabled() true + +#undef pv_init_node +#undef pv_wait_node +#undef pv_kick_node +#undef pv_wait_head_or_lock + +#undef queued_spin_lock_slowpath +#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath + +#include "qspinlock_paravirt.h" +#include "qspinlock.c" + +bool nopvspin; +static __init int parse_nopvspin(char *arg) +{ + nopvspin =3D true; + return 0; +} +early_param("nopvspin", parse_nopvspin); +#endif diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index be519c433ce4..b827b10e19c1 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -118,6 +118,25 @@ check_2 () { fi } =20 +check_2_sed () { + tools_file=3D$1 + orig_file=3D$2 + sed_cmd=3D"$3" + + shift + shift + shift + + cmd=3D"diff $* <(sed '$sed_cmd' $tools_file) $orig_file > /dev/null" + + if [ -f "$orig_file" ] && ! eval "$cmd" + then + FAILURES+=3D( + "$tools_file $orig_file" + ) + fi +} + check () { file=3D$1 =20 @@ -207,6 +226,19 @@ check_2 tools/perf/arch/parisc/entry/syscalls/syscall.= tbl arch/parisc/entry/sysc check_2 tools/perf/arch/arm64/entry/syscalls/syscall_32.tbl arch/arm64/ent= ry/syscalls/syscall_32.tbl check_2 tools/perf/arch/arm64/entry/syscalls/syscall_64.tbl arch/arm64/ent= ry/syscalls/syscall_64.tbl =20 +# diff qspinlock files +qsl_sed=3D's/ __maybe_unused//' +qsl_common=3D'-I "^#include" -I __percpu -I this_cpu_ -I per_cpu_ -I decod= e_tail \ + -I DECLARE_PER_CPU_ALIGNED -I DEFINE_PER_CPU_ALIGNED -I CONFIG_NR_CPUS -B' +check_2_sed tools/perf/bench/include/qspinlock.h include/asm-generic/qspin= lock.h "$qsl_sed" "$qsl_common" +check_2 tools/perf/bench/include/qspinlock_types.h include/asm-generic/qsp= inlock_types.h "$qsl_common" +check_2 tools/perf/bench/include/mcs_spinlock.h include/asm-generic/mcs_s= pinlock.h +check_2 tools/perf/bench/include/qspinlock-private.h kernel/locking/qspinl= ock.h "$qsl_common" +check_2 tools/perf/bench/include/mcs_spinlock-private.h kernel/locking/mcs= _spinlock.h "$qsl_common" +check_2_sed tools/perf/bench/qspinlock.c kernel/locking/qspinlock.c "$qsl= _sed" \ + "$qsl_common"' -I EXPORT_SYMBOL -I "^#define lockevent_" -I "^#define tra= ce_" \ + -I smp_processor_id -I atomic_try_cmpxchg_relaxed' + for i in "${BEAUTY_FILES[@]}" do beauty_check "$i" -B --=20 2.50.1.487.gc89ff58d15-goog From nobody Sun Oct 5 21:59:57 2025 Received: from mail-pg1-f202.google.com (mail-pg1-f202.google.com [209.85.215.202]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id ADF304A23 for ; Tue, 29 Jul 2025 02:26:55 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.215.202 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756017; cv=none; b=RCIh6OeQRvYPwnnksl7Ppu3f4wYDonI2sVPAlc90OD5hbhAnWKWpUYlilTkdjNGkMn4eX62Rnpq/aGVnwH0Z99zqyfbMhS9eb66PeKDkj9mGExvKWy9mxqs3sB6q/pG/CjTqQaFyxgh0nQwmmSBPZ2WLeDc71iyqXGO0tnu2lhI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756017; c=relaxed/simple; bh=SR0yGjMpjNaiXRA6AQUdbwzU/jeOGsnKSCSE5b9t9Xs=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Content-Type; b=WCyRtGrA65XFPUZy9HndXDoHLHmlcGsxowBXRO2KifGx1Ou9q3FqCqIY28xuP2vgh4fWgXO3414GCWGnxAENtoDD5HUVAgMMpj6h8kSv98d34YLTnmpFmDxgc4+/Ja6ZsXkkln35TkjW3bf+HQqsBsTA1wi3jSy88YkdfRgwQDE= 
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=Q50HViec; arc=none smtp.client-ip=209.85.215.202 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="Q50HViec" Received: by mail-pg1-f202.google.com with SMTP id 41be03b00d2f7-b3beafa8d60so7375229a12.3 for ; Mon, 28 Jul 2025 19:26:55 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1753756015; x=1754360815; darn=vger.kernel.org; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :from:to:cc:subject:date:message-id:reply-to; bh=0IpzWh1GSupEuJD1HvbYqpGAkZZZklj4Nj25KDP0OIM=; b=Q50HVieceozS3MZgef+Un34PxDeRPeZyBZF1xECtyPTTuoPt3fZWWs0OvySCTVhFR+ E3ZMW10uyE//oXoWOR9GkQ3I3u98Fh4SXxaVaGvTKoC8t7pT0BRjMdQBxTMqSi+a7xo1 kuaRjVPcOsxAXjoVfcGUbzj1Eh65l+k4EAE0Rb9dD+iEFodkQ2o9DS0fmcIx9pKCVAFx 0oRhkSUU/IgrzCuPyG3COQt2KFxCBWbux5NBZ7HM/ACNgmrHsiNZmvN1k0ParqMeesB2 4Ou1AYYpPCejuJSecmcgGWiBvxz7zD8/3hXiaMTwOt6b2ygIJULJcfS+7eKFs+THX+yA zbCw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1753756015; x=1754360815; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=0IpzWh1GSupEuJD1HvbYqpGAkZZZklj4Nj25KDP0OIM=; b=uwp67svhhHYTNgXFykUC4dEhd/Y9OBh4vlg8RrrhajAfybwtAKugR+/SVcoBwl04By S9sgTUfLKg+AHClhlUD53ewGWqyq/k/9ueYb4IvzMGNY6bwh4N+qHhUxizlA1lRIKWNW OHs2/OG60uUHfrhCa0LB3TdnVAtHWaPX+90YsyhMUA7qZTLUh3AC60j1LytJk8mvkAER oRVuKXZx2IQG5OAglWYcjH1cVfEtNLnnypqQ76g97zGvuKoifzqzMaMn2M4ep2LzQQjg 87WuOFS1KnhA0AbHcDoszGvVrx0L57yB1luT4VXn9xd5omQac2zNpVH/gmv9GJBzW/NB oawA== X-Forwarded-Encrypted: i=1; AJvYcCUHQbMZ7lMa6dLu07IEIlx7okdeL5CBHLZuNZu8QAzyz0r26maL0WAoFQZaluS2Wc/lSQelD5bA8zUtRcA=@vger.kernel.org X-Gm-Message-State: AOJu0Ywop7kxpvFFneTpJWaVIYqwtakETAst3k8cSBupd+P0oExnwxwP SsKoa7d3VMFn7LcIY3oM6iubZAeOopRcLRn19siusmQrQhEsZ09NDF8WAdOfWHPYLbAAN1bUnAe 7VDiK5Q== X-Google-Smtp-Source: AGHT+IHvM7XsglD+A2nSBVKJqo9GO6rSg0j3vR/q3GCnbggYdamThZ8mgI+B95CYlCTxKhmLXmIRcKjQl5Y= X-Received: from pfbki17.prod.google.com ([2002:a05:6a00:9491:b0:746:2840:68a6]) (user=yuzhuo job=prod-delivery.src-stubby-dispatcher) by 2002:a05:6a20:4310:b0:23d:33ce:bc7a with SMTP id adf61e73a8af0-23d70171e0emr21607108637.23.1753756015101; Mon, 28 Jul 2025 19:26:55 -0700 (PDT) Date: Mon, 28 Jul 2025 19:26:39 -0700 In-Reply-To: <20250729022640.3134066-1-yuzhuo@google.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250729022640.3134066-1-yuzhuo@google.com> X-Mailer: git-send-email 2.50.1.487.gc89ff58d15-goog Message-ID: <20250729022640.3134066-7-yuzhuo@google.com> Subject: [PATCH v1 6/7] perf bench: Add 'bench sync qspinlock' subcommand From: Yuzhuo Jing To: Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Namhyung Kim , Mark Rutland , Alexander Shishkin , Jiri Olsa , Ian Rogers , Adrian Hunter , Liang Kan , Yuzhuo Jing , Yuzhuo Jing , Andrea Parri , Palmer Dabbelt , Charlie Jenkins , Sebastian Andrzej Siewior , Kumar Kartikeya Dwivedi , Alexei Starovoitov , 
Barret Rhoden , Alexandre Ghiti , Guo Ren , linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Benchmark kernel queued spinlock implementation in user space. Support settings of the number of threads and the number of acquire/releases. Signed-off-by: Yuzhuo Jing --- tools/perf/bench/Build | 2 + tools/perf/bench/bench.h | 1 + tools/perf/bench/sync.c | 234 +++++++++++++++++++++++++++++++++++++ tools/perf/builtin-bench.c | 7 ++ 4 files changed, 244 insertions(+) create mode 100644 tools/perf/bench/sync.c diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index b558ab98719f..13558279fa0e 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -19,6 +19,8 @@ perf-bench-y +=3D evlist-open-close.o perf-bench-y +=3D breakpoint.o perf-bench-y +=3D pmu-scan.o perf-bench-y +=3D uprobe.o +perf-bench-y +=3D sync.o +perf-bench-y +=3D qspinlock.o =20 perf-bench-$(CONFIG_X86_64) +=3D mem-memcpy-x86-64-asm.o perf-bench-$(CONFIG_X86_64) +=3D mem-memset-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 9f736423af53..dd6c8b6126d3 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -22,6 +22,7 @@ int bench_numa(int argc, const char **argv); int bench_sched_messaging(int argc, const char **argv); int bench_sched_pipe(int argc, const char **argv); int bench_sched_seccomp_notify(int argc, const char **argv); +int bench_sync_qspinlock(int argc, const char **argv); int bench_syscall_basic(int argc, const char **argv); int bench_syscall_getpgid(int argc, const char **argv); int bench_syscall_fork(int argc, const char **argv); diff --git a/tools/perf/bench/sync.c b/tools/perf/bench/sync.c new file mode 100644 index 000000000000..2685cb66584c --- /dev/null +++ b/tools/perf/bench/sync.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Synchronization benchmark. + * + * 2025 Yuzhuo Jing + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bench.h" + +#include "include/qspinlock.h" + +#define NS 1000000000ull +#define CACHELINE_SIZE 64 + +static unsigned int nthreads; +static unsigned long nspins =3D 10000ul; + +struct barrier_t; + +typedef void(*lock_fn)(void *); + +/* + * Lock operation definition to support multiple implmentations of locks. + * + * The lock and unlock functions only take one variable, the data pointer. + */ +struct lock_ops { + lock_fn lock; + lock_fn unlock; + void *data; +}; + +struct worker { + pthread_t thd; + unsigned int tid; + struct lock_ops *ops; + struct barrier_t *barrier; + u64 runtime; // in nanoseconds +}; + +static const struct option options[] =3D { + OPT_UINTEGER('t', "threads", &nthreads, + "Specify number of threads (default: number of CPUs)."), + OPT_ULONG('n', "spins", &nspins, + "Number of lock acquire operations per thread (default: 10,000 times)."), + OPT_END() +}; + +static const char *const bench_sync_usage[] =3D { + "perf bench sync qspinlock ", + NULL +}; + +/* + * A atomic-based barrier. Expect to have lower latency than pthread barr= ier + * that sleeps the thread. + */ +struct barrier_t { + unsigned int count __aligned(CACHELINE_SIZE); +}; + +/* + * A atomic-based barrier. Expect to have lower latency than pthread barr= ier + * that sleeps the thread. 
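+ * The last thread to arrive drops the count to zero and returns
+ * immediately; every earlier arrival spins until it observes zero.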
+ */ +__always_inline void wait_barrier(struct barrier_t *b) +{ + if (__atomic_sub_fetch(&b->count, 1, __ATOMIC_RELAXED) =3D=3D 0) + return; + while (__atomic_load_n(&b->count, __ATOMIC_RELAXED)) + ; +} + +static int bench_sync_lock_generic(struct lock_ops *ops, int argc, const c= har **argv); + +/* + * Benchmark of linux kernel queued spinlock in user land. + */ +int bench_sync_qspinlock(int argc, const char **argv) +{ + struct qspinlock lock =3D __ARCH_SPIN_LOCK_UNLOCKED; + struct lock_ops ops =3D { + .lock =3D (lock_fn)queued_spin_lock, + .unlock =3D (lock_fn)queued_spin_unlock, + .data =3D &lock, + }; + return bench_sync_lock_generic(&ops, argc, argv); +} + +/* + * A busy loop to acquire and release the given lock N times. + */ +static void lock_loop(const struct lock_ops *ops, unsigned long n) +{ + unsigned long i; + + for (i =3D 0; i < n; ++i) { + ops->lock(ops->data); + ops->unlock(ops->data); + } +} + +/* + * Thread worker function. Runs lock loop for N/5 times before and after + * the main timed loop. + */ +static void *sync_workerfn(void *args) +{ + struct worker *worker =3D (struct worker *)args; + struct timespec starttime, endtime; + + set_this_cpu_id(worker->tid); + + /* Barrier to let all threads start together */ + wait_barrier(worker->barrier); + + /* Warmup loop (not counted) to keep the below loop contended. */ + lock_loop(worker->ops, nspins / 5); + + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &starttime); + lock_loop(worker->ops, nspins); + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &endtime); + + /* Tail loop (not counted) to keep the above loop contended. */ + lock_loop(worker->ops, nspins / 5); + + worker->runtime =3D (endtime.tv_sec - starttime.tv_sec) * NS + + endtime.tv_nsec - starttime.tv_nsec; + + return NULL; +} + +/* + * Generic lock synchronization benchmark function. Sets up threads and + * thread affinities. + */ +static int bench_sync_lock_generic(struct lock_ops *ops, int argc, const c= har **argv) +{ + struct perf_cpu_map *online_cpus; + unsigned int online_cpus_nr; + struct worker *workers; + u64 totaltime =3D 0, total_spins, avg_ns, avg_ns_dot; + struct barrier_t barrier; + cpu_set_t *cpuset; + size_t cpuset_size; + + argc =3D parse_options(argc, argv, options, bench_sync_usage, 0); + if (argc) { + usage_with_options(bench_sync_usage, options); + exit(EXIT_FAILURE); + } + + /* CPU count setup. 
*/ + online_cpus =3D perf_cpu_map__new_online_cpus(); + if (!online_cpus) + err(EXIT_FAILURE, "No online CPUs available"); + online_cpus_nr =3D perf_cpu_map__nr(online_cpus); + + if (!nthreads) /* default to the number of CPUs */ + nthreads =3D online_cpus_nr; + + workers =3D calloc(nthreads, sizeof(*workers)); + if (!workers) + err(EXIT_FAILURE, "calloc"); + + barrier.count =3D nthreads; + + printf("Running with %u threads.\n", nthreads); + + cpuset =3D CPU_ALLOC(online_cpus_nr); + if (!cpuset) + err(EXIT_FAILURE, "Cannot allocate cpuset."); + cpuset_size =3D CPU_ALLOC_SIZE(online_cpus_nr); + + /* Create worker data structures, set CPU affinity, and create */ + for (unsigned int i =3D 0; i < nthreads; ++i) { + pthread_attr_t thread_attr; + int ret; + + /* Basic worker thread information */ + workers[i].tid =3D i; + workers[i].barrier =3D &barrier; + workers[i].ops =3D ops; + + /* Set CPU affinity */ + pthread_attr_init(&thread_attr); + CPU_ZERO_S(cpuset_size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(online_cpus, i % online_cpus_nr).cpu, + cpuset_size, cpuset); + + if (pthread_attr_setaffinity_np(&thread_attr, cpuset_size, cpuset)) + err(EXIT_FAILURE, "Pthread set affinity failed"); + + /* Create and block thread */ + ret =3D pthread_create(&workers[i].thd, &thread_attr, sync_workerfn, &wo= rkers[i]); + if (ret !=3D 0) + err(EXIT_FAILURE, "Error creating thread: %s", strerror(ret)); + + pthread_attr_destroy(&thread_attr); + } + + CPU_FREE(cpuset); + + for (unsigned int i =3D 0; i < nthreads; ++i) { + int ret =3D pthread_join(workers[i].thd, NULL); + + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + /* Calculate overall average latency. */ + for (unsigned int i =3D 0; i < nthreads; ++i) + totaltime +=3D workers[i].runtime; + + total_spins =3D (u64)nthreads * nspins; + avg_ns =3D totaltime / total_spins; + avg_ns_dot =3D (totaltime % total_spins) * 10000 / total_spins; + + printf("Lock-unlock latency of %u threads: %"PRIu64".%"PRIu64" ns.\n", + nthreads, avg_ns, avg_ns_dot); + + free(workers); + + return 0; +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 2c1a9f3d847a..cfe6f6dc6ed4 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -52,6 +52,12 @@ static struct bench sched_benchmarks[] =3D { { NULL, NULL, NULL } }; =20 +static struct bench sync_benchmarks[] =3D { + { "qspinlock", "Benchmark for queued spinlock", bench_sync_qspinlock }, + { "all", "Run all synchronization benchmarks", NULL }, + { NULL, NULL, NULL } +}; + static struct bench syscall_benchmarks[] =3D { { "basic", "Benchmark for basic getppid(2) calls", bench_syscall_basic }, { "getpgid", "Benchmark for getpgid(2) calls", bench_syscall_getpgid }, @@ -122,6 +128,7 @@ struct collection { =20 static struct collection collections[] =3D { { "sched", "Scheduler and IPC benchmarks", sched_benchmarks }, + { "sync", "Synchronization benchmarks", sync_benchmarks }, { "syscall", "System call benchmarks", syscall_benchmarks }, { "mem", "Memory access benchmarks", mem_benchmarks }, #ifdef HAVE_LIBNUMA_SUPPORT --=20 2.50.1.487.gc89ff58d15-goog From nobody Sun Oct 5 21:59:57 2025 Received: from mail-pf1-f201.google.com (mail-pf1-f201.google.com [209.85.210.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 64ACE220F20 for ; Tue, 29 Jul 2025 02:26:57 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.201 ARC-Seal: i=1; 
a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756019; cv=none; b=Isqy2CoXAw2d/j9LI4xhdaYS4ZHkimwLX01bS2/yJAWCu4GnVUQmQWLquOc2cdqw48IbIUVP9enAJf0V5SoKAQ5IvX8RILwQxl2JxqrsQUVv0a/df9+NoFPhw+SKNRPY+k2yWBUgTwRGmsIthFFNckB+uVvV/+Tpk09yH9+qGCI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753756019; c=relaxed/simple; bh=78XwSvfsUo5qjFzpcoZ2YE5qIEQTQanZ2gjLZQMR2U4=; h=Date:In-Reply-To:Mime-Version:References:Message-ID:Subject:From: To:Content-Type; b=QDroAM+ixLl0dVPnpDCbfQwP/56irc/Nu9JNmKpo6P6eiFiDYXtXswOyWzdj3ntErsT6ovAKeUXDV/7aWu0Y9hkxxPt7I6sTMkEyZ1V7Y390Z+NM8kirBrpGsd98givyrWHvOL0uV9ehtDbZwjkoNcvpha3GNq83QS/Vc86yRPc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b=WReMhAPy; arc=none smtp.client-ip=209.85.210.201 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=google.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=flex--yuzhuo.bounces.google.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=google.com header.i=@google.com header.b="WReMhAPy" Received: by mail-pf1-f201.google.com with SMTP id d2e1a72fcca58-75e28bcec3bso8113041b3a.1 for ; Mon, 28 Jul 2025 19:26:57 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20230601; t=1753756017; x=1754360817; darn=vger.kernel.org; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :from:to:cc:subject:date:message-id:reply-to; bh=lpJZQW2XhZpV9woIMXPe23IdtVvXCXsSMOWR9kV9OUE=; b=WReMhAPyKngyzGyRBpodM2hg4ltB+BKpDYYCyPo9/W66DPi539LagpZ4OU15ZH0N81 hqu+6b/GXf/c8d+7xCq+xYV7XEHxPkvdUvZHHB58Ua1PGC5/s1YGro0KVvunPfenY21H yPcw3mA3P4g5cfgbZH0T85XfwPBPaYx9ybV7MBMpL1sD7DkJKfWKl2BBnG5XSGW8B2DC yBkwSL2AVi9iLCfWjBhzos6j/Oigkw7AFcqZmV5rDK0N12Jz/pXlYSxrlG8jWFvDdHpv bHKTnNfqixNRP14+Ftrc7CrVvYMHn1Ust7wSWIwXo0boERAwURlABFPXGaUa9eJMFMy3 ncLg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1753756017; x=1754360817; h=to:from:subject:message-id:references:mime-version:in-reply-to:date :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=lpJZQW2XhZpV9woIMXPe23IdtVvXCXsSMOWR9kV9OUE=; b=nmLG6Ahrc7SbW+7KE/tj5trSo/6QoSOseyKI1Wrb+AOljc+jcIJreWM4opeOIKH9L+ sX8eIDc08SR+LyIyUNWs+jMQAn9BaMeAzp8dc482yaALLui7IFQW0gQgtCQdYRef2TQc wEO4KzC5Yrdxqysy+/qFEPOJr49tY1Qb9EG6Gyzp/RtCuvbkLI2Kx0V6mELz5X1o7NEh W14b2OO8qLlJWUC7nXxeAwL9+6XDqP45efbcWdHuspEXCrNV91MC+5RbZzZPHc8ubOaR 8rSB5oZvRCFcufwxOFVyUgiiVRtW6uAp69evVxfwTMjGpyovTxXlNgexf8xkUQENGeSf 92LA== X-Forwarded-Encrypted: i=1; AJvYcCVoa94O5pkoexPsG14L1HA9LZd9YKlCB6dUuRcdOVWFxcnF3wOjKFOMLEiSUHrwMiEMPFAz58EVEFDaRdY=@vger.kernel.org X-Gm-Message-State: AOJu0Yy5v63CU8EW8zCBkbv99H19jlRbOYuoMCdJ9GmuAp0ShXtYZBpX oDlegn7IG4AXizVRNAkD9uLyhH4SuVNRjc0ECGffZRaWscfdttaZKNnwEHEE7nXbPaEgPHXjENK LMqaCMw== X-Google-Smtp-Source: AGHT+IF3BgulVEdztgtpO3hFWLE1HkY5w4Qq15DrebDHOcpen7DayFyYTAX+bxT3j4xX+6CCPymj3m5uGog= X-Received: from pfoo28.prod.google.com ([2002:a05:6a00:1a1c:b0:748:f16c:14c5]) (user=yuzhuo job=prod-delivery.src-stubby-dispatcher) by 2002:a05:6a00:10cb:b0:748:f750:14c6 with SMTP id d2e1a72fcca58-76336f1bcb7mr20458767b3a.14.1753756016683; Mon, 28 Jul 2025 19:26:56 -0700 (PDT) Date: Mon, 28 Jul 2025 19:26:40 -0700 In-Reply-To: <20250729022640.3134066-1-yuzhuo@google.com> Precedence: bulk 
X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: Mime-Version: 1.0 References: <20250729022640.3134066-1-yuzhuo@google.com> X-Mailer: git-send-email 2.50.1.487.gc89ff58d15-goog Message-ID: <20250729022640.3134066-8-yuzhuo@google.com> Subject: [PATCH v1 7/7] perf bench sync: Add latency histogram functionality From: Yuzhuo Jing To: Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Namhyung Kim , Mark Rutland , Alexander Shishkin , Jiri Olsa , Ian Rogers , Adrian Hunter , Liang Kan , Yuzhuo Jing , Yuzhuo Jing , Andrea Parri , Palmer Dabbelt , Charlie Jenkins , Sebastian Andrzej Siewior , Kumar Kartikeya Dwivedi , Alexei Starovoitov , Barret Rhoden , Alexandre Ghiti , Guo Ren , linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add an option to print the histogram of lock acquire latencies (unit in TSCs). Signed-off-by: Yuzhuo Jing --- tools/perf/bench/sync.c | 97 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/tools/perf/bench/sync.c b/tools/perf/bench/sync.c index 2685cb66584c..c85e9853c72a 100644 --- a/tools/perf/bench/sync.c +++ b/tools/perf/bench/sync.c @@ -15,14 +15,19 @@ #include =20 #include "bench.h" +#include "../util/tsc.h" =20 #include "include/qspinlock.h" =20 #define NS 1000000000ull #define CACHELINE_SIZE 64 =20 +#define DEFAULT_HIST_INTERVAL 1000 + static unsigned int nthreads; static unsigned long nspins =3D 10000ul; +static bool do_hist; +static u64 hist_interval =3D DEFAULT_HIST_INTERVAL; =20 struct barrier_t; =20 @@ -45,6 +50,7 @@ struct worker { struct lock_ops *ops; struct barrier_t *barrier; u64 runtime; // in nanoseconds + u64 *lock_latency; // in TSCs }; =20 static const struct option options[] =3D { @@ -52,6 +58,10 @@ static const struct option options[] =3D { "Specify number of threads (default: number of CPUs)."), OPT_ULONG('n', "spins", &nspins, "Number of lock acquire operations per thread (default: 10,000 times)."), + OPT_BOOLEAN(0, "hist", &do_hist, + "Print a histogram of lock acquire TSCs."), + OPT_U64(0, "hist-interval", &hist_interval, + "Histogram bucket size (default 1,000 TSCs)."), OPT_END() }; =20 @@ -109,6 +119,25 @@ static void lock_loop(const struct lock_ops *ops, unsi= gned long n) } } =20 +/* + * A busy loop to acquire and release the given lock N times, and also col= lect + * all acquire latencies, for histogram use. Note that the TSC operations + * latency itself is also included. + */ +static void lock_loop_timing(const struct lock_ops *ops, unsigned long n, = u64 *sample_buffer) +{ + unsigned long i; + u64 t1, t2; + + for (i =3D 0; i < n; ++i) { + t1 =3D rdtsc(); + ops->lock(ops->data); + t2 =3D rdtsc(); + ops->unlock(ops->data); + sample_buffer[i] =3D t2 - t1; + } +} + /* * Thread worker function. Runs lock loop for N/5 times before and after * the main timed loop. @@ -127,7 +156,10 @@ static void *sync_workerfn(void *args) lock_loop(worker->ops, nspins / 5); =20 clock_gettime(CLOCK_THREAD_CPUTIME_ID, &starttime); - lock_loop(worker->ops, nspins); + if (worker->lock_latency) + lock_loop_timing(worker->ops, nspins, worker->lock_latency); + else + lock_loop(worker->ops, nspins); clock_gettime(CLOCK_THREAD_CPUTIME_ID, &endtime); =20 /* Tail loop (not counted) to keep the above loop contended. */ @@ -139,6 +171,57 @@ static void *sync_workerfn(void *args) return NULL; } =20 +/* + * Calculate and print a histogram. 
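+ * Each bucket spans hist_interval TSCs; the printed label is the bucket's
+ * upper bound, and empty buckets are skipped.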
+ */ +static void print_histogram(struct worker *workers) +{ + u64 tsc_max =3D 0; + u64 *buckets; + unsigned long nbuckets; + + if (hist_interval =3D=3D 0) + hist_interval =3D DEFAULT_HIST_INTERVAL; + + printf("Lock acquire histogram:\n"); + + /* Calculate the max TSC value to get the number of buckets needed. */ + for (unsigned int i =3D 0; i < nthreads; ++i) { + struct worker *w =3D workers + i; + + for (unsigned long j =3D 0; j < nspins; ++j) + tsc_max =3D max(w->lock_latency[j], tsc_max); + } + nbuckets =3D (tsc_max + hist_interval - 1) / hist_interval; + + /* Allocate the actual bucket. The bucket definition may be optimized + * if it is sparse. + */ + buckets =3D calloc(nbuckets, sizeof(*buckets)); + if (!buckets) + err(EXIT_FAILURE, "calloc"); + + /* Iterate through all latencies again to fill the buckets. */ + for (unsigned int i =3D 0; i < nthreads; ++i) { + struct worker *w =3D workers + i; + + for (unsigned long j =3D 0; j < nspins; ++j) { + u64 latency =3D w->lock_latency[j]; + ++buckets[latency / hist_interval]; + } + } + + /* Print the histogram as a table. */ + printf("Bucket, Count\n"); + for (unsigned long i =3D 0; i < nbuckets; ++i) { + if (buckets[i] =3D=3D 0) + continue; + printf("%"PRIu64", %"PRIu64"\n", hist_interval * (i + 1), buckets[i]); + } + + free(buckets); +} + /* * Generic lock synchronization benchmark function. Sets up threads and * thread affinities. @@ -191,6 +274,12 @@ static int bench_sync_lock_generic(struct lock_ops *op= s, int argc, const char ** workers[i].barrier =3D &barrier; workers[i].ops =3D ops; =20 + if (do_hist) { + workers[i].lock_latency =3D calloc(nspins, sizeof(*workers[i].lock_late= ncy)); + if (!workers[i].lock_latency) + err(EXIT_FAILURE, "calloc"); + } + /* Set CPU affinity */ pthread_attr_init(&thread_attr); CPU_ZERO_S(cpuset_size, cpuset); @@ -228,6 +317,12 @@ static int bench_sync_lock_generic(struct lock_ops *op= s, int argc, const char ** printf("Lock-unlock latency of %u threads: %"PRIu64".%"PRIu64" ns.\n", nthreads, avg_ns, avg_ns_dot); =20 + /* Print histogram if requested. */ + if (do_hist) + print_histogram(workers); + + for (unsigned int i =3D 0; i < nthreads; ++i) + free(workers[i].lock_latency); free(workers); =20 return 0; --=20 2.50.1.487.gc89ff58d15-goog
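The struct lock_ops indirection in sync.c keeps bench_sync_lock_generic() lock-agnostic: any lock that can be driven through two void (*)(void *) callbacks reuses the same thread setup, affinity and timing code. As an illustration only (not part of this series), a pthread-mutex-backed variant could be plugged in alongside bench_sync_qspinlock() inside sync.c; the mutex_* names below are hypothetical:

  #include <pthread.h>

  /* Hypothetical adapters matching the lock_fn signature used in sync.c. */
  static void mutex_lock_op(void *data)
  {
  	pthread_mutex_lock((pthread_mutex_t *)data);
  }

  static void mutex_unlock_op(void *data)
  {
  	pthread_mutex_unlock((pthread_mutex_t *)data);
  }

  /* Sketch of a second entry point, mirroring bench_sync_qspinlock(). */
  int bench_sync_mutex(int argc, const char **argv)
  {
  	static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  	struct lock_ops ops = {
  		.lock = mutex_lock_op,
  		.unlock = mutex_unlock_op,
  		.data = &mutex,
  	};

  	return bench_sync_lock_generic(&ops, argc, argv);
  }

With the series applied, the queued-spinlock benchmark is invoked as, for example, 'perf bench sync qspinlock -t 8 -n 100000 --hist --hist-interval 500'; when -t is omitted the thread count defaults to the number of online CPUs.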