From nobody Sun Feb 8 18:08:53 2026 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2C27720409C; Mon, 2 Dec 2024 11:14:23 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1733138066; cv=none; b=N+i95skYH+mTjZ8YMDJIVEJ9Pbw85BzcC2UCc8lWtg5OKy23fsnR6L2y6R3ZE9GwOw5T0Pmnllq2K9/9S1XrA6M9+UIzTGUlIPsQV44kf9qFcRsUGlfGADC9Mfeb2zHW+bl62vr9VeUgCN6szv5G9zG3ToiqQ1VnYsX4n8ZUC4Q= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1733138066; c=relaxed/simple; bh=zZpgJZjIEnoTsRwMvT9tmgI44th/5FrP7wwiYapkIpY=; h=Date:From:To:Subject:Cc:In-Reply-To:References:MIME-Version: Message-ID:Content-Type; b=VK1AlA0IJVR+2SP9DPE7/4aA5CkHFub8OYXDyLOfxrXDA8+dsTouk+wRPjocxUkm+SzrqlETrl5JaBlBC6x5apqLkhL7i7qNRzCpXGrCDd9lPFbeGnVysvsg+CIzeZXjoQPjE5aiu3hV0Cp3YaS4z11Gc8bY7dBvp8eC9FI6buM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=FVkrUGNm; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=iqoWAmH6; arc=none smtp.client-ip=193.142.43.55 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="FVkrUGNm"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="iqoWAmH6" Date: Mon, 02 Dec 2024 11:14:21 -0000 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1733138062; h=from:from:sender:sender:reply-to:reply-to:subject:subject:date:date: message-id:message-id:to:to:cc:cc:mime-version:mime-version: content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=3dZ1hjAedscyazHjRXlD8lPbhCABbV4EkknJ45EeKho=; b=FVkrUGNm3PL7DUiWdAoUe2E54056VyLoFobUoTrB9j3Im/r94R5KGQXc5GTrm2m7chQZbA D5uq0vucuWv3QJayNVW6DOLdlBn89AbCOZYmWfFw6SXU253//F6sBBToQSgHDr1AYYQCo7 m0pRW6dYmvq38qUjYcnetvDqWwJDxFItZ+XSL79Kb4S7HPnNuInFiDfWnsasHTB4qsMDoA 4jcOSdZweHxsTXNudxj/QtzDkE5xD4VRyoHOAx0MQWPb5aIC8SZJbI1QyCo9yzQVrsgYv1 x81SESqahIzJRRG0FOymYDM4M2TLaP5E7KKxp2hs2GAYQIsudt2q2lluaWPOkg== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1733138062; h=from:from:sender:sender:reply-to:reply-to:subject:subject:date:date: message-id:message-id:to:to:cc:cc:mime-version:mime-version: content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=3dZ1hjAedscyazHjRXlD8lPbhCABbV4EkknJ45EeKho=; b=iqoWAmH6bkRq0+nGTlsuiufYOXx+bluXZE2gXKaiQT8lWyXTH8KSAUyAixV9ktG+zb+n4J N0h5JGZE12FhnWAw== From: "tip-bot2 for Kan Liang" Sender: tip-bot2@linutronix.de Reply-to: linux-kernel@vger.kernel.org To: linux-tip-commits@vger.kernel.org Subject: [tip: perf/core] perf/x86/intel/ds: Clarify adaptive PEBS processing Cc: Stephane Eranian , Kan Liang , "Peter Zijlstra (Intel)" , x86@kernel.org, linux-kernel@vger.kernel.org In-Reply-To: <20241119135504.1463839-3-kan.liang@linux.intel.com> References: <20241119135504.1463839-3-kan.liang@linux.intel.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Message-ID: <173313806137.412.15982953324191121188.tip-bot2@tip-bot2> Robot-ID: Robot-Unsubscribe: Contact to get blacklisted from these emails Precedence: bulk Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable The following commit has been merged into the perf/core branch of tip: Commit-ID: 7087bfb0adc9a12ec3b463b1d38072c5efce5d6c Gitweb: https://git.kernel.org/tip/7087bfb0adc9a12ec3b463b1d38072c5e= fce5d6c Author: Kan Liang AuthorDate: Tue, 19 Nov 2024 05:55:02 -08:00 Committer: Peter Zijlstra CommitterDate: Mon, 02 Dec 2024 12:01:34 +01:00 perf/x86/intel/ds: Clarify adaptive PEBS processing Modify the pebs_basic and pebs_meminfo structs to make the bitfields more explicit to ease readability of the code. Co-developed-by: Stephane Eranian Signed-off-by: Stephane Eranian Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241119135504.1463839-3-kan.liang@linux.in= tel.com --- arch/x86/events/intel/ds.c | 43 +++++++++++++----------------- arch/x86/include/asm/perf_event.h | 16 +++++++++-- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 34cba39..450f318 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1915,8 +1915,6 @@ static void adaptive_pebs_save_regs(struct pt_regs *r= egs, } =20 #define PEBS_LATENCY_MASK 0xffff -#define PEBS_CACHE_LATENCY_OFFSET 32 -#define PEBS_RETIRE_LATENCY_OFFSET 32 =20 /* * With adaptive PEBS the layout depends on what fields are configured. @@ -1930,8 +1928,7 @@ static void setup_pebs_adaptive_sample_data(struct pe= rf_event *event, struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); struct pebs_basic *basic =3D __pebs; void *next_record =3D basic + 1; - u64 sample_type; - u64 format_size; + u64 sample_type, format_group; struct pebs_meminfo *meminfo =3D NULL; struct pebs_gprs *gprs =3D NULL; struct x86_perf_regs *perf_regs; @@ -1943,7 +1940,7 @@ static void setup_pebs_adaptive_sample_data(struct pe= rf_event *event, perf_regs->xmm_regs =3D NULL; =20 sample_type =3D event->attr.sample_type; - format_size =3D basic->format_size; + format_group =3D basic->format_group; perf_sample_data_init(data, 0, event->hw.last_period); data->period =3D event->hw.last_period; =20 @@ -1964,7 +1961,7 @@ static void setup_pebs_adaptive_sample_data(struct pe= rf_event *event, =20 if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY) - data->weight.var3_w =3D format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEB= S_LATENCY_MASK; + data->weight.var3_w =3D basic->retire_latency; else data->weight.var3_w =3D 0; } @@ -1974,12 +1971,12 @@ static void setup_pebs_adaptive_sample_data(struct = perf_event *event, * But PERF_SAMPLE_TRANSACTION needs gprs->ax. * Save the pointer here but process later. */ - if (format_size & PEBS_DATACFG_MEMINFO) { + if (format_group & PEBS_DATACFG_MEMINFO) { meminfo =3D next_record; next_record =3D meminfo + 1; } =20 - if (format_size & PEBS_DATACFG_GP) { + if (format_group & PEBS_DATACFG_GP) { gprs =3D next_record; next_record =3D gprs + 1; =20 @@ -1992,14 +1989,13 @@ static void setup_pebs_adaptive_sample_data(struct = perf_event *event, adaptive_pebs_save_regs(regs, gprs); } =20 - if (format_size & PEBS_DATACFG_MEMINFO) { + if (format_group & PEBS_DATACFG_MEMINFO) { if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { - u64 weight =3D meminfo->latency; + u64 latency =3D x86_pmu.flags & PMU_FL_INSTR_LATENCY ? + meminfo->cache_latency : meminfo->mem_latency; =20 - if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) { - data->weight.var2_w =3D weight & PEBS_LATENCY_MASK; - weight >>=3D PEBS_CACHE_LATENCY_OFFSET; - } + if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) + data->weight.var2_w =3D meminfo->instr_latency; =20 /* * Although meminfo::latency is defined as a u64, @@ -2007,12 +2003,13 @@ static void setup_pebs_adaptive_sample_data(struct = perf_event *event, * in practice on Ice Lake and earlier platforms. */ if (sample_type & PERF_SAMPLE_WEIGHT) { - data->weight.full =3D weight ?: + data->weight.full =3D latency ?: intel_get_tsx_weight(meminfo->tsx_tuning); } else { - data->weight.var1_dw =3D (u32)(weight & PEBS_LATENCY_MASK) ?: + data->weight.var1_dw =3D (u32)latency ?: intel_get_tsx_weight(meminfo->tsx_tuning); } + data->sample_flags |=3D PERF_SAMPLE_WEIGHT_TYPE; } =20 @@ -2033,16 +2030,16 @@ static void setup_pebs_adaptive_sample_data(struct = perf_event *event, } } =20 - if (format_size & PEBS_DATACFG_XMMS) { + if (format_group & PEBS_DATACFG_XMMS) { struct pebs_xmm *xmm =3D next_record; =20 next_record =3D xmm + 1; perf_regs->xmm_regs =3D xmm->xmm; } =20 - if (format_size & PEBS_DATACFG_LBRS) { + if (format_group & PEBS_DATACFG_LBRS) { struct lbr_entry *lbr =3D next_record; - int num_lbr =3D ((format_size >> PEBS_DATACFG_LBR_SHIFT) + int num_lbr =3D ((format_group >> PEBS_DATACFG_LBR_SHIFT) & 0xff) + 1; next_record =3D next_record + num_lbr * sizeof(struct lbr_entry); =20 @@ -2052,11 +2049,11 @@ static void setup_pebs_adaptive_sample_data(struct = perf_event *event, } } =20 - WARN_ONCE(next_record !=3D __pebs + (format_size >> 48), - "PEBS record size %llu, expected %llu, config %llx\n", - format_size >> 48, + WARN_ONCE(next_record !=3D __pebs + basic->format_size, + "PEBS record size %u, expected %llu, config %llx\n", + basic->format_size, (u64)(next_record - __pebs), - basic->format_size); + format_group); } =20 static inline void * diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_= event.h index d95f902..cb9c467 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -422,7 +422,9 @@ static inline bool is_topdown_idx(int idx) */ =20 struct pebs_basic { - u64 format_size; + u64 format_group:32, + retire_latency:16, + format_size:16; u64 ip; u64 applicable_counters; u64 tsc; @@ -431,7 +433,17 @@ struct pebs_basic { struct pebs_meminfo { u64 address; u64 aux; - u64 latency; + union { + /* pre Alder Lake */ + u64 mem_latency; + /* Alder Lake and later */ + struct { + u64 instr_latency:16; + u64 pad2:16; + u64 cache_latency:16; + u64 pad3:16; + }; + }; u64 tsx_tuning; };