From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C9A59C433EF for ; Tue, 8 Feb 2022 22:37:04 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1387248AbiBHWf4 (ORCPT ); Tue, 8 Feb 2022 17:35:56 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37498 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386835AbiBHVQz (ORCPT ); Tue, 8 Feb 2022 16:16:55 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id DA4BFC0612B8 for ; Tue, 8 Feb 2022 13:16:53 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id z15-20020a25bb0f000000b00613388c7d99so368437ybg.8 for ; Tue, 08 Feb 2022 13:16:53 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=cGNJBKF6PsQgduFsVJ5Gh/suqswt2MaW3a4I4Gi0t+E=; b=Hm7/TufQO9AsHm4IIkCqa7Qhk1A5LQyxNQZSg1lgGy5u6TZA+6FoNJPutQGdwVsQrn CbAXBX2lpvyf47VHBbUAFYYl4O7pBrzflI0Lp5HdaSR697hin6dUk8jPdyvsnJYoVYLo 4GkACrLCH4OWVMPoQErPa7HmmRspR4XvsCoCzEJFR7jBR4Lr+ZhjjhDwj2S9rjC6SVsx 7ljOwCgE862AHqUyL4ATikJ2K/zN4Ftk435/nd/Oqd5shZEw6IKjV6lNU5ZMunhs2fGr f5KBugqWdLtFN8abbg5JzblrH6dgDQNG+QhBfhvkPNqkYQA90BukuPBoS/G7Hu7d9BZ3 1CWw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=cGNJBKF6PsQgduFsVJ5Gh/suqswt2MaW3a4I4Gi0t+E=; b=3tL0+f7Ti94XOSRfKKxBEJ3lEutQhR/vtRCWZVDMRYopMTVjeUAb9E1I3Bm6+tdlKX kaMaivyHtjqaGn79/2lKCTYOMifdDzB19B7OMRK/O3ThcuBAyk2qGJBeYZpy5seq7iJJ 
QynbPoKv+EBKl+LACYUrU/ptEQkBZ7rfMm99x58RTW8UM5d/z1zAM9o7jP49WP+wZEb/ uXf7FR2kuTuugnubM1kP9Wy2KV9rrB/CucduYUn/6dg+zz8xlRwXWGNQE1JjiMO4flWr TsVitapuwTYLWaTKzQOHIcsUDRUiCub1Xgs16owFV8HyZ49mlWebj6Dq5ROP19jmSOiy /B+Q== X-Gm-Message-State: AOAM5323UEwN6muG1hexQQPXCh1lVL+VDEyuS9LCXJdj3CyU7/cDnnsZ 6n6mYrLQWPxm4eRFycWybr6AWqXuWkPfbNw4eB0Ad5QlfB6HOVyXHODzDDBkzO+87Wc8CsGw8bu KT6JG7V44F7Y8pCLWF4Tbv0YxOMBlHcMG6IKafcPyDx+nTZMro8VYBakc9jVT12B7/BqYgLJw X-Google-Smtp-Source: ABdhPJyBwnf/TcqlVb9aM+QBBfi+sfUmTWUzarAY36/5Ju7kQX8q4LTcWN6yrsR8hBEZiRL1mcwLx89q5PXy X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a25:9a49:: with SMTP id r9mr6800491ybo.90.1644355013096; Tue, 08 Feb 2022 13:16:53 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:26 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-2-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 2.35.0.263.gb82422642f-goog Subject: [PATCH v6 01/12] perf/core: add perf_clear_branch_entry_bitfields() helper From: Stephane Eranian To: linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Make it simpler to reset all the info fields on the perf_branch_entry by adding a helper inline function. The goal is to centralize the initialization to avoid missing a field in case more are added. 
Signed-off-by: Stephane Eranian --- arch/x86/events/intel/lbr.c | 36 +++++++++++++++++------------------- include/linux/perf_event.h | 16 ++++++++++++++++ 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 669c2be14784..6a903113d3a6 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -769,6 +769,7 @@ void intel_pmu_lbr_disable_all(void) void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) { unsigned long mask =3D x86_pmu.lbr_nr - 1; + struct perf_branch_entry *br =3D cpuc->lbr_entries; u64 tos =3D intel_pmu_lbr_tos(); int i; =20 @@ -784,15 +785,11 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) =20 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); =20 - cpuc->lbr_entries[i].from =3D msr_lastbranch.from; - cpuc->lbr_entries[i].to =3D msr_lastbranch.to; - cpuc->lbr_entries[i].mispred =3D 0; - cpuc->lbr_entries[i].predicted =3D 0; - cpuc->lbr_entries[i].in_tx =3D 0; - cpuc->lbr_entries[i].abort =3D 0; - cpuc->lbr_entries[i].cycles =3D 0; - cpuc->lbr_entries[i].type =3D 0; - cpuc->lbr_entries[i].reserved =3D 0; + perf_clear_branch_entry_bitfields(br); + + br->from =3D msr_lastbranch.from; + br->to =3D msr_lastbranch.to; + br++; } cpuc->lbr_stack.nr =3D i; cpuc->lbr_stack.hw_idx =3D tos; @@ -807,6 +804,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) { bool need_info =3D false, call_stack =3D false; unsigned long mask =3D x86_pmu.lbr_nr - 1; + struct perf_branch_entry *br =3D cpuc->lbr_entries; u64 tos =3D intel_pmu_lbr_tos(); int i; int out =3D 0; @@ -878,15 +876,14 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) if (abort && x86_pmu.lbr_double_abort && out > 0) out--; =20 - cpuc->lbr_entries[out].from =3D from; - cpuc->lbr_entries[out].to =3D to; - cpuc->lbr_entries[out].mispred =3D mis; - cpuc->lbr_entries[out].predicted =3D pred; - cpuc->lbr_entries[out].in_tx =3D in_tx; - cpuc->lbr_entries[out].abort =3D abort; - 
cpuc->lbr_entries[out].cycles =3D cycles; - cpuc->lbr_entries[out].type =3D 0; - cpuc->lbr_entries[out].reserved =3D 0; + perf_clear_branch_entry_bitfields(br+out); + br[out].from =3D from; + br[out].to =3D to; + br[out].mispred =3D mis; + br[out].predicted =3D pred; + br[out].in_tx =3D in_tx; + br[out].abort =3D abort; + br[out].cycles =3D cycles; out++; } cpuc->lbr_stack.nr =3D out; @@ -951,6 +948,8 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *c= puc, to =3D rdlbr_to(i, lbr); info =3D rdlbr_info(i, lbr); =20 + perf_clear_branch_entry_bitfields(e); + e->from =3D from; e->to =3D to; e->mispred =3D get_lbr_mispred(info); @@ -959,7 +958,6 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *c= puc, e->abort =3D !!(info & LBR_INFO_ABORT); e->cycles =3D get_lbr_cycles(info); e->type =3D get_lbr_br_type(info); - e->reserved =3D 0; } =20 cpuc->lbr_stack.nr =3D i; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 733649184b27..496eb6aa6e54 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1063,6 +1063,22 @@ static inline void perf_sample_data_init(struct perf= _sample_data *data, data->txn =3D 0; } =20 +/* + * Clear all bitfields in the perf_branch_entry. + * The to and from fields are not cleared because they are + * systematically modified by caller. 
+ */ +static inline void perf_clear_branch_entry_bitfields(struct perf_branch_en= try *br) +{ + br->mispred =3D 0; + br->predicted =3D 0; + br->in_tx =3D 0; + br->abort =3D 0; + br->cycles =3D 0; + br->type =3D 0; + br->reserved =3D 0; +} + extern void perf_output_sample(struct perf_output_handle *handle, struct perf_event_header *header, struct perf_sample_data *data, --=20 2.35.0.263.gb82422642f-goog From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 31A33C433EF for ; Tue, 8 Feb 2022 22:24:32 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1386406AbiBHWY3 (ORCPT ); Tue, 8 Feb 2022 17:24:29 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37510 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386838AbiBHVQ5 (ORCPT ); Tue, 8 Feb 2022 16:16:57 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B900CC0612B8 for ; Tue, 8 Feb 2022 13:16:56 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id q199-20020a252ad0000000b0061e113c9953so254768ybq.18 for ; Tue, 08 Feb 2022 13:16:56 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=ITTJ1A6lXvS57TZRz+Myy0dfx6nkaF0DnZNJ01aeefk=; b=R8njhaB+c1wmWoG0XOQ9y15iLwF8nqIFMc++mJHiv8K6Y14SMIgc2E5W7t/1d2x4bl jSPfZeX8JfkXAz5jqGvXyDm/WJUh3TIwx/xarNaSTYjsFmPST6lb1vhExTPJbkeSyx5z vBaO6RnGFRPqICmEa5HqbHTo77y3B/tDX8FuI3vWa6zHue7poO/K/8YoYUW+XhpoLMQZ ne9rwQhfZWr40hQw2bx/cJuo4J/55zUrF55PFthaJpnhmtNroU8YooetYYYdjIj0XEcQ nSsKv5o5dsxZQGhj5Ukc6zM+I63aHw+AYe/lljVMAbMKbQnEu/vvkr004fLFxve2E0fi 0jiA== 
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=ITTJ1A6lXvS57TZRz+Myy0dfx6nkaF0DnZNJ01aeefk=; b=Yjhz60UqcriXasOW2+jC3rvs/6vAHwYdZuuhM1xGiq4kaOB2Sy0O05YrTTWOCHE1ap S/1NRbwhJuPZ2IHYajJikCCDj5c7pw1BiJZp1H2+7/u1My4ffwOH8Yxb2ZraTy0btFgn /y1PTuj6mm13crRmaMRR4jmx4PR89wvyErVLj+MPnOSEpzZlVbyEtO+LbKb2M6iVD7fk MTiFLqB+V9kcwV4NpRdxR2xfff8HtXT3W/ZBEN6JRQw3CajYAxLFm2X7zn+XOZuzTZ22 DyaCpZIwe7TaBDmsR1ilWNRcy4rdKn88ZkORCek4u2W/xyvq+F35/f2oxJRwOnL8r+iI 1CDg== X-Gm-Message-State: AOAM530LMMhWPH1G9l8TibvJ6sE7+Al7KWN0QjODm5o902s3W/BKCadO TcVQ/0qx5z1QOsOx/isKvz+E9rZIoAWmMKaWoe9lqEftpXgrVW/9nCTZQIZhrp8Tk64wLPhRn9P psrV/UC4/aIHHxjL7uwFUkI7o2P4lYJRP4buulP9dSzBEsW4Xy6Xgo7R3KQ0cZEVnbf8Vv4ai X-Google-Smtp-Source: ABdhPJxQG0xikzGnUvz90O/VQxJVyp/N/4fcLoZFSygdHqkxNqNdoRbL1nu8C1hVgS2iVkIFX6h3jydmX8b3 X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a25:2086:: with SMTP id g128mr6605100ybg.759.1644355015927; Tue, 08 Feb 2022 13:16:55 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:27 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-3-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 2.35.0.263.gb82422642f-goog Subject: [PATCH v6 02/12] x86/cpufeatures: add AMD Fam19h Branch Sampling feature From: Stephane Eranian To: linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a cpu feature for AMD Fam19h Branch Sampling feature as bit 31 of EBX on CPUID leaf function 0x80000008. 
Signed-off-by: Stephane Eranian --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpuf= eatures.h index 4cc0ef96152c..a3e895aded02 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -315,6 +315,7 @@ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store= Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass = is fixed in hardware. */ #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performanc= e Control */ +#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ =20 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 1= 4 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ --=20 2.35.0.263.gb82422642f-goog From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1527BC47082 for ; Tue, 8 Feb 2022 22:34:14 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1388766AbiBHWd3 (ORCPT ); Tue, 8 Feb 2022 17:33:29 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37530 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386840AbiBHVRB (ORCPT ); Tue, 8 Feb 2022 16:17:01 -0500 Received: from mail-pf1-x449.google.com (mail-pf1-x449.google.com [IPv6:2607:f8b0:4864:20::449]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6DB47C0612B8 for ; Tue, 8 Feb 2022 13:16:59 -0800 (PST) Received: by mail-pf1-x449.google.com with SMTP id bd15-20020a056a00278f00b004c7617c47dbso292017pfb.0 for ; Tue, 08 Feb 2022 13:16:59 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; 
h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=dWVFAIfTb7kBJhKGfwIwbu/UilYrpip4/ly3fpynVPg=; b=ewPN/sBd6KV2TBeWI0cpJYkLrdXd3XObAxQ/YSCiiqQresqaRQzmA1cPEV8wHANP9I naCD1oALgAaIi0wDzevzU+qFST0cHoK0i0TjP2kQ/hkRGw3kIIXRlBTVxFvSuRC2XJcd BeCjXIJ4RIKBpULIT4WR2ZcQoKdlbERAobE7oQCq07Vdl529Pgbr2M2aDWlCv5aEpYRg ybcFICFBjaqCspbLiTz2P9igGYsZVzjz4FJj8JtGPHnx28HhlvIq5856q6It5kOrKfGU Gnr+qCstlhQRPDvWk2siiN2vCZ+5snDjtSOzxVq6raJ9wuzWxd0lrXjhsdPGKi8zERbw /gXw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=dWVFAIfTb7kBJhKGfwIwbu/UilYrpip4/ly3fpynVPg=; b=I4/fs3nmfJK7VOz1/0EmyxZpz8ZFxIZBLs/QGLaziUyQQ3RXYcaXUoq5XeRKHUogu/ iy5vUM/yEtfuR96AjtDdIzazlp8w1EgLDukrAnbOAZBQrsxBZkZXhiyQ+JA80f6C/14K OGNz60CGWhskghUWcTLx05d2fdRsGvglfvYhT3Ba39pdE858+JAaSA4jVk7tqZ8HVzLz qTVPWzvPmACuKNivN/WPyTcsYROr4MHx8g1Zjfviuir52KK/BHtMz+v6P1It7XPOBfB2 ifZnvVONmJGapf0EbgJ/yAJmh4yGxW/p9beOEggPrc70ORF8eZyV9yM1HugABjBnP5/Q NF+A== X-Gm-Message-State: AOAM533rhxzkbvqs/EueT4WgxFxAx7/hRAELKqug5dRJd/PmkchCaBAu qsrwKQNpr3O63oHZnTOLCdTOdUg4/E8UhQ7QMJU+NqUGWI1odbHR2AAwu0y6gU+lMuSkmkbtpfL 292xFyFuLfFFzHOlKi/+V8O1mfM3/xyuBaYU/1heo7HrqLUO+nVrtIoOFLrgfY7YnnX4nPOU9 X-Google-Smtp-Source: ABdhPJzmPbz3Liv20I104eNqXo6xGke1LxialQ3joR/qjn6cyzxLg9/li7AXccj82Ez2/yDGv7eJKIAsZjRz X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a62:a512:: with SMTP id v18mr6265903pfm.42.1644355018689; Tue, 08 Feb 2022 13:16:58 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:28 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-4-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 2.35.0.263.gb82422642f-goog Subject: [PATCH v6 03/12] perf/x86/amd: add AMD Fam19h Branch Sampling support From: Stephane Eranian To: 
linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add support for the AMD Fam19h 16-deep branch sampling feature as described in the AMD PPR Fam19h Model 01h Revision B1. This is a model specific extension. It is not an architected AMD feature. The Branch Sampling (BRS) operates with a 16-deep saturating buffer in MSR registers. There is no branch type filtering. All control flow changes are captured. BRS relies on specific programming of the core PMU of Fam19h. In particular, the following requirements must be met: - the sampling period must be greater than 16 (BRS depth) - the sampling period must use fixed mode, not frequency mode BRS interacts with the NMI interrupt as well. Because enabling BRS is expensive, it is only activated after P event occurrences, where P is the desired sampling period. At P occurrences of the event, the counter overflows, the CPU catches the interrupt, activates BRS for 16 branches unt= il it saturates, and then delivers the NMI to the kernel. Between the overflow and the time BRS activates, more branches may be executed, skewing the period. All along, the sampling event keeps counting. The skid may be attenuated by reducing the sampling period by 16 (subsequent patch). BRS is integrated into perf_events seamlessly via the same PERF_SAMPLE_BRANCH_STACK sample format. BRS generates perf_branch_entry records in the sampling buffer. No prediction information is supported. The branches are stored in reverse order of execution. The most recent branch = is the first entry in each record. No modification to the perf tool is necessary. BRS can be used with any sampling event. However, it is recommended to use the RETIRED_BRANCH_INSTRUCTIONS event because it matches what the BRS captures. 
$ perf record -b -c 1000037 -e cpu/event=3D0xc2,name=3Dret_br_instructions/= test $ perf report -D 56531696056126 0x193c000 [0x1a8]: PERF_RECORD_SAMPLE(IP, 0x2): 18122/18230:= 0x401d24 period: 1000037 addr: 0 ... branch stack: nr:16 ..... 0: 0000000000401d24 -> 0000000000401d5a 0 cycles 0 ..... 1: 0000000000401d5c -> 0000000000401d24 0 cycles 0 ..... 2: 0000000000401d22 -> 0000000000401d5c 0 cycles 0 ..... 3: 0000000000401d5e -> 0000000000401d22 0 cycles 0 ..... 4: 0000000000401d20 -> 0000000000401d5e 0 cycles 0 ..... 5: 0000000000401d3e -> 0000000000401d20 0 cycles 0 ..... 6: 0000000000401d42 -> 0000000000401d3e 0 cycles 0 ..... 7: 0000000000401d3c -> 0000000000401d42 0 cycles 0 ..... 8: 0000000000401d44 -> 0000000000401d3c 0 cycles 0 ..... 9: 0000000000401d3a -> 0000000000401d44 0 cycles 0 ..... 10: 0000000000401d46 -> 0000000000401d3a 0 cycles 0 ..... 11: 0000000000401d38 -> 0000000000401d46 0 cycles 0 ..... 12: 0000000000401d48 -> 0000000000401d38 0 cycles 0 ..... 13: 0000000000401d36 -> 0000000000401d48 0 cycles 0 ..... 14: 0000000000401d4a -> 0000000000401d36 0 cycles 0 ..... 15: 0000000000401d34 -> 0000000000401d4a 0 cycles 0 ... thread: test:18230 ...... 
dso: test Signed-off-by: Stephane Eranian --- arch/x86/events/amd/Makefile | 2 +- arch/x86/events/amd/brs.c | 317 +++++++++++++++++++++++++++++++ arch/x86/events/amd/core.c | 197 ++++++++++++++++++- arch/x86/events/core.c | 10 +- arch/x86/events/perf_event.h | 101 ++++++++-- arch/x86/include/asm/msr-index.h | 4 + 6 files changed, 609 insertions(+), 22 deletions(-) create mode 100644 arch/x86/events/amd/brs.c diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile index 6cbe38d5fd9d..cf323ffab5cd 100644 --- a/arch/x86/events/amd/Makefile +++ b/arch/x86/events/amd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CPU_SUP_AMD) +=3D core.o +obj-$(CONFIG_CPU_SUP_AMD) +=3D core.o brs.o obj-$(CONFIG_PERF_EVENTS_AMD_POWER) +=3D power.o obj-$(CONFIG_X86_LOCAL_APIC) +=3D ibs.o obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE) +=3D amd-uncore.o diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c new file mode 100644 index 000000000000..3c13c484c637 --- /dev/null +++ b/arch/x86/events/amd/brs.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implement support for AMD Fam19h Branch Sampling feature + * Based on specifications published in AMD PPR Fam19 Model 01 + * + * Copyright 2021 Google LLC + * Contributed by Stephane Eranian + */ +#include +#include +#include + +#include "../perf_event.h" + +#define BRS_POISON 0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */ + +/* Debug Extension Configuration register layout */ +union amd_debug_extn_cfg { + __u64 val; + struct { + __u64 rsvd0:2, /* reserved */ + brsmen:1, /* branch sample enable */ + rsvd4_3:2,/* reserved - must be 0x3 */ + vb:1, /* valid branches recorded */ + rsvd2:10, /* reserved */ + msroff:4, /* index of next entry to write */ + rsvd3:4, /* reserved */ + pmc:3, /* #PMC holding the sampling event */ + rsvd4:37; /* reserved */ + }; +}; + +static inline unsigned int brs_from(int idx) +{ + return MSR_AMD_SAMP_BR_FROM + 2 * idx; +} + +static inline unsigned 
int brs_to(int idx) +{ + return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1; +} + +static inline void set_debug_extn_cfg(u64 val) +{ + /* bits[4:3] must always be set to 11b */ + wrmsrl(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3); +} + +static inline u64 get_debug_extn_cfg(void) +{ + u64 val; + + rdmsrl(MSR_AMD_DBG_EXTN_CFG, val); + return val; +} + +static bool __init amd_brs_detect(void) +{ + if (!boot_cpu_has(X86_FEATURE_BRS)) + return false; + + switch (boot_cpu_data.x86) { + case 0x19: /* AMD Fam19h (Zen3) */ + x86_pmu.lbr_nr =3D 16; + + /* No hardware filtering supported */ + x86_pmu.lbr_sel_map =3D NULL; + x86_pmu.lbr_sel_mask =3D 0; + break; + default: + return false; + } + + return true; +} + +/* + * Current BRS implementation does not support branch type or privilege le= vel + * filtering. Therefore, this function simply enforces these limitations. = No need for + * a br_sel_map. Software filtering is not supported because it would not = correlate well + * with a sampling period. + */ +int amd_brs_setup_filter(struct perf_event *event) +{ + u64 type =3D event->attr.branch_sample_type; + + /* No BRS support */ + if (!x86_pmu.lbr_nr) + return -EOPNOTSUPP; + + /* Can only capture all branches, i.e., no filtering */ + if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) !=3D PERF_SAMPLE_BRANCH_ANY) + return -EINVAL; + + /* can only capture at all priv levels due to the way BRS works */ + if ((type & PERF_SAMPLE_BRANCH_PLM_ALL) !=3D PERF_SAMPLE_BRANCH_PLM_ALL) + return -EINVAL; + + return 0; +} + +/* tos =3D top of stack, i.e., last valid entry written */ +static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg) +{ + /* + * msroff: index of next entry to write so top-of-stack is one off + * if BRS is full then msroff is set back to 0. + */ + return (cfg->msroff ? 
cfg->msroff : x86_pmu.lbr_nr) - 1; +} + +/* + * make sure we have a sane BRS offset to begin with + * especially with kexec + */ +void amd_brs_reset(void) +{ + /* + * Reset config + */ + set_debug_extn_cfg(0); + + /* + * Mark first entry as poisoned + */ + wrmsrl(brs_to(0), BRS_POISON); +} + +int __init amd_brs_init(void) +{ + if (!amd_brs_detect()) + return -EOPNOTSUPP; + + pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr); + + return 0; +} + +void amd_brs_enable(void) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + union amd_debug_extn_cfg cfg; + + /* Activate only on first user */ + if (++cpuc->brs_active > 1) + return; + + cfg.val =3D 0; /* reset all fields */ + cfg.brsmen =3D 1; /* enable branch sampling */ + + /* Set enable bit */ + set_debug_extn_cfg(cfg.val); +} + +void amd_brs_enable_all(void) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + if (cpuc->lbr_users) + amd_brs_enable(); +} + +void amd_brs_disable(void) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + union amd_debug_extn_cfg cfg; + + /* Check if active (could be disabled via x86_pmu_disable_all()) */ + if (!cpuc->brs_active) + return; + + /* Only disable for last user */ + if (--cpuc->brs_active) + return; + + /* + * Clear the brsmen bit but preserve the others as they contain + * useful state such as vb and msroff + */ + cfg.val =3D get_debug_extn_cfg(); + + /* + * When coming in on interrupt and BRS is full, then hw will have + * already stopped BRS, no need to issue wrmsr again + */ + if (cfg.brsmen) { + cfg.brsmen =3D 0; + set_debug_extn_cfg(cfg.val); + } +} + +void amd_brs_disable_all(void) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + if (cpuc->lbr_users) + amd_brs_disable(); +} + +/* + * Caller must ensure amd_brs_inuse() is true before calling + * return: + */ +void amd_brs_drain(void) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + struct perf_event *event =3D cpuc->events[0]; + struct 
perf_branch_entry *br =3D cpuc->lbr_entries; + union amd_debug_extn_cfg cfg; + u32 i, nr =3D 0, num, tos, start; + u32 shift =3D 64 - boot_cpu_data.x86_virt_bits; + + /* + * BRS event forced on PMC0, + * so check if there is an event. + * It is possible to have lbr_users > 0 but the event + * not yet scheduled due to long latency PMU irq + */ + if (!event) + goto empty; + + cfg.val =3D get_debug_extn_cfg(); + + /* Sanity check [0-x86_pmu.lbr_nr] */ + if (WARN_ON_ONCE(cfg.msroff >=3D x86_pmu.lbr_nr)) + goto empty; + + /* No valid branch */ + if (cfg.vb =3D=3D 0) + goto empty; + + /* + * msr.off points to next entry to be written + * tos =3D most recent entry index =3D msr.off - 1 + * BRS register buffer saturates, so we know we have + * start < tos and that we have to read from start to tos + */ + start =3D 0; + tos =3D amd_brs_get_tos(&cfg); + + num =3D tos - start + 1; + + /* + * BRS is only one pass (saturation) from MSROFF to depth-1 + * MSROFF wraps to zero when buffer is full + */ + for (i =3D 0; i < num; i++) { + u32 brs_idx =3D tos - i; + u64 from, to; + + rdmsrl(brs_to(brs_idx), to); + + /* Entry does not belong to us (as marked by kernel) */ + if (to =3D=3D BRS_POISON) + break; + + rdmsrl(brs_from(brs_idx), from); + + /* + * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved. 
+ * Necessary to generate proper virtual addresses suitable for + * symbolization + */ + to =3D (u64)(((s64)to << shift) >> shift); + + perf_clear_branch_entry_bitfields(br+nr); + + br[nr].from =3D from; + br[nr].to =3D to; + + nr++; + } +empty: + /* Record number of sampled branches */ + cpuc->lbr_stack.nr =3D nr; +} + +/* + * Poison most recent entry to prevent reuse by next task + * required because BRS entry are not tagged by PID + */ +static void amd_brs_poison_buffer(void) +{ + union amd_debug_extn_cfg cfg; + unsigned int idx; + + /* Get current state */ + cfg.val =3D get_debug_extn_cfg(); + + /* idx is most recently written entry */ + idx =3D amd_brs_get_tos(&cfg); + + /* Poison target of entry */ + wrmsrl(brs_to(idx), BRS_POISON); +} + +/* + * On context switch in, we need to make sure no samples from previous user + * are left in the BRS. + * + * On ctxswin, sched_in =3D true, called after the PMU has started + * On ctxswout, sched_in =3D false, called before the PMU is stopped + */ +void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + + /* no active users */ + if (!cpuc->lbr_users) + return; + + /* + * On context switch in, we need to ensure we do not use entries + * from previous BRS user on that CPU, so we poison the buffer as + * a faster way compared to resetting all entries. 
+ */ + if (sched_in) + amd_brs_poison_buffer(); +} diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 9687a8aef01c..44d8f618bb3e 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -327,6 +327,8 @@ static inline bool amd_is_pair_event_code(struct hw_per= f_event *hwc) =20 static int amd_core_hw_config(struct perf_event *event) { + int ret =3D 0; + if (event->attr.exclude_host && event->attr.exclude_guest) /* * When HO =3D=3D GO =3D=3D 1 the hardware treats that as GO =3D=3D HO = =3D=3D 0 @@ -343,7 +345,32 @@ static int amd_core_hw_config(struct perf_event *event) if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw)) event->hw.flags |=3D PERF_X86_EVENT_PAIR; =20 - return 0; + /* + * if branch stack is requested + */ + if (has_branch_stack(event) && is_sampling_event(event)) { + /* + * BRS implementation does not work with frequency mode + * reprogramming of the period. + */ + if (event->attr.freq) + return -EINVAL; + /* + * The kernel subtracts BRS depth from period, so it must be big enough + */ + if (event->attr.sample_period <=3D x86_pmu.lbr_nr) + return -EINVAL; + + /* + * Check if we can allow PERF_SAMPLE_BRANCH_STACK + */ + ret =3D amd_brs_setup_filter(event); + + /* only set in case of success */ + if (!ret) + event->hw.flags |=3D PERF_X86_EVENT_AMD_BRS; + } + return ret; } =20 static inline int amd_is_nb_event(struct hw_perf_event *hwc) @@ -366,7 +393,7 @@ static int amd_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip && get_ibs_caps()) return -ENOENT; =20 - if (has_branch_stack(event)) + if (has_branch_stack(event) && !x86_pmu.lbr_nr) return -EOPNOTSUPP; =20 ret =3D x86_pmu_hw_config(event); @@ -555,6 +582,8 @@ static void amd_pmu_cpu_starting(int cpu) =20 cpuc->amd_nb->nb_id =3D nb_id; cpuc->amd_nb->refcnt++; + + amd_brs_reset(); } =20 static void amd_pmu_cpu_dead(int cpu) @@ -610,6 +639,8 @@ static void amd_pmu_disable_all(void) struct cpu_hw_events *cpuc =3D 
this_cpu_ptr(&cpu_hw_events); int idx; =20 + amd_brs_disable_all(); + x86_pmu_disable_all(); =20 /* @@ -634,6 +665,30 @@ static void amd_pmu_disable_all(void) } } =20 +static void amd_pmu_enable_event(struct perf_event *event) +{ + x86_pmu_enable_event(event); +} + +static void amd_pmu_enable_all(int added) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc; + int idx; + + amd_brs_enable_all(); + + for (idx =3D 0; idx < x86_pmu.num_counters; idx++) { + hwc =3D &cpuc->events[idx]->hw; + + /* only activate events which are marked as active */ + if (!test_bit(idx, cpuc->active_mask)) + continue; + + amd_pmu_enable_event(cpuc->events[idx]); + } +} + static void amd_pmu_disable_event(struct perf_event *event) { x86_pmu_disable_event(event); @@ -651,6 +706,18 @@ static void amd_pmu_disable_event(struct perf_event *e= vent) amd_pmu_wait_on_overflow(event->hw.idx); } =20 +static void amd_pmu_add_event(struct perf_event *event) +{ + if (needs_branch_stack(event)) + amd_pmu_brs_add(event); +} + +static void amd_pmu_del_event(struct perf_event *event) +{ + if (needs_branch_stack(event)) + amd_pmu_brs_del(event); +} + /* * Because of NMI latency, if multiple PMC counters are active or other so= urces * of NMIs are received, the perf NMI handler can handle one or more overf= lowed @@ -671,11 +738,31 @@ static void amd_pmu_disable_event(struct perf_event *= event) */ static int amd_pmu_handle_irq(struct pt_regs *regs) { + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); int handled; + int pmu_enabled; + + /* + * Save the PMU state. + * It needs to be restored when leaving the handler. 
+ */ + pmu_enabled =3D cpuc->enabled; + cpuc->enabled =3D 0; + + /* stop everything (includes BRS) */ + amd_pmu_disable_all(); + + /* Drain BRS if in use (could be inactive) */ + if (cpuc->lbr_users) + amd_brs_drain(); =20 /* Process any counter overflows */ handled =3D x86_pmu_handle_irq(regs); =20 + cpuc->enabled =3D pmu_enabled; + if (pmu_enabled) + amd_pmu_enable_all(0); + /* * If a counter was handled, record a timestamp such that un-handled * NMIs will be claimed if arriving within that window. @@ -897,6 +984,51 @@ static void amd_put_event_constraints_f17h(struct cpu_= hw_events *cpuc, --cpuc->n_pair; } =20 +/* + * Because of the way BRS operates with inactive and active phases, and + * the link to one counter, it is not possible to have two events using BRS + * scheduled at the same time. There would be an issue with enforcing the + * period of each one and given that the BRS saturates, it would not be po= ssible + * to guarantee correlated content for all events. Therefore, in situations + * where multiple events want to use BRS, the kernel enforces mutual exclu= sion. + * Exclusion is enforced by choosing only one counter for events using BRS. + * The event scheduling logic will then automatically multiplex the + * events and ensure that at most one event is actively using BRS. 
+ * + * The BRS counter could be any counter, but there is no constraint on Fam= 19h, + * therefore all counters are equal and thus we pick the first one: PMC0 + */ +static struct event_constraint amd_fam19h_brs_cntr0_constraint =3D + EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK); + +static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint =3D + __EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAI= R); + +static struct event_constraint * +amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct hw_perf_event *hwc =3D &event->hw; + bool has_brs =3D has_amd_brs(hwc); + + /* + * In case BRS is used with an event requiring a counter pair, + * the kernel allows it but only on counter 0 & 1 to enforce + * multiplexing requiring to protect BRS in case of multiple + * BRS users + */ + if (amd_is_pair_event_code(hwc)) { + return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint + : &pair_constraint; + } + + if (has_brs) + return &amd_fam19h_brs_cntr0_constraint; + + return &unconstrained; +} + + static ssize_t amd_event_sysfs_show(char *page, u64 config) { u64 event =3D (config & ARCH_PERFMON_EVENTSEL_EVENT) | @@ -905,12 +1037,19 @@ static ssize_t amd_event_sysfs_show(char *page, u64 = config) return x86_event_sysfs_show(page, config, event); } =20 +static void amd_pmu_sched_task(struct perf_event_context *ctx, + bool sched_in) +{ + if (sched_in && x86_pmu.lbr_nr) + amd_pmu_brs_sched_task(ctx, sched_in); +} + static __initconst const struct x86_pmu amd_pmu =3D { .name =3D "AMD", .handle_irq =3D amd_pmu_handle_irq, .disable_all =3D amd_pmu_disable_all, - .enable_all =3D x86_pmu_enable_all, - .enable =3D x86_pmu_enable_event, + .enable_all =3D amd_pmu_enable_all, + .enable =3D amd_pmu_enable_event, .disable =3D amd_pmu_disable_event, .hw_config =3D amd_pmu_hw_config, .schedule_events =3D x86_schedule_events, @@ -920,6 +1059,8 @@ static __initconst const struct x86_pmu amd_pmu =3D { .event_map 
=3D amd_pmu_event_map, .max_events =3D ARRAY_SIZE(amd_perfmon_event_map), .num_counters =3D AMD64_NUM_COUNTERS, + .add =3D amd_pmu_add_event, + .del =3D amd_pmu_del_event, .cntval_bits =3D 48, .cntval_mask =3D (1ULL << 48) - 1, .apic =3D 1, @@ -938,6 +1079,37 @@ static __initconst const struct x86_pmu amd_pmu =3D { .amd_nb_constraints =3D 1, }; =20 +static ssize_t branches_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr); +} + +static DEVICE_ATTR_RO(branches); + +static struct attribute *amd_pmu_brs_attrs[] =3D { + &dev_attr_branches.attr, + NULL, +}; + +static umode_t +amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.lbr_nr ? attr->mode : 0; +} + +static struct attribute_group group_caps_amd_brs =3D { + .name =3D "caps", + .attrs =3D amd_pmu_brs_attrs, + .is_visible =3D amd_brs_is_visible, +}; + +static const struct attribute_group *amd_attr_update[] =3D { + &group_caps_amd_brs, + NULL, +}; + static int __init amd_core_pmu_init(void) { u64 even_ctr_mask =3D 0ULL; @@ -989,6 +1161,23 @@ static int __init amd_core_pmu_init(void) x86_pmu.flags |=3D PMU_FL_PAIR; } =20 + if (boot_cpu_data.x86 >=3D 0x19) { + /* + * On AMD, invoking pmu_disable_all() is very expensive and the function= is + * invoked on context-switch in via sched_task_in(), so enable only when= necessary + */ + if (!amd_brs_init()) { + x86_pmu.get_event_constraints =3D amd_get_event_constraints_f19h; + x86_pmu.sched_task =3D amd_pmu_sched_task; + /* + * The put_event_constraints callback is shared with + * Fam17h, set above + */ + } + } + + x86_pmu.attr_update =3D amd_attr_update; + pr_cont("core perfctr, "); return 0; } diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e686c5e0537b..c2a890caeb0a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1338,6 +1338,10 @@ static void x86_pmu_enable(struct pmu *pmu) if (hwc->state & PERF_HES_ARCH) 
continue; =20 + /* + * if cpuc->enabled =3D 0, then no wrmsr as + * per x86_pmu_enable_event() + */ x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added =3D 0; @@ -1704,11 +1708,15 @@ int x86_pmu_handle_irq(struct pt_regs *regs) * event overflow */ handled++; - perf_sample_data_init(&data, 0, event->hw.last_period); =20 if (!x86_perf_event_set_period(event)) continue; =20 + perf_sample_data_init(&data, 0, event->hw.last_period); + + if (has_branch_stack(event)) + data.br_stack =3D &cpuc->lbr_stack; + if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 150261d929b9..3485a4cf0241 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -67,22 +67,23 @@ static inline bool constraint_match(struct event_constr= aint *c, u64 ecode) /* * struct hw_perf_event.flags flags */ -#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling= */ -#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */ -#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */ -#define PERF_X86_EVENT_PEBS_LD_HSW 0x0008 /* haswell style datala, load */ -#define PERF_X86_EVENT_PEBS_NA_HSW 0x0010 /* haswell style datala, unknown= */ -#define PERF_X86_EVENT_EXCL 0x0020 /* HT exclusivity on counter */ -#define PERF_X86_EVENT_DYNAMIC 0x0040 /* dynamic alloc'd constraint */ - -#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */ -#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */ -#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */ -#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */ -#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */ -#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */ -#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics even= ts */ -#define PERF_X86_EVENT_PEBS_STLAT 0x8000 /* st+stlat data address sampling= 
*/ +#define PERF_X86_EVENT_PEBS_LDLAT 0x00001 /* ld+ldlat data address samplin= g */ +#define PERF_X86_EVENT_PEBS_ST 0x00002 /* st data address sampling */ +#define PERF_X86_EVENT_PEBS_ST_HSW 0x00004 /* haswell style datala, store = */ +#define PERF_X86_EVENT_PEBS_LD_HSW 0x00008 /* haswell style datala, load */ +#define PERF_X86_EVENT_PEBS_NA_HSW 0x00010 /* haswell style datala, unknow= n */ +#define PERF_X86_EVENT_EXCL 0x00020 /* HT exclusivity on counter */ +#define PERF_X86_EVENT_DYNAMIC 0x00040 /* dynamic alloc'd constraint */ + +#define PERF_X86_EVENT_EXCL_ACCT 0x00100 /* accounted EXCL event */ +#define PERF_X86_EVENT_AUTO_RELOAD 0x00200 /* use PEBS auto-reload */ +#define PERF_X86_EVENT_LARGE_PEBS 0x00400 /* use large PEBS */ +#define PERF_X86_EVENT_PEBS_VIA_PT 0x00800 /* use PT buffer for PEBS */ +#define PERF_X86_EVENT_PAIR 0x01000 /* Large Increment per Cycle */ +#define PERF_X86_EVENT_LBR_SELECT 0x02000 /* Save/Restore MSR_LBR_SELECT */ +#define PERF_X86_EVENT_TOPDOWN 0x04000 /* Count Topdown slots/metrics eve= nts */ +#define PERF_X86_EVENT_PEBS_STLAT 0x08000 /* st+stlat data address samplin= g */ +#define PERF_X86_EVENT_AMD_BRS 0x10000 /* AMD Branch Sampling */ =20 static inline bool is_topdown_count(struct perf_event *event) { @@ -325,6 +326,8 @@ struct cpu_hw_events { * AMD specific bits */ struct amd_nb *amd_nb; + int brs_active; /* BRS is enabled */ + /* Inverted mask of bits to clear in the perf_ctr ctrl registers */ u64 perf_ctr_virt_mask; int n_pair; /* Large increment events */ @@ -1105,6 +1108,11 @@ int x86_pmu_hw_config(struct perf_event *event); =20 void x86_pmu_disable_all(void); =20 +static inline bool has_amd_brs(struct hw_perf_event *hwc) +{ + return hwc->flags & PERF_X86_EVENT_AMD_BRS; +} + static inline bool is_counter_pair(struct hw_perf_event *hwc) { return hwc->flags & PERF_X86_EVENT_PAIR; @@ -1210,6 +1218,50 @@ static inline bool fixed_counter_disabled(int i, str= uct pmu *pmu) #ifdef CONFIG_CPU_SUP_AMD =20 int amd_pmu_init(void); 
+int amd_brs_init(void); +void amd_brs_disable(void); +void amd_brs_enable(void); +void amd_brs_enable_all(void); +void amd_brs_disable_all(void); +void amd_brs_drain(void); +void amd_brs_disable_all(void); +int amd_brs_setup_filter(struct perf_event *event); +void amd_brs_reset(void); + +static inline void amd_pmu_brs_add(struct perf_event *event) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + + perf_sched_cb_inc(event->ctx->pmu); + cpuc->lbr_users++; + /* + * No need to reset BRS because it is reset + * on brs_enable() and it is saturating + */ +} + +static inline void amd_pmu_brs_del(struct perf_event *event) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + + cpuc->lbr_users--; + WARN_ON_ONCE(cpuc->lbr_users < 0); + + perf_sched_cb_dec(event->ctx->pmu); +} + +void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in); + +/* + * check if BRS is activated on the CPU + * active defined as it has non-zero users and DBG_EXT_CFG.BRSEN=3D1 + */ +static inline bool amd_brs_active(void) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + + return cpuc->brs_active; +} =20 #else /* CONFIG_CPU_SUP_AMD */ =20 @@ -1218,6 +1270,23 @@ static inline int amd_pmu_init(void) return 0; } =20 +static inline int amd_brs_init(void) +{ + return 0; +} + +static inline void amd_brs_drain(void) +{ +} + +static inline void amd_brs_enable_all(void) +{ +} + +static inline void amd_brs_disable_all(void) +{ +} + #endif /* CONFIG_CPU_SUP_AMD */ =20 static inline int is_pebs_pt(struct perf_event *event) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-in= dex.h index 3faf0f97edb1..d44bc769dd6f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -667,6 +667,10 @@ #define MSR_IA32_PERF_CTL 0x00000199 #define INTEL_PERF_CTL_MASK 0xffff =20 +/* AMD Branch Sampling configuration */ +#define MSR_AMD_DBG_EXTN_CFG 0xc000010f +#define MSR_AMD_SAMP_BR_FROM 0xc0010300 + #define 
MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 =20 --=20 2.35.0.263.gb82422642f-goog From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C3A44C433EF for ; Tue, 8 Feb 2022 22:34:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1387644AbiBHWbY (ORCPT ); Tue, 8 Feb 2022 17:31:24 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37544 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386846AbiBHVRD (ORCPT ); Tue, 8 Feb 2022 16:17:03 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5E5C8C0612B8 for ; Tue, 8 Feb 2022 13:17:02 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id a12-20020a056902056c00b0061dc0f2a94aso387895ybt.6 for ; Tue, 08 Feb 2022 13:17:02 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=hpYRwQLRBekfx+PCqt9/1HSAYQriRfUkssje8sUgZSw=; b=sOK4RMt818cUZPOEOHeyV19TBhn5uBFBKquSjktW5SYy6qRp9nBkRB82fA/OVrQjbT zau9QpGFLq0wy5gqkLyktdJ/aVwh/ojsgDGOtbvxJZf1hkv5G/7vXYBwFrpS1Uq3HNEI ue/kBM5PmsKiJMkyb6+pSVEC8gVxrSvyPnpj1zhvFbVmCaT4SVVJ1VUniIDOkdaBGCZi RhIurQT6LcLl3Db57CNeL4P9lc7NX4fzwSg5TwmKlwme9XdHYekJeq5d55zAR6qnAf84 nkcHMYvnUwAWz1l4fHySz7vdZYVT+Vh4z/3jNBoPfLTrPKiZeUlfQT4zZ9AE9hlG2A++ ftCQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=hpYRwQLRBekfx+PCqt9/1HSAYQriRfUkssje8sUgZSw=; b=hGti+he40Usil3qtJAxTfBSNRVnKBk3Kc+TiGa8agcqc7HYWqfeXslM+nm2PK3X94L 
v5zLsONo9PnK74Dbi60CsLCBRndO20sWP/V9wLaTKPsytDkaCPTsrB0zbhORjh13pVtN SP+1EGlt7fu5yX242mSzgfD0t++7XRhYoYoI9ziAS+nq4cxi4Ny3sZbHVnrtVmvVw6bB xZ3/YxjgM52uzej41gaoGAgP9BZGzzJaXBtRyiO9mkdQDxhK2Y5WE0LbiI3+HNqki8IG Tc6dLhs3ph6T74HhBBTa3XJN8aHCXrptJo+RSDtoekf+xTCH4Caa6ABIrnVNBJm1GSR/ wNIA== X-Gm-Message-State: AOAM5308KorsR5YhbklsNIe+JUUGWymk6ZSpd7vAa/jX8jJ9S/2KH/Je Iq98h3PtLG/m11xmi8bnuZ/fPvYCD2CrIbGz3l7cCovQkPJAzfncKMHdCds0oHaXEWXq3HQQnZJ +H1m6lt6dl6LkgEsg4d2Uefnc0LmqMFGNjR0i9pSzlX8I4lGUjS/SNKZ1WXYjjGkWeWZ37XEl X-Google-Smtp-Source: ABdhPJz4tSKHW4S5Dd8IpdL9PG0xNiIntmhIVsFrDAE+HRTSNUI+YCx8meMBdWzz6F4B/Kf9Z3Kclf0o1xzY X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a25:e803:: with SMTP id k3mr6732621ybd.571.1644355021597; Tue, 08 Feb 2022 13:17:01 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:29 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-5-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 2.35.0.263.gb82422642f-goog Subject: [PATCH v6 04/12] perf/x86/amd: add branch-brs helper event for Fam19h BRS From: Stephane Eranian To: linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a pseudo event called branch-brs to help use the AMD Fam19h Branch Sampling feature (BRS). BRS samples taken branches, so it is best us= ed when sampling on a retired taken branch event (0xc4) which is what BRS captures. Instead of trying to remember the event code or actual event nam= e, users can simply do: $ perf record -b -e cpu/branch-brs/ -c 1000037 ..... 
Signed-off-by: Stephane Eranian --- arch/x86/events/amd/core.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 44d8f618bb3e..597defee1e02 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -1105,8 +1105,24 @@ static struct attribute_group group_caps_amd_brs =3D= { .is_visible =3D amd_brs_is_visible, }; =20 +#define AMD_FAM19H_BRS_EVENT 0xc4 /* Fam19h RETIRED_TAKEN_BRANCH_INSTRUCTI= ONS */ +EVENT_ATTR_STR(branch-brs, amd_branch_brs, + "event=3D" __stringify(AMD_FAM19H_BRS_EVENT)"\n"); + +static struct attribute *amd_brs_events_attrs[] =3D { + EVENT_PTR(amd_branch_brs), + NULL, +}; + +static struct attribute_group group_events_amd_brs =3D { + .name =3D "events", + .attrs =3D amd_brs_events_attrs, + .is_visible =3D amd_brs_is_visible, +}; + static const struct attribute_group *amd_attr_update[] =3D { &group_caps_amd_brs, + &group_events_amd_brs, NULL, }; =20 --=20 2.35.0.263.gb82422642f-goog From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0894EC47086 for ; Tue, 8 Feb 2022 22:34:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1389276AbiBHWeA (ORCPT ); Tue, 8 Feb 2022 17:34:00 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37556 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386847AbiBHVRF (ORCPT ); Tue, 8 Feb 2022 16:17:05 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 207B8C0612B8 for ; Tue, 8 Feb 2022 13:17:05 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id b187-20020a251bc4000000b0061e15c5024fso190758ybb.4 for ; Tue, 08 
Feb 2022 13:17:05 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=IkSsAEe4tLZ7sPXk1cQDbdyI/ymKu70QdfkWphwdEME=; b=b79Z7mrt1T2rqjlQtvxG17JrcAsR5iLV5BcMxiBFygqW3qVgCsA77QfwZtD2vuQU7J cfEe2GM7sD1ayD+2/wyB3JjaXRE357xbtHNQozxQOmj5kA08Q2eBZFr8f9eWB00WxvdE udxuQSNKJXvYjAudABDXGuC60jf3ZP8UEmtoqgfLstZdnVHRo9AnvDrKwu631Kx1Ojgl s0uBonnRONcL6fZ3DMCpxqDiNnVw++0Z6Ps+n3yCQuis5Jj620sfzQVc8+yk1UH1OeIH fgYU0b9rkBxh4jfG1TjuBFZq17+JRhxD1O+H/GD7jyesjOw7sUJZOxbl0djhjp4T8fRn BQiQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=IkSsAEe4tLZ7sPXk1cQDbdyI/ymKu70QdfkWphwdEME=; b=Ca6jWelzds8UEnn7aljyAvB6z+whHVovwsJE5i8uL5Iu3+xGpdX/5sowxVB6o93inl v++/ont6AeKQ+B+z8dDM65kNvltulh1D58HOqeWNFsXFcuh3IxyuVBgI5jtPpmdnp3ve mrI6ycW7WPnuIrU7HVCpNsyS0N+e4G9Z/qtJuFbZCcq7t//Gf90sEme8YfR7Q5UEshO9 1mKuxekkmIps+tJGqXWz22Mhm5B6ULizax5Rp2m5zsLDc7LQyjdltbRPlhjIWQR1a/6R bnN0Z3t4+xiZLrrMXTcU9TeIyR2F3aHbyS62N2c6Emoibhbc0wgrK3xbfy1+T/ms4mYP 8feg== X-Gm-Message-State: AOAM533fqBvWz3Y3ZykSDTnuE+cMdt31Rd7kWcCoq7gd/31QSeJKzDUU jPkhPOzxNlJBLjKrwMDLZfVAgVeip2KmKDAV3SH1I/gEKjrdL8BlWrAnf2Aq2dKLFzDkzKtR1GG HdLj8MR/6LdWVKnFMdlDw68BcKJiDjhqM2uO7K6fw6HqS8WoYH0ZNaHQhSjqxU/IjiwOxLaNG X-Google-Smtp-Source: ABdhPJxTVkfw8EifXuKk44o0hXOLcZKl9tvUeVIC23vGxANJlD9t8VEaIHcuXL1EJCskdf6PGZH7c2cLpSSQ X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a25:4d5:: with SMTP id 204mr6406319ybe.485.1644355024283; Tue, 08 Feb 2022 13:17:04 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:30 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-6-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 
2.35.0.263.gb82422642f-goog Subject: [PATCH v6 05/12] perf/x86/amd: enable branch sampling priv level filtering From: Stephane Eranian To: linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The AMD Branch Sampling feature does not provide hardware filtering by privilege level. The associated PMU counter does, but the branch sampling by itself does not. Given how BRS operates there is a possibility that BRS captures kernel level branches even though the event is programmed to count only at the user level. Implement a workaround in software by removing the branches which belong to the wrong privilege level. The privilege level is evaluated on the target of the branch and not the source so as to be compatible with other architectur= es. As a consequence of this patch, the number of entries in the PERF_RECORD_BRANCH_STACK buffer may be less than the maximum (16). It could even be zero. Another consequence is that consecutive entries in the branch stack may not reflect the actual code path and may have discontinuities, in case kernel branches were suppressed. But this is no different than what happens on other architectures. 
Signed-off-by: Stephane Eranian --- arch/x86/events/amd/brs.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c index 3c13c484c637..40461c3ce714 100644 --- a/arch/x86/events/amd/brs.c +++ b/arch/x86/events/amd/brs.c @@ -92,10 +92,6 @@ int amd_brs_setup_filter(struct perf_event *event) if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) !=3D PERF_SAMPLE_BRANCH_ANY) return -EINVAL; =20 - /* can only capture at all priv levels due to the way BRS works */ - if ((type & PERF_SAMPLE_BRANCH_PLM_ALL) !=3D PERF_SAMPLE_BRANCH_PLM_ALL) - return -EINVAL; - return 0; } =20 @@ -195,6 +191,21 @@ void amd_brs_disable_all(void) amd_brs_disable(); } =20 +static bool amd_brs_match_plm(struct perf_event *event, u64 to) +{ + int type =3D event->attr.branch_sample_type; + int plm_k =3D PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV; + int plm_u =3D PERF_SAMPLE_BRANCH_USER; + + if (!(type & plm_k) && kernel_ip(to)) + return 0; + + if (!(type & plm_u) && !kernel_ip(to)) + return 0; + + return 1; +} + /* * Caller must ensure amd_brs_inuse() is true before calling * return: @@ -252,8 +263,6 @@ void amd_brs_drain(void) if (to =3D=3D BRS_POISON) break; =20 - rdmsrl(brs_from(brs_idx), from); - /* * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved. 
* Necessary to generate proper virtual addresses suitable for @@ -261,6 +270,11 @@ void amd_brs_drain(void) */ to =3D (u64)(((s64)to << shift) >> shift); =20 + if (!amd_brs_match_plm(event, to)) + continue; + + rdmsrl(brs_from(brs_idx), from); + perf_clear_branch_entry_bitfields(br+nr); =20 br[nr].from =3D from; --=20 2.35.0.263.gb82422642f-goog From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 895C3C3527E for ; Tue, 8 Feb 2022 22:34:14 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1388977AbiBHWds (ORCPT ); Tue, 8 Feb 2022 17:33:48 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37570 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386853AbiBHVRI (ORCPT ); Tue, 8 Feb 2022 16:17:08 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D3B05C0612B8 for ; Tue, 8 Feb 2022 13:17:07 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id a12-20020a056902056c00b0061dc0f2a94aso388597ybt.6 for ; Tue, 08 Feb 2022 13:17:07 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=l7SK3hpl2tjo8fmJkEKtw/l+MWg699GiBeiSDOMnjGI=; b=M7vZM4ZREjaUtmHzubGbUso3C4ozlbjNKZqvghtgVrz9MmcgRU5etkyKJsvmcr8Q3r q6lyfZsEdfSK29U50aZ0pOWW0gUYabrBzKqxUHhNHzojj9Zh0kSJIcoz7SPZcV77RwB1 MrAkKurRMn9nbC/21+6kuVSn21Y3j2RKG8rKMETkOJElB2MVKLSnJVOeL4kou9dxPOP/ UVFWo0i8QPdL+5pu/INhcuHnJULb1D6hCry/NBXMrFQ4DYhwSzSUWykWM0qIyukeutWg GRJPF1MDCd/xxB8xzJyFDfbJqIBLLcc9sFJP+75ZRsid889rfjjEyIkZLMr/HC5/7s7x sw/A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; 
s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=l7SK3hpl2tjo8fmJkEKtw/l+MWg699GiBeiSDOMnjGI=; b=F1y4S5485ZX340nu+1R0NcHNB/gFOcnMTsnVoQUyrGDMJq0mKHgQIX6m67PyOOwpnx nkdeh6v3ShkB5fVEqhcR4sAszORaXJbNUlUx4vLPeBpzNQ43J3c/Fzd4R9Mrlj10P3q3 gmp+P/IhcdIqMMEaIUCGpAYS5X1r9v1lfyrQdlcsCvXK8bNo07YDUHKja/IMghuWQMJM NC50iVENq6Jo8+F930e0yf/mewgWX9/YbBG/q0QL7ZaROLxw5He9WP8JNdlIGhSp4k0q L5w271SPHSz3pVz/f4EYng5IlSvQ4uJz3kbh40zUffY9fE8zqox/FOLfoDA3tZ6jdyMA YMCQ== X-Gm-Message-State: AOAM530P7NEBvv2owE8Pcjlb4wBm/x3QaorI+2ok1G1SJDVHJKX9PGIe KVKFdLwQtUZnum5Hd0lSzGH8CnG16f66qNb1ITFchtdWZ9K5X+oYOXjxMaXAELUFI/sgaK5CWxI F7OrDKNza9lznkQwhD1o5jiy+O6AFkQNYPb/KxQIuwQQial+0bDLSwxBGGPOZVEGdN4igRc1r X-Google-Smtp-Source: ABdhPJww16HAN32Z6YUGCbf+r3wgG+Y5yNKVjcEJ3h20rpY3mB6H5B3KgHoziz1vex98CAy2Q8lFajjLbHw/ X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a81:af54:: with SMTP id x20mr4840845ywj.337.1644355027029; Tue, 08 Feb 2022 13:17:07 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:31 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-7-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 2.35.0.263.gb82422642f-goog Subject: [PATCH v6 06/12] perf/x86/amd: add AMD branch sampling period adjustment From: Stephane Eranian To: linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add code to adjust the sampling event period when used with the Branch Sampling feature (BRS). Given the depth of the BRS (16), the period is reduced by that depth such that in the best case scenario, BRS saturates at the desired sampling period. 
In practice, though, the processor may execute more branches. Given a desired period P and a depth D, the kernel programs the actual period at P - D. After P occurrences of the sampling event, the counter overflows. It then may take X branches (skid) before the NMI is caught and held by the hardware and BRS activates. Then, after D branches, BRS saturates and the NMI is delivered. With no skid, the effective period would be (P - D) + D =3D P. In practice, however, it will likely be (P - D)= + X + D. There is no way to eliminate X or predict X. Signed-off-by: Stephane Eranian --- arch/x86/events/core.c | 7 +++++++ arch/x86/events/perf_event.h | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index c2a890caeb0a..ed285f640efe 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1374,6 +1374,13 @@ int x86_perf_event_set_period(struct perf_event *eve= nt) x86_pmu.set_topdown_event_period) return x86_pmu.set_topdown_event_period(event); =20 + /* + * decrease period by the depth of the BRS feature to get + * the last N taken branches and approximate the desired period + */ + if (has_branch_stack(event)) + period =3D amd_brs_adjust_period(period); + /* * If we are way outside a reasonable range then just skip forward: */ diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 3485a4cf0241..25b037b571e4 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1263,6 +1263,14 @@ static inline bool amd_brs_active(void) return cpuc->brs_active; } =20 +static inline s64 amd_brs_adjust_period(s64 period) +{ + if (period > x86_pmu.lbr_nr) + return period - x86_pmu.lbr_nr; + + return period; +} + #else /* CONFIG_CPU_SUP_AMD */ =20 static inline int amd_pmu_init(void) @@ -1287,6 +1295,10 @@ static inline void amd_brs_disable_all(void) { } =20 +static inline s64 amd_brs_adjust_period(s64 period) +{ + return period; +} #endif /* CONFIG_CPU_SUP_AMD */ =20 
static inline int is_pebs_pt(struct perf_event *event) --=20 2.35.0.263.gb82422642f-goog From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7A6D4C35273 for ; Tue, 8 Feb 2022 22:34:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1388260AbiBHWch (ORCPT ); Tue, 8 Feb 2022 17:32:37 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37596 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386854AbiBHVRN (ORCPT ); Tue, 8 Feb 2022 16:17:13 -0500 Received: from mail-pl1-x649.google.com (mail-pl1-x649.google.com [IPv6:2607:f8b0:4864:20::649]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B4A8BC0612B8 for ; Tue, 8 Feb 2022 13:17:10 -0800 (PST) Received: by mail-pl1-x649.google.com with SMTP id 4-20020a170902c20400b0014d61c5a5b1so302826pll.14 for ; Tue, 08 Feb 2022 13:17:10 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=dEWqa6ll67+XS8zqG5upDAQcRA944Vkil3rbd3Ql4r0=; b=sVdAXqy5YQYAyluM+87YKfNCyBV41r86ejBj1FZy0iA4BBvN7GyaIF7fUMqCc+x/pR 8VaW5oS5h4lk16chyDzoA1ASUeb97LlgAQ009oc06roZ37sE3wf8pUuVgPP4ZcaAR4hZ vJ6X9lVX6jY7pSAHG4tInnGftXISZvlNNqCYV2cYohESxGprVC4FN/GYHZ9RfRjg7zR4 n0wwD1Gj8Q0EQzGErPr+CURkEsZcDXmOjfcJuz4KVCfLdbWVJFyMoFfLaUyeXRekAzm1 1H6Ebs+yaye61ncsW9WC5qeOfNfFO9mVZ5IaiW6YkiAT3ecuOrNST+6t8Mp7gk9Wyo2R fO9A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=dEWqa6ll67+XS8zqG5upDAQcRA944Vkil3rbd3Ql4r0=; b=JcE94RaSMDa+3ldSc1bFKrbUPpP81irz3UjZ1SpGXn8S1sXByQstvRAHjailH86lmH 
W8K4m3I3MCaxBBYfDnCWBMrztvAapUzC2VP+zNuR67j2zwHu32Fpg1WQWLUDN0DoLvUv 7i+BGQF6q4ZkWoVr/OrSDrj1gNdWfCTKrkutvga527MSJjmaCvE/tnXYwPdP4TYLgu0U cAy33MP6raefnhg4fl4eKCSTXWh/8Fxlmv6X15hz3pdushWPucwxpj72OBx3/GlAVnUJ LEVV9R5sd+J5fBHnX8zPcdyPaB30kj6V30aU/ftutRiZah0FtM7pMRmYNaC9OwPFU6wI 8HRQ== X-Gm-Message-State: AOAM530p0Ck6NpBT/qeAeOAo6jKBxs7/YYTmCq9a5WP9Adm//uTVT0SW KMp9JPVdtgH/JeQARkScfLZjoif7lMIt26nrDMaNVSO8HDUbFJBobf4EpjTVHN1C6j+bf8HSh5b J/voAvP9V//4xvzVeTZ516sERKC3jZjYdrcIOvHAzd++xtsq2qiMHKqIoQU8RjgbMEpmdjSBC X-Google-Smtp-Source: ABdhPJwHX2WZBSwLZnHLKP/cSYAhGuwYChAF9oD04o7EmDNAEjoAD27twBuQi5uJjrewhDa+XJlVUiGvJuSJ X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a17:902:6a83:: with SMTP id n3mr6441644plk.139.1644355030018; Tue, 08 Feb 2022 13:17:10 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:32 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-8-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 2.35.0.263.gb82422642f-goog Subject: [PATCH v6 07/12] perf/x86/amd: make Zen3 branch sampling opt-in From: Stephane Eranian To: linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a kernel config option CONFIG_PERF_EVENTS_AMD_BRS to make the support for AMD Zen3 Branch Sampling (BRS) an opt-in compile time option. 
Signed-off-by: Stephane Eranian --- arch/x86/events/Kconfig | 8 ++++++ arch/x86/events/amd/Makefile | 3 ++- arch/x86/events/perf_event.h | 49 ++++++++++++++++++++++++++++-------- 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig index d6cdfe631674..09c56965750a 100644 --- a/arch/x86/events/Kconfig +++ b/arch/x86/events/Kconfig @@ -44,4 +44,12 @@ config PERF_EVENTS_AMD_UNCORE =20 To compile this driver as a module, choose M here: the module will be called 'amd-uncore'. + +config PERF_EVENTS_AMD_BRS + depends on PERF_EVENTS && CPU_SUP_AMD + bool "AMD Zen3 Branch Sampling support" + help + Enable AMD Zen3 branch sampling support (BRS) which samples up to + 16 consecutive taken branches in registers. + endmenu diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile index cf323ffab5cd..b9f5d4610256 100644 --- a/arch/x86/events/amd/Makefile +++ b/arch/x86/events/amd/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CPU_SUP_AMD) +=3D core.o brs.o +obj-$(CONFIG_CPU_SUP_AMD) +=3D core.o +obj-$(CONFIG_PERF_EVENTS_AMD_BRS) +=3D brs.o obj-$(CONFIG_PERF_EVENTS_AMD_POWER) +=3D power.o obj-$(CONFIG_X86_LOCAL_APIC) +=3D ibs.o obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE) +=3D amd-uncore.o diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 25b037b571e4..4d050579dcbd 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1218,6 +1218,8 @@ static inline bool fixed_counter_disabled(int i, stru= ct pmu *pmu) #ifdef CONFIG_CPU_SUP_AMD =20 int amd_pmu_init(void); + +#ifdef CONFIG_PERF_EVENTS_AMD_BRS int amd_brs_init(void); void amd_brs_disable(void); void amd_brs_enable(void); @@ -1252,25 +1254,52 @@ static inline void amd_pmu_brs_del(struct perf_even= t *event) =20 void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in); =20 -/* - * check if BRS is activated on the CPU - * active defined as it has non-zero users and 
DBG_EXT_CFG.BRSEN=3D1 - */ -static inline bool amd_brs_active(void) +static inline s64 amd_brs_adjust_period(s64 period) { - struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + if (period > x86_pmu.lbr_nr) + return period - x86_pmu.lbr_nr; =20 - return cpuc->brs_active; + return period; +} +#else +static inline int amd_brs_init(void) +{ + return 0; } +static inline void amd_brs_disable(void) {} +static inline void amd_brs_enable(void) {} +static inline void amd_brs_drain(void) {} +static inline void amd_brs_lopwr_init(void) {} +static inline void amd_brs_disable_all(void) {} +static inline int amd_brs_setup_filter(struct perf_event *event) +{ + return 0; +} +static inline void amd_brs_reset(void) {} =20 -static inline s64 amd_brs_adjust_period(s64 period) +static inline void amd_pmu_brs_add(struct perf_event *event) { - if (period > x86_pmu.lbr_nr) - return period - x86_pmu.lbr_nr; +} + +static inline void amd_pmu_brs_del(struct perf_event *event) +{ +} + +static inline void amd_pmu_brs_sched_task(struct perf_event_context *ctx, = bool sched_in) +{ +} =20 +static inline s64 amd_brs_adjust_period(s64 period) +{ return period; } =20 +static inline void amd_brs_enable_all(void) +{ +} + +#endif + #else /* CONFIG_CPU_SUP_AMD */ =20 static inline int amd_pmu_init(void) --=20 2.35.0.263.gb82422642f-goog From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 58C9BC4321E for ; Tue, 8 Feb 2022 22:34:11 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1387934AbiBHWb5 (ORCPT ); Tue, 8 Feb 2022 17:31:57 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37610 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386858AbiBHVRO (ORCPT ); Tue, 8 Feb 2022 16:17:14 -0500 
Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A24FDC0612B8 for ; Tue, 8 Feb 2022 13:17:13 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id s73-20020a25aa4f000000b0061d764d3c13so453028ybi.1 for ; Tue, 08 Feb 2022 13:17:13 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=4dR1yNXtmyr78XdAPm6ZMGJLtvP6soDdLw1TSPN/FsI=; b=Lrj1tvUasnQmtz5WlTQQZ+9yjaP4dfMnodiJ+cdc9TVnGgD+jiZkyTV1ILXS7CXJ2n 2JW9VQmmg2xrspIg+IA9NKrIVZ3S8MvlVjsxcipXTlz4M/sDb9vFKioxlrQLh3ujav47 bA4YWHFqx3mNZodX4jPA3QenKxCjrEci+xMD+soLPt51+d9r/v3BrtapNaI14XjX9W0s C9gYXs1pQuq5m3CWtnw+ENKZVFu3FCQpr8mMsfpqw5TdtPLZkxCEKQDD6BJU3KuIpJiX 0zV6Zlj6C7ii79ob9tsAVzCJ7M8bmNP1JDhfW0TwQb/NiFYQewuwEHMZRi/QqycTh3L7 eAmw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=4dR1yNXtmyr78XdAPm6ZMGJLtvP6soDdLw1TSPN/FsI=; b=bckyKYRlEcSpzxl9R8OR+CQfGNaCpWQofRi/NA1eowDB0wzjOxoqkxQPtsMC8ReX1P bsvIQ68nK2QME8SsjGqwZgs3Sil4xuSdFwa6poIYx+JIZcKwC1k901iQeXxd5K2VMo6e 34Csq8XwfC34Xk+Yi3yFvRyj/YSiq+T574NPMQbVBWOoVKP1/51Yg/YxkEJXb+JnWi1T FYb1VRG0MFMgfFC/f8y7qXi989fnAmOOfLWNBnwja3Em3mh1DZNni4NDViwYhH3GgSdD hb34cBaX+j3wAFawMVyXZSfUcJjeyHG2dPOiL4r0VfNg9stIoXkQzSdGvMYWGZ20Wok/ 3NJA== X-Gm-Message-State: AOAM533Wo3g8t9vSEYinCb9VgQvNDzA5vOZT/6bk0Rn9Tw5GVgoEGF6B DnIclOEbdMIZU4oCwp21ZjvEmB4rtsa9n4jxsauNBs0MXT0UQ45R2ya3m4RtCnvl2PlGZP9s2n9 AA4pyS97TRodCW9xDzffc3yioDdhKflwapwk8fEXCqhOoHQpef/GYNkDr350eXRoSVSQALUZ5 X-Google-Smtp-Source: ABdhPJyl9fK0SIMuiVRlhtI77KWnRTLqUU5cOlzF7J9P/p0y4WhEaMJnqOPrg56OWApJUCep4obNtjt/2ZgM X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a25:c5c4:: with SMTP id 
v187mr6638764ybe.643.1644355032864; Tue, 08 Feb 2022 13:17:12 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:33 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-9-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 2.35.0.263.gb82422642f-goog Subject: [PATCH v6 08/12] ACPI: add perf low power callback From: Stephane Eranian To: linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add an optional callback needed by some PMU features, e.g., AMD BRS, to give the perf_events code a chance to change its state before a CPU goes to low power and after it comes back. The callback is an empty no-op when the PERF_NEEDS_LOPWR_CB flag is not set. This flag must be set in the arch-specific perf_event.h header whenever needed. When not set, there is no impact on the ACPI code. 
Signed-off-by: Stephane Eranian --- drivers/acpi/acpi_pad.c | 6 ++++++ drivers/acpi/processor_idle.c | 5 +++++ include/linux/perf_event.h | 6 ++++++ 3 files changed, 17 insertions(+) diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index f45979aa2d64..a306a07a60b5 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -164,6 +164,9 @@ static int power_saving_thread(void *data) tsc_marked_unstable =3D 1; } local_irq_disable(); + + perf_lopwr_cb(true); + tick_broadcast_enable(); tick_broadcast_enter(); stop_critical_timings(); @@ -172,6 +175,9 @@ static int power_saving_thread(void *data) =20 start_critical_timings(); tick_broadcast_exit(); + + perf_lopwr_cb(false); + local_irq_enable(); =20 if (time_before(expire_time, jiffies)) { diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 86560a28751b..880c0a43a529 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -21,6 +21,7 @@ #include #include #include +#include #include =20 /* @@ -544,6 +545,8 @@ static void wait_for_freeze(void) */ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx) { + perf_lopwr_cb(true); + if (cx->entry_method =3D=3D ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cx); @@ -554,6 +557,8 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_pr= ocessor_cx *cx) inb(cx->address); wait_for_freeze(); } + + perf_lopwr_cb(false); } =20 /** diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 496eb6aa6e54..1b98e46588bc 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1676,4 +1676,10 @@ typedef int (perf_snapshot_branch_stack_t)(struct pe= rf_branch_entry *entries, unsigned int cnt); DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack= _t); =20 +#ifndef PERF_NEEDS_LOPWR_CB +static inline void perf_lopwr_cb(bool mode) +{ +} +#endif + #endif /* _LINUX_PERF_EVENT_H */ --=20 
2.35.0.263.gb82422642f-goog From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0F3D3C43217 for ; Tue, 8 Feb 2022 22:37:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1388293AbiBHWgQ (ORCPT ); Tue, 8 Feb 2022 17:36:16 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37654 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386861AbiBHVRR (ORCPT ); Tue, 8 Feb 2022 16:17:17 -0500 Received: from mail-pf1-x44a.google.com (mail-pf1-x44a.google.com [IPv6:2607:f8b0:4864:20::44a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BB0B2C0612BC for ; Tue, 8 Feb 2022 13:17:16 -0800 (PST) Received: by mail-pf1-x44a.google.com with SMTP id i16-20020aa78d90000000b004be3e88d746so234301pfr.13 for ; Tue, 08 Feb 2022 13:17:16 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=uTDmTBSOOY683ot1kd61hkpiMB3wxXOtomvwYLlA3dQ=; b=Cx0XtYRxcH4rbHL4wV+lB2OzYKeQ/4cXIVomva3Oh0zh4c5JfPYsIlzFVACwveQyJY vI0yMSxMGyz6VtTOn4aXbOggGYqx85kdcYNpF3yHRMj84a6h0g957qJmiQ2AWwnvsEAU 0BdOFvtVw/3D37X452H0sGUmrvwq+4tdBTQMLRT56m7bGwv0729XSrC/a1iFte65RyoO rchhAp36fxKyVq/5GeEbdrQs7T2d3qsL588Wtjy857HKW7S7xyJzOJ8GxDVocVS6gpG4 OejHORJog5iou4Kh4Py7BllaOp3s8PzlH96jjma35fffvFCXhFWgUzLg2UJs5lN2PxVF XpAg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=uTDmTBSOOY683ot1kd61hkpiMB3wxXOtomvwYLlA3dQ=; b=NswopKFDxVC0oNSSRD+vOQElvEVPtuB9JYgDA3+X16vDc5L25XE+aIUmXdRgK9Oal2 G0hRc3ywT0jV+w84u1Y471NsxdKuOdVpzM/jbUo+lkuF2ll38C7XzxSXr+J8/fKxRqmB 
qB6SFwKz0zwARGXxnS7HqZ7Zt3AQwO5wD1XoiXhxXyyrRcskW8fZOYqvGtYx51UfO5T8 f3+9yfXXN8WwVA89nFgSTHEqNynqDzrtY6EghLnJsYn9DQ/msvoTXUjVoAby/ZedVSVp 66qCtgjKxDvMpsIygktZTqWSKwbMDwfsZskIY0Fa5oqQGwWtigqToiF6PDgKQ+phVvP/ xYVQ== X-Gm-Message-State: AOAM533f2rTOuU7J9AYr6ZgoB3iiRXfgkbgaQEesKCjfjeDovfRGMgwz IruQIPfYcQ8GNKYaPDKxDSJkF23nxrl43Ixo6JpZi2Ndp3mdHdIbvSyg3GyLwc40g6Y8Nde59h+ 8gW77dEdng6YFt+24kPrj1/KYan5AmFAflwnTjBE4sOiZfDsJLNiu5XV+Y7um9xhxR00SZBm6 X-Google-Smtp-Source: ABdhPJzECl4nloGcEaC4D2VkDIalK0xVDOgyyn6E5UEquw+JIDhh+rcA6l20CApUgd4KLd3Tb2FKjwEx4FJa X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a17:902:b684:: with SMTP id c4mr6195450pls.100.1644355035935; Tue, 08 Feb 2022 13:17:15 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:34 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-10-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 2.35.0.263.gb82422642f-goog Subject: [PATCH v6 09/12] perf/x86/amd: add idle hooks for branch sampling From: Stephane Eranian To: linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" On AMD Fam19h Zen3, the branch sampling (BRS) feature must be disabled befo= re entering low power and re-enabled (if was active) when returning from low power. Otherwise, the NMI interrupt may be held up for too long and cause problems. Stopping BRS will cause the NMI to be delivered if it was held up. Define a perf_amd_brs_lopwr_cb() callback to stop/restart BRS. The callback is protected by a jump label which is enabled only when AMD BRS is detected. In all other cases, the callback is never called. 
Signed-off-by: Stephane Eranian --- arch/x86/events/amd/brs.c | 32 +++++++++++++++++++++++++++++++ arch/x86/events/amd/core.c | 4 ++++ arch/x86/events/perf_event.h | 1 + arch/x86/include/asm/perf_event.h | 21 ++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c index 40461c3ce714..185a58cea917 100644 --- a/arch/x86/events/amd/brs.c +++ b/arch/x86/events/amd/brs.c @@ -7,6 +7,7 @@ * Contributed by Stephane Eranian */ #include +#include #include #include =20 @@ -329,3 +330,34 @@ void amd_pmu_brs_sched_task(struct perf_event_context = *ctx, bool sched_in) if (sched_in) amd_brs_poison_buffer(); } + +DEFINE_STATIC_KEY_FALSE(perf_lopwr_needed); + +/* + * called from ACPI processor_idle.c or acpi_pad.c + * with interrupts disabled + */ +void perf_amd_brs_lopwr_cb(bool lopwr_in) +{ + struct cpu_hw_events *cpuc =3D this_cpu_ptr(&cpu_hw_events); + union amd_debug_extn_cfg cfg; + + /* + * on mwait in, we may end up in non C0 state. + * we must disable branch sampling to avoid holding the NMI + * for too long. We disable it in hardware but we + * keep the state in cpuc, so we can re-enable. 
+ * + * The hardware will deliver the NMI if needed when brsmen cleared + */ + if (cpuc->brs_active) { + cfg.val =3D get_debug_extn_cfg(); + cfg.brsmen =3D !lopwr_in; + set_debug_extn_cfg(cfg.val); + } +} + +void __init amd_brs_lopwr_init(void) +{ + static_branch_enable(&perf_lopwr_needed); +} diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 597defee1e02..ea71ee52b758 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include #include #include #include @@ -1189,6 +1190,9 @@ static int __init amd_core_pmu_init(void) * The put_event_constraints callback is shared with * Fam17h, set above */ + + /* branch sampling must be stopped when entering low power */ + amd_brs_lopwr_init(); } } =20 diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 4d050579dcbd..2ed7bf5b51b1 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1226,6 +1226,7 @@ void amd_brs_enable(void); void amd_brs_enable_all(void); void amd_brs_disable_all(void); void amd_brs_drain(void); +void amd_brs_lopwr_init(void); void amd_brs_disable_all(void); int amd_brs_setup_filter(struct perf_event *event); void amd_brs_reset(void); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_= event.h index 58d9e4b1fa0a..42753a9dc3ed 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -513,6 +513,27 @@ static inline void intel_pt_handle_vmx(int on) #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) extern void amd_pmu_enable_virt(void); extern void amd_pmu_disable_virt(void); + +#if defined(CONFIG_PERF_EVENTS_AMD_BRS) + +#define PERF_NEEDS_LOPWR_CB 1 + +/* + * architectural low power callback impacts + * drivers/acpi/processor_idle.c + * drivers/acpi/acpi_pad.c + */ +extern void perf_amd_brs_lopwr_cb(bool lopwr_in); +DECLARE_STATIC_KEY_FALSE(perf_lopwr_needed); + +static inline 
void perf_lopwr_cb(bool mode) +{ + /* key enabled only when BRS is available */ + if (static_branch_unlikely(&perf_lopwr_needed)) + perf_amd_brs_lopwr_cb(mode); +} +#endif /* PERF_NEEDS_LOPWR_CB */ + #else static inline void amd_pmu_enable_virt(void) { } static inline void amd_pmu_disable_virt(void) { } --=20 2.35.0.263.gb82422642f-goog From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 15BB2C4167E for ; Tue, 8 Feb 2022 22:25:46 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1387600AbiBHWZo (ORCPT ); Tue, 8 Feb 2022 17:25:44 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37694 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386874AbiBHVRU (ORCPT ); Tue, 8 Feb 2022 16:17:20 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id F213FC0612B8 for ; Tue, 8 Feb 2022 13:17:19 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id q199-20020a252ad0000000b0061e113c9953so257466ybq.18 for ; Tue, 08 Feb 2022 13:17:19 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=XenqmXleccybpW6tauROr+uyQr3Aow8wYknpY7cyN4o=; b=gRInUedMBFu4RCwxude+HLD90BW8l2ZI8mFca/kltWg371JlYLIBMWMbAMzSuQmw7u Qi8pj0cGI9jy7DVl5fDM2MHbSIdL6w+iLq0sdeYLh9oRTKUKA/E4QJKgylzv1AF7yDqB J4N1iKfNYXwUxKlrdrAsVX1uJFRoARxW8IYWkU/hD3f2xY6/TyRdrreFmpoQ1jENGUIE ItuevWzJQ+YAIUegro0s8jXxgQxTNg1BsgJANoJGkXXbLSJEe2R29OGgWt4YcrC4LLBN fNQwzMWwvnHRs4xNh7zKqlFYJJtz5Y03ufouQsNYe9klgiUsQ1nz6kfc/oKW2rc8ZCzx SX9w== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; 
s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=XenqmXleccybpW6tauROr+uyQr3Aow8wYknpY7cyN4o=; b=r4G18Zo/9gx055gTXiNKsI1vYN+cJxvUH8jhKBpWAtoNQYBBsJxBe5AtPGEe4VJIg/ hDzzWQT6sjvH1S8CPAVqnrq5FvfFv2Jwo0iYVpLu9Y5wF+Xxh4SjCBxBkLyicVy8md6c S9OPp5jZ2lio5CdJ6On3Wq8nUW10qEkVaB8kQDqja6L5VCItIXG86fe6NBT+/GBdZ2Cj bD+mKnzbDuEZHDLl5IzMKbV4mVGi/wIg+Ry9DE6ap5REdbncjyM1r+wyQdP/69MlBBpd 71gwiWlzSClALDohD+E1E9yPdgem2R48NXpF7jcBz3qrSWP7VDLPG3iaQwW5pbRMWhA6 wPEg== X-Gm-Message-State: AOAM530YSOobHdmpH9E+e90quLKC46oAANtHhYselXJOgniTtnEQiGby h/njw18c6DjTR4SvJuJ4WsilOA/89bP1a8IHkYzu71Gf90DjFpuXDOxM9HP5Bs8nCO0XVthZFaU kzDA8F4aNU4Z72wlojpRuvBpNP6Ol1aE9JUVuEP2uCK1Q6JsF1J6HVqvjTHa3h1GHgraBkQvc X-Google-Smtp-Source: ABdhPJzIZgA6dXUwM0NAYtLt4hdqaYI3F874SQXFKePsPfh0W0XbyBxDauA/Fd396Gng+Kf61QspPBVXxHhk X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a81:c607:: with SMTP id l7mr6851336ywi.265.1644355039100; Tue, 08 Feb 2022 13:17:19 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:35 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-11-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 2.35.0.263.gb82422642f-goog Subject: [PATCH v6 10/12] perf tools: Improve IBS error handling From: Stephane Eranian To: linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Kim Phillips improve the error message returned on failed perf_event_open() on AMD when using IBS. Output of executing 'perf record -e ibs_op// true' BEFORE this patch: The sys_perf_event_open() syscall returned with 22 (Invalid argument)for ev= ent (ibs_op//u). 
/bin/dmesg | grep -i perf may provide additional information. Output after: AMD IBS cannot exclude kernel events. Try running at a higher privilege le= vel. Output of executing 'sudo perf record -e ibs_op// true' BEFORE this patch: Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for e= vent (ibs_op//). /bin/dmesg | grep -i perf may provide additional information. Output after: Error: AMD IBS may only be available in system-wide/per-cpu mode. Try using -a, o= r -C and workload affinity Signed-off-by: Kim Phillips Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Boris Ostrovsky Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Joao Martins Cc: Konrad Rzeszutek Wilk Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian --- tools/perf/util/evsel.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 22d3267ce294..d42f63a484df 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2847,9 +2847,22 @@ static bool find_process(const char *name) return ret ? 
false : true; } =20 +static bool is_amd(const char *arch, const char *cpuid) +{ + return arch && !strcmp("x86", arch) && cpuid && strstarts(cpuid, "Authent= icAMD"); +} + +static bool is_amd_ibs(struct evsel *evsel) +{ + return evsel->core.attr.precise_ip || !strncmp(evsel->pmu_name, "ibs", 3); +} + int evsel__open_strerror(struct evsel *evsel, struct target *target, int err, char *msg, size_t size) { + struct perf_env *env =3D evsel__env(evsel); + const char *arch =3D perf_env__arch(env); + const char *cpuid =3D perf_env__cpuid(env); char sbuf[STRERR_BUFSIZE]; int printed =3D 0, enforced =3D 0; =20 @@ -2949,6 +2962,17 @@ int evsel__open_strerror(struct evsel *evsel, struct= target *target, return scnprintf(msg, size, "Invalid event (%s) in per-thread mode, enable system wide with '-a'.", evsel__name(evsel)); + if (is_amd(arch, cpuid)) { + if (is_amd_ibs(evsel)) { + if (evsel->core.attr.exclude_kernel) + return scnprintf(msg, size, + "AMD IBS can't exclude kernel events. Try running at a higher privilege = level."); + if (!evsel->core.system_wide) + return scnprintf(msg, size, + "AMD IBS may only be available in system-wide/per-cpu mode. Try using -a= , or -C and workload affinity"); + } + } + break; case ENODATA: return scnprintf(msg, size, "Cannot collect data source with the load la= tency event alone. 
" --=20 2.35.0.263.gb82422642f-goog From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id CD617C4332F for ; Tue, 8 Feb 2022 22:34:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1388619AbiBHWdR (ORCPT ); Tue, 8 Feb 2022 17:33:17 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37720 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386880AbiBHVRX (ORCPT ); Tue, 8 Feb 2022 16:17:23 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 9A95AC0612B8 for ; Tue, 8 Feb 2022 13:17:22 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id q199-20020a252ad0000000b0061e113c9953so257802ybq.18 for ; Tue, 08 Feb 2022 13:17:22 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=ohbaKv3Lg/0Da2FFFbyy5azyuFvGZukwAaGJCbUrePw=; b=SKx+AgJfubJcERmdfYBFsFjNIZCuy1okXaW+Upyg8n+NM6mwgA1CWsSzmFUQQ9FzFh Kuy+E3vvzhtZ2h3EHEMm4GwLyNy7+D4KngxLeseIQtqrxN/bIOFSuslOVxCBXAhDaAlf oKWiAu1FbhNW4lt7rng/y0DEuyzSe0icv7li/pEdDnhMVeV5g5QnVnxfiwRe6KY8a98c HnRiAlmQ1yu3BM5/iwqx5TykECDJMiQC8fJML8uDRiV7ORY+2KwfJUJ8Kq6g4IZ4QmDO 0cB/hD3tq0X0abapCjENL2mgoEYs7IXbeoZJMthXYfc6rrlGstkypt2gT3DRUXogTh6e Y6XA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=ohbaKv3Lg/0Da2FFFbyy5azyuFvGZukwAaGJCbUrePw=; b=Ew4/wEKVsubkrb/iRGbYag9Q/ne4Zsf0KmT9TgZdgbr+iZnJL7BaTkj96a2OLLOumv LKdI3wNd5srRrz2T+bo9S/4WYnDUBOsOV2eyKOCX4YXn0i7/+wwGEaDKdeCEUCx0Cm+5 
4kFVZ65P7IifDStWDpFKE5uY/xTy+1TCZhGZWTQuNZ590REYD1rYkgKCBFEQVJVFGfvr dhzgWOkf0tNI2H2xTeVx8vhyjRHK2BqL5m6kCDYwfPnm7EmkRcczjXKXW0+c/24B1bFE HhKpCybMHLcPwHIpIxGxAhwhNFokqAmc3x6eOrGPvwoN1ZFpUZPqaTBz5GvRH4Q6fhhG XFGw== X-Gm-Message-State: AOAM531lVgrYtEA/Sd9r3IJOwM+Llm1rKQmJvxDG/3PnpGkkMv6rgRok JaVQwA5ETLKNpMn0UMMq0haHnKV8svbwiVwWjqZ3pDy5xwLN3bJtR/SZqntyLCrI7nhtDNHCA5k ZDybclvuKK49y7Bz4eXCmY3X3x5ufSEf/2Cv/OzQ84eUicNjBFwAc0GyQrMOABGi8spmLnB19 X-Google-Smtp-Source: ABdhPJzGaANv2LxnvqCCuYdwvnXsK2X2ChMgm2rnPY5ppU/8qM6BnNyTPK1i1ppKVA273TcfVVmOiLUZStTn X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a25:cdc1:: with SMTP id d184mr6229803ybf.489.1644355041791; Tue, 08 Feb 2022 13:17:21 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:36 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-12-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 2.35.0.263.gb82422642f-goog Subject: [PATCH v6 11/12] perf tools: Improve error handling of AMD Branch Sampling From: Stephane Eranian To: linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Improve the error message printed by perf when perf_event_open() fails on AMD Zen3 when using the branch sampling feature. In the case of EINVAL, the= re are two main reasons: frequency mode or period is smaller than the depth of the branch sampling buffer (16). The patch checks the parameters of the call and tries to print a relevant message to explain the error: $ perf record -b -e cpu/branch-brs/ -c 10 ls Error: AMD Branch Sampling does not support sampling period smaller than what is r= eported in /sys/devices/cpu/caps/branches. 
$ perf record -b -e cpu/branch-brs/ ls Error: AMD Branch Sampling does not support frequency mode sampling, must pass a f= ixed sampling period via -c option or cpu/branch-brs,period=3Dxxxx/. Signed-off-by: Stephane Eranian [Rebased on commit 9fe8895a27a84 ("perf env: Add perf_env__cpuid, perf_env_= _{nr_}pmu_mappings")] Signed-off-by: Kim Phillips --- tools/perf/util/evsel.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d42f63a484df..7311e7b4d34d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2857,6 +2857,12 @@ static bool is_amd_ibs(struct evsel *evsel) return evsel->core.attr.precise_ip || !strncmp(evsel->pmu_name, "ibs", 3); } =20 +static bool is_amd_brs(struct evsel *evsel) +{ + return ((evsel->core.attr.config & 0xff) =3D=3D 0xc4) && + (evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK); +} + int evsel__open_strerror(struct evsel *evsel, struct target *target, int err, char *msg, size_t size) { @@ -2971,6 +2977,14 @@ int evsel__open_strerror(struct evsel *evsel, struct= target *target, return scnprintf(msg, size, "AMD IBS may only be available in system-wide/per-cpu mode. 
Try using -a= , or -C and workload affinity"); } + if (is_amd_brs(evsel)) { + if (evsel->core.attr.freq) + return scnprintf(msg, size, + "AMD Branch Sampling does not support frequency mode sampling, must pass = a fixed sampling period via -c option or cpu/branch-brs,period=3Dxxxx/."); + /* another reason is that the period is too small */ + return scnprintf(msg, size, + "AMD Branch Sampling does not support sampling period smaller than what i= s reported in /sys/devices/cpu/caps/branches."); + } } =20 break; --=20 2.35.0.263.gb82422642f-goog From nobody Sun Jun 28 10:34:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id F360BC352A7 for ; Tue, 8 Feb 2022 22:26:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1387796AbiBHW0e (ORCPT ); Tue, 8 Feb 2022 17:26:34 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37752 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1386890AbiBHVR0 (ORCPT ); Tue, 8 Feb 2022 16:17:26 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5787CC0612BC for ; Tue, 8 Feb 2022 13:17:25 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id 2-20020a251302000000b006118f867dadso324967ybt.12 for ; Tue, 08 Feb 2022 13:17:25 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=AfnR3Nm7yc5ok/MjiQ3WXGdZyImD71++/kH8RUzdzEo=; b=iGiHWkFBaBN5bvqSrIpm9CvlBfMgTkuuVA6EthbtpSWjMnmORtCnFFKEJ/EOCl0OUb tQQgi/sCdNelt5HSNHz3jJuediSGaFROqcHQs4qMfU6KIGnae3LaZL805GJhUeowx9eH c/shKP4YQqL95KEdx3uYl4Er+lR0fVRv/ZvedjYQtLUj+26q0ZWVFkXWSNJyOoOlU76J 
4UzH/XErzqOQ/YWKTnkyFaYSIflQMoHK78qvthYhr2jiOoV8X6iQ4pKK4zV0VhJ6gt4T 9l6OjznvCKouvsD9qCaob3Pa/n2Za2bW673iX/L+JWg3cG/gggiJysDrCHtcyQiBIswk yLVw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=AfnR3Nm7yc5ok/MjiQ3WXGdZyImD71++/kH8RUzdzEo=; b=x0fbLW+GMVfXzmFThHBldfWXvRbN3DrbBQTWhjP7RBBAk8IyKPk+667CaLcsX2W2fj 59hGrkBu4BaR0aJTA6YqWmcVm/q+JT+Va2ZpIqvY3KWw3fj3Xe5VE1inFu2i3zoNhKRB 9ui80VnRZ5JtTa0YDJxOnArdnLj79NBt4MN9LAk8JqrlkK5iqJHCStyJluct/HkBz/AC hW8dGV/amC1SvZBK2GO4XGKqUMEX5whwrFqnNz3ZiXI9J/YGwQIA21fVUchzkwbxzMc1 pHRBKGA1chCRdgaEggO/dVIARzf8rHH2ctMUAkb5GWp6ximfzWJWBw0EhXXYA8Doh0Dd EXjw== X-Gm-Message-State: AOAM532+aisc7LweC3FWMEcu03OInp+5gM0TuALhQJ3aXLXSIesDwtCT A4cm8mWiaJD5s1ji7v+RF11SBa8iijnCgb9RtYJCWUXJh6ssNRk62VqLtRjJmrwRDT5JGlMwSXb +65PRwsGLOdN1olXS5FJdqxZT1+kGF1ZsIwIzc2Rsovznsvz+kIhUZ07NzHHI3FRpbSfis1wN X-Google-Smtp-Source: ABdhPJxsi9V32l7wcvyjIOPe9zUkfV3MbtC27mX+fQ98jrFwTM1ryS5Noz2QhHdTWMjuLjoRcr7A1Z8UtjFv X-Received: from uluru3.svl.corp.google.com ([2620:15c:2cd:202:6875:3c51:69be:6e2c]) (user=eranian job=sendgmr) by 2002:a25:bcd0:: with SMTP id l16mr5459204ybm.59.1644355044562; Tue, 08 Feb 2022 13:17:24 -0800 (PST) Date: Tue, 8 Feb 2022 13:16:37 -0800 In-Reply-To: <20220208211637.2221872-1-eranian@google.com> Message-Id: <20220208211637.2221872-13-eranian@google.com> Mime-Version: 1.0 References: <20220208211637.2221872-1-eranian@google.com> X-Mailer: git-send-email 2.35.0.263.gb82422642f-goog Subject: [PATCH v6 12/12] perf report: add addr_from/addr_to sort dimensions From: Stephane Eranian To: linux-kernel@vger.kernel.org Cc: peterz@infradead.org, kim.phillips@amd.com, acme@redhat.com, jolsa@redhat.com, songliubraving@fb.com Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" With the existing symbol_from/symbol_to, 
branches captured in the same function would be collapsed into a single function if the latencies associated with each branch (cycles) were all the same. That is the case on Intel Broadwell, for instance. Since Intel Skylake, the latency is captured by hardware and therefore is used to disambiguate branches. Add addr_from/addr_to sort dimensions to sort branches based on their addresses and not the function they are in. The output is still the function name but the offset within the function is provided to uniquely identify each branch. These new sort dimensions also help with annotate because they create different entries in the histogram which, in turn, generates proper branch annotations. Here is an example using AMD's branch sampling: $ perf record -a -b -c 1000037 -e cpu/branch-brs/ test_prg $ perf report Samples: 6M of event 'cpu/branch-brs/', Event count (approx.): 6901276 Overhead Command Source Shared Object Source Symbol = Target Symbol Basic= Block Cycle 99.65% test_prg test_prg [.] test_thread = [.] test_thread - 0.02% test_prg [kernel.vmlinux] [k] asm_sysvec_apic_timer_= interrupt [k] error_entry - $ perf report -F overhead,comm,dso,addr_from,addr_to Samples: 6M of event 'cpu/branch-brs/', Event count (approx.): 6901276 Overhead Command Shared Object Source Address Target= Address 4.22% test_prg test_prg [.] test_thread+0x3c [.] te= st_thread+0x4 4.13% test_prg test_prg [.] test_thread+0x4 [.] te= st_thread+0x3a 4.09% test_prg test_prg [.] test_thread+0x3a [.] te= st_thread+0x6 4.08% test_prg test_prg [.] test_thread+0x2 [.] te= st_thread+0x3c 4.06% test_prg test_prg [.] test_thread+0x3e [.] te= st_thread+0x2 3.87% test_prg test_prg [.] test_thread+0x6 [.] te= st_thread+0x38 3.84% test_prg test_prg [.] test_thread [.] te= st_thread+0x3e 3.76% test_prg test_prg [.] test_thread+0x1e [.] te= st_thread 3.76% test_prg test_prg [.] test_thread+0x38 [.] te= st_thread+0x8 3.56% test_prg test_prg [.] test_thread+0x22 [.] 
te= st_thread+0x1e 3.54% test_prg test_prg [.] test_thread+0x8 [.] te= st_thread+0x36 3.47% test_prg test_prg [.] test_thread+0x1c [.] te= st_thread+0x22 3.45% test_prg test_prg [.] test_thread+0x36 [.] te= st_thread+0xa 3.28% test_prg test_prg [.] test_thread+0x24 [.] te= st_thread+0x1c 3.25% test_prg test_prg [.] test_thread+0xa [.] te= st_thread+0x34 3.24% test_prg test_prg [.] test_thread+0x1a [.] te= st_thread+0x24 3.20% test_prg test_prg [.] test_thread+0x34 [.] te= st_thread+0xc 3.04% test_prg test_prg [.] test_thread+0x26 [.] te= st_thread+0x1a 3.01% test_prg test_prg [.] test_thread+0xc [.] te= st_thread+0x32 2.98% test_prg test_prg [.] test_thread+0x18 [.] te= st_thread+0x26 2.94% test_prg test_prg [.] test_thread+0x32 [.] te= st_thread+0xe 2.76% test_prg test_prg [.] test_thread+0x28 [.] te= st_thread+0x18 2.73% test_prg test_prg [.] test_thread+0xe [.] te= st_thread+0x30 2.67% test_prg test_prg [.] test_thread+0x30 [.] te= st_thread+0x10 2.67% test_prg test_prg [.] test_thread+0x16 [.] te= st_thread+0x28 2.46% test_prg test_prg [.] test_thread+0x10 [.] te= st_thread+0x2e 2.44% test_prg test_prg [.] test_thread+0x2a [.] te= st_thread+0x16 2.38% test_prg test_prg [.] test_thread+0x14 [.] te= st_thread+0x2a 2.32% test_prg test_prg [.] test_thread+0x2e [.] te= st_thread+0x12 2.28% test_prg test_prg [.] test_thread+0x12 [.] te= st_thread+0x2c 2.16% test_prg test_prg [.] test_thread+0x2c [.] 
te= st_thread+0x14 0.02% test_prg [kernel.vmlinux] [k] asm_sysvec_apic_ti+0x5 [k= ] error_entry Signed-off-by: Stephane Eranian --- tools/perf/util/hist.c | 2 + tools/perf/util/hist.h | 2 + tools/perf/util/sort.c | 128 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 2 + 4 files changed, 134 insertions(+) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0a8033b09e28..1c085ab56534 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -124,6 +124,7 @@ void hists__calc_col_len(struct hists *hists, struct hi= st_entry *h) } else { symlen =3D unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); + hists__new_col_len(hists, HISTC_ADDR_FROM, symlen); hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM); } =20 @@ -138,6 +139,7 @@ void hists__calc_col_len(struct hists *hists, struct hi= st_entry *h) } else { symlen =3D unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); + hists__new_col_len(hists, HISTC_ADDR_TO, symlen); hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); } =20 diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2a15e22fb89c..7ed4648d2fc2 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -77,6 +77,8 @@ enum hist_column { HISTC_GLOBAL_INS_LAT, HISTC_LOCAL_P_STAGE_CYC, HISTC_GLOBAL_P_STAGE_CYC, + HISTC_ADDR_FROM, + HISTC_ADDR_TO, HISTC_NR_COLS, /* Last entry */ }; =20 diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2da081ef532b..6d5588e80935 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -990,6 +990,128 @@ struct sort_entry sort_sym_to =3D { .se_width_idx =3D HISTC_SYMBOL_TO, }; =20 +static int _hist_entry__addr_snprintf(struct map_symbol *ms, + u64 ip, char level, char *bf, size_t size, + unsigned int width) +{ + struct symbol *sym =3D ms->sym; + struct map *map =3D ms->map; + size_t ret =3D 0, offs; + + ret +=3D repsep_snprintf(bf + ret, size - ret, "[%c] ", level); + if (sym && 
map) { + if (sym->type =3D=3D STT_OBJECT) { + ret +=3D repsep_snprintf(bf + ret, size - ret, "%s", sym->name); + ret +=3D repsep_snprintf(bf + ret, size - ret, "+0x%llx", + ip - map->unmap_ip(map, sym->start)); + } else { + ret +=3D repsep_snprintf(bf + ret, size - ret, "%.*s", + width - ret, + sym->name); + offs =3D ip - sym->start; + if (offs) + ret +=3D repsep_snprintf(bf + ret, size - ret, "+0x%llx", offs); + } + } else { + size_t len =3D BITS_PER_LONG / 4; + ret +=3D repsep_snprintf(bf + ret, size - ret, "%-#.*llx", + len, ip); + } + + return ret; +} + +static int hist_entry__addr_from_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + if (he->branch_info) { + struct addr_map_symbol *from =3D &he->branch_info->from; + + return _hist_entry__addr_snprintf(&from->ms, from->al_addr, + he->level, bf, size, width); + } + + return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); +} + +static int hist_entry__addr_to_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + if (he->branch_info) { + struct addr_map_symbol *to =3D &he->branch_info->to; + + return _hist_entry__addr_snprintf(&to->ms, to->al_addr, + he->level, bf, size, width); + } + + return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); +} + +static int64_t +sort__addr_from_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct addr_map_symbol *from_l; + struct addr_map_symbol *from_r; + int64_t ret; + + if (!left->branch_info || !right->branch_info) + return cmp_null(left->branch_info, right->branch_info); + + from_l =3D &left->branch_info->from; + from_r =3D &right->branch_info->from; + + /* + * comparing symbol address alone is not enough since it's a + * relative address within a dso. 
+ */ + ret =3D _sort__dso_cmp(from_l->ms.map, from_r->ms.map); + if (ret !=3D 0) + return ret; + + return _sort__addr_cmp(from_l->addr, from_r->addr); +} + +static int64_t +sort__addr_to_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct addr_map_symbol *to_l; + struct addr_map_symbol *to_r; + int64_t ret; + + if (!left->branch_info || !right->branch_info) + return cmp_null(left->branch_info, right->branch_info); + + to_l =3D &left->branch_info->to; + to_r =3D &right->branch_info->to; + + /* + * comparing symbol address alone is not enough since it's a + * relative address within a dso. + */ + ret =3D _sort__dso_cmp(to_l->ms.map, to_r->ms.map); + if (ret !=3D 0) + return ret; + + return _sort__addr_cmp(to_l->addr, to_r->addr); +} + +struct sort_entry sort_addr_from =3D { + .se_header =3D "Source Address", + .se_cmp =3D sort__addr_from_cmp, + .se_snprintf =3D hist_entry__addr_from_snprintf, + .se_filter =3D hist_entry__sym_from_filter, /* shared with sym_from */ + .se_width_idx =3D HISTC_ADDR_FROM, +}; + +struct sort_entry sort_addr_to =3D { + .se_header =3D "Target Address", + .se_cmp =3D sort__addr_to_cmp, + .se_snprintf =3D hist_entry__addr_to_snprintf, + .se_filter =3D hist_entry__sym_to_filter, /* shared with sym_to */ + .se_width_idx =3D HISTC_ADDR_TO, +}; + + static int64_t sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1893,6 +2015,8 @@ static struct sort_dimension bstack_sort_dimensions[]= =3D { DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from), DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to), DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc), + DIM(SORT_ADDR_FROM, "addr_from", sort_addr_from), + DIM(SORT_ADDR_TO, "addr_to", sort_addr_to), }; =20 #undef DIM @@ -3126,6 +3250,10 @@ static bool get_elide(int idx, FILE *output) return __get_elide(symbol_conf.dso_from_list, "dso_from", output); case HISTC_DSO_TO: return __get_elide(symbol_conf.dso_to_list, "dso_to", output); + case HISTC_ADDR_FROM: + return 
__get_elide(symbol_conf.sym_from_list, "addr_from", output); + case HISTC_ADDR_TO: + return __get_elide(symbol_conf.sym_to_list, "addr_to", output); default: break; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index f994261888e1..2ddc00d1c464 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -251,6 +251,8 @@ enum sort_type { SORT_SRCLINE_FROM, SORT_SRCLINE_TO, SORT_SYM_IPC, + SORT_ADDR_FROM, + SORT_ADDR_TO, =20 /* memory mode specific sort keys */ __SORT_MEMORY_MODE, --=20 2.35.0.263.gb82422642f-goog