From nobody Sun Feb 8 08:48:07 2026 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 0793443ACD; Thu, 8 Feb 2024 13:11:14 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1707397877; cv=none; b=dTxAjvIIxrSQ7e3OrGgX0EBs+nPxf/GNmhEIRfqsTY94cLYALPv4xjFaH1Eh43jeHGeuZhoy2bFYWj0kaVcqrMR7QXjGAYMID/p8j0neZ9wX6CVnfmEB7g0HkKQMn+WW+cOsyonNer+Vp2Y9uHu3+C93DzmOjzJjTfiKJQhpzLY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1707397877; c=relaxed/simple; bh=fqu6mN5K4lFFy3zTNhV7f/rrnImkgY46BLOzGCAKBEs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Qg9kTAHLm9Kn6l1Jr7vezyRqSg5p/+N4lAyOAgU9cK9clPWNUYsqahjb4zYbqoieyTWJ/IdijL2a9mp4WXEX14LwX4RrPO2/puMta60xHxQ/zY4a0JjxD9XTRPYTnxDwF7Y7kz0cJ6aadh6qmkPOIBUoU+5y48JHGJWo5Eygqo4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com; spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=arm.com Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id A3FD0DA7; Thu, 8 Feb 2024 05:11:56 -0800 (PST) Received: from e126817.cambridge.arm.com (e126817.cambridge.arm.com [10.2.3.5]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id B1CE03F5A1; Thu, 8 Feb 2024 05:11:12 -0800 (PST) From: Ben Gainey To: linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org Cc: peterz@infradead.org, mingo@redhat.com, acme@kernel.org, mark.rutland@arm.com, alexander.shishkin@linux.intel.com, jolsa@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, 
james.clark@arm.com, Ben Gainey Subject: [PATCH v2 1/4] perf: Support PERF_SAMPLE_READ with inherit_stat Date: Thu, 8 Feb 2024 13:10:47 +0000 Message-ID: <20240208131050.2406183-2-ben.gainey@arm.com> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20240208131050.2406183-1-ben.gainey@arm.com> References: <20240208131050.2406183-1-ben.gainey@arm.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" This change allows events to use PERF_SAMPLE_READ with inherit so long as both inherit_stat and PERF_SAMPLE_TID are set. In this configuration, an event will be inherited into any child processes / threads, allowing convenient profiling of a multiprocess or multithreaded application, whilst allowing profiling tools to collect per-thread samples, in particular of groups of counters. Signed-off-by: Ben Gainey --- include/linux/perf_event.h | 1 + kernel/events/core.c | 53 ++++++++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d2a15c0c6f8a..7d405dff6694 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -932,6 +932,7 @@ struct perf_event_context { =20 int nr_task_data; int nr_stat; + int nr_stat_read; int nr_freq; int rotate_disable; =20 diff --git a/kernel/events/core.c b/kernel/events/core.c index f0f0f71213a1..dac7093b3608 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1795,8 +1795,11 @@ list_add_event(struct perf_event *event, struct perf= _event_context *ctx) ctx->nr_events++; if (event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) ctx->nr_user++; - if (event->attr.inherit_stat) + if (event->attr.inherit_stat) { ctx->nr_stat++; + if (event->attr.inherit && (event->attr.sample_type & PERF_SAMPLE_READ)) + ctx->nr_stat_read++; + } =20 if (event->state > 
PERF_EVENT_STATE_OFF) perf_cgroup_event_enable(event, ctx); @@ -2019,8 +2022,11 @@ list_del_event(struct perf_event *event, struct perf= _event_context *ctx) ctx->nr_events--; if (event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT) ctx->nr_user--; - if (event->attr.inherit_stat) + if (event->attr.inherit_stat) { ctx->nr_stat--; + if (event->attr.inherit && (event->attr.sample_type & PERF_SAMPLE_READ)) + ctx->nr_stat_read--; + } =20 list_del_rcu(&event->event_entry); =20 @@ -3529,11 +3535,17 @@ perf_event_context_sched_out(struct task_struct *ta= sk, struct task_struct *next) perf_ctx_disable(ctx, false); =20 /* PMIs are disabled; ctx->nr_pending is stable. */ - if (local_read(&ctx->nr_pending) || + if (ctx->nr_stat_read || + next_ctx->nr_stat_read || + local_read(&ctx->nr_pending) || local_read(&next_ctx->nr_pending)) { /* * Must not swap out ctx when there's pending * events that rely on the ctx->task relation. + * + * Likewise, when a context contains inherit+inherit_stat+SAMPLE_READ + * events they should be switched out using the slow path + * so that they are treated as if they were distinct contexts. 
*/ raw_spin_unlock(&next_ctx->lock); rcu_read_unlock(); @@ -3545,6 +3557,7 @@ perf_event_context_sched_out(struct task_struct *task= , struct task_struct *next) =20 perf_ctx_sched_task_cb(ctx, false); perf_event_swap_task_ctx_data(ctx, next_ctx); + perf_event_sync_stat(ctx, next_ctx); =20 perf_ctx_enable(ctx, false); =20 @@ -3559,8 +3572,6 @@ perf_event_context_sched_out(struct task_struct *task= , struct task_struct *next) RCU_INIT_POINTER(next->perf_event_ctxp, ctx); =20 do_switch =3D 0; - - perf_event_sync_stat(ctx, next_ctx); } raw_spin_unlock(&next_ctx->lock); raw_spin_unlock(&ctx->lock); @@ -4533,8 +4544,13 @@ static void __perf_event_read(void *info) raw_spin_unlock(&ctx->lock); } =20 -static inline u64 perf_event_count(struct perf_event *event) +static inline u64 perf_event_count(struct perf_event *event, bool from_sam= ple) { + if (from_sample && event->attr.inherit && + event->attr.inherit && + (event->attr.sample_type & PERF_SAMPLE_TID)) { + return local64_read(&event->count); + } return local64_read(&event->count) + atomic64_read(&event->child_count); } =20 @@ -5454,7 +5470,7 @@ static u64 __perf_event_read_value(struct perf_event = *event, u64 *enabled, u64 * mutex_lock(&event->child_mutex); =20 (void)perf_event_read(event, false); - total +=3D perf_event_count(event); + total +=3D perf_event_count(event, false); =20 *enabled +=3D event->total_time_enabled + atomic64_read(&event->child_total_time_enabled); @@ -5463,7 +5479,7 @@ static u64 __perf_event_read_value(struct perf_event = *event, u64 *enabled, u64 * =20 list_for_each_entry(child, &event->child_list, child_list) { (void)perf_event_read(child, false); - total +=3D perf_event_count(child); + total +=3D perf_event_count(child, false); *enabled +=3D child->total_time_enabled; *running +=3D child->total_time_running; } @@ -5545,14 +5561,14 @@ static int __perf_read_group_add(struct perf_event = *leader, /* * Write {count,id} tuples for every sibling. 
*/ - values[n++] +=3D perf_event_count(leader); + values[n++] +=3D perf_event_count(leader, false); if (read_format & PERF_FORMAT_ID) values[n++] =3D primary_event_id(leader); if (read_format & PERF_FORMAT_LOST) values[n++] =3D atomic64_read(&leader->lost_samples); =20 for_each_sibling_event(sub, leader) { - values[n++] +=3D perf_event_count(sub); + values[n++] +=3D perf_event_count(sub, false); if (read_format & PERF_FORMAT_ID) values[n++] =3D primary_event_id(sub); if (read_format & PERF_FORMAT_LOST) @@ -6132,7 +6148,7 @@ void perf_event_update_userpage(struct perf_event *ev= ent) ++userpg->lock; barrier(); userpg->index =3D perf_event_index(event); - userpg->offset =3D perf_event_count(event); + userpg->offset =3D perf_event_count(event, false); if (userpg->index) userpg->offset -=3D local64_read(&event->hw.prev_count); =20 @@ -7200,7 +7216,7 @@ static void perf_output_read_one(struct perf_output_h= andle *handle, u64 values[5]; int n =3D 0; =20 - values[n++] =3D perf_event_count(event); + values[n++] =3D perf_event_count(event, true); if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { values[n++] =3D enabled + atomic64_read(&event->child_total_time_enabled); @@ -7245,7 +7261,7 @@ static void perf_output_read_group(struct perf_output= _handle *handle, (leader->state =3D=3D PERF_EVENT_STATE_ACTIVE)) leader->pmu->read(leader); =20 - values[n++] =3D perf_event_count(leader); + values[n++] =3D perf_event_count(leader, true); if (read_format & PERF_FORMAT_ID) values[n++] =3D primary_event_id(leader); if (read_format & PERF_FORMAT_LOST) @@ -7260,7 +7276,7 @@ static void perf_output_read_group(struct perf_output= _handle *handle, (sub->state =3D=3D PERF_EVENT_STATE_ACTIVE)) sub->pmu->read(sub); =20 - values[n++] =3D perf_event_count(sub); + values[n++] =3D perf_event_count(sub, false); if (read_format & PERF_FORMAT_ID) values[n++] =3D primary_event_id(sub); if (read_format & PERF_FORMAT_LOST) @@ -12010,10 +12026,13 @@ perf_event_alloc(struct perf_event_attr *attr, in= 
t cpu, local64_set(&hwc->period_left, hwc->sample_period); =20 /* - * We currently do not support PERF_SAMPLE_READ on inherited events. + * We do not support PERF_SAMPLE_READ on inherited events unless + * inherit_stat and PERF_SAMPLE_TID are also selected, which allows + * inherited events to collect per-thread samples. * See perf_output_read(). */ - if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)) + if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ) + && !(attr->inherit_stat && (attr->sample_type & PERF_SAMPLE_TID))) goto err_ns; =20 if (!has_branch_stack(event)) @@ -13037,7 +13056,7 @@ static void sync_child_event(struct perf_event *chi= ld_event) perf_event_read_event(child_event, task); } =20 - child_val =3D perf_event_count(child_event); + child_val =3D perf_event_count(child_event, false); =20 /* * Add back the child's count to the parent's count: --=20 2.43.0 From nobody Sun Feb 8 08:48:07 2026 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by smtp.subspace.kernel.org (Postfix) with ESMTP id DC06876413; Thu, 8 Feb 2024 13:11:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1707397879; cv=none; b=TkGv8fvJEawnO6iVrwud/NTWMksUd6z84DxHfA28Yvl3kdqS6aMv6r2/Qy368HYg5Bm/vvOSCmCXkBpkTa1LZhN2/PZo9EdQu/Okj/L/sjk7DF0qQ80hNHR/fXIvEsEVONXmJVqI7i25kHw49nsa1ItaWvDukquKqBTMiFS2lEw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1707397879; c=relaxed/simple; bh=Je/B4EVJ1QmkamUOgXw0RNyKBh1iaAjN/ZC5BiEcIWc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=BfhRl5K0waUP8pEO2TEr7nJEVEc6jKXFXgcB59o5NyX6iQP4TydTnNt9wGU7Dkb17sc5p2+t+shIFfry70tjuX5FB/h3sAozcHFhtG68Ltsr/pWP0b8J+WvLwD87iFw+8UPy2d8k1297WTvcsSwSc1zxKsURE92k99XaxagVZGw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com; spf=pass 
smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=arm.com Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 90781152B; Thu, 8 Feb 2024 05:11:58 -0800 (PST) Received: from e126817.cambridge.arm.com (e126817.cambridge.arm.com [10.2.3.5]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 9D49A3F5A1; Thu, 8 Feb 2024 05:11:14 -0800 (PST) From: Ben Gainey To: linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org Cc: peterz@infradead.org, mingo@redhat.com, acme@kernel.org, mark.rutland@arm.com, alexander.shishkin@linux.intel.com, jolsa@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, james.clark@arm.com, Ben Gainey Subject: [PATCH v2 2/4] tools/perf: Track where perf_sample_ids need per-thread periods Date: Thu, 8 Feb 2024 13:10:48 +0000 Message-ID: <20240208131050.2406183-3-ben.gainey@arm.com> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20240208131050.2406183-1-ben.gainey@arm.com> References: <20240208131050.2406183-1-ben.gainey@arm.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When PERF_SAMPLE_READ is used with inherit+inherit_stat the perf_sample_id is no longer globally unique, but is instead unique per inherited thread. Track this fact in perf_sample_ids as it will be needed to correctly calculate the period. 
Signed-off-by: Ben Gainey --- tools/lib/perf/evlist.c | 1 + tools/lib/perf/evsel.c | 7 +++++++ tools/lib/perf/include/internal/evsel.h | 7 +++++++ 3 files changed, 15 insertions(+) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 058e3ff10f9b..c585c49491a5 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -255,6 +255,7 @@ static void perf_evlist__id_hash(struct perf_evlist *ev= list, =20 sid->id =3D id; sid->evsel =3D evsel; + sid->period_per_thread =3D perf_evsel__attr_has_per_thread_sample_period(= evsel); hash =3D hash_64(sid->id, PERF_EVLIST__HLIST_BITS); hlist_add_head(&sid->node, &evlist->heads[hash]); } diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index c07160953224..dd60ee0557d8 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -537,6 +537,13 @@ void perf_evsel__free_id(struct perf_evsel *evsel) evsel->ids =3D 0; } =20 +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evse= l) +{ + return (evsel->attr.sample_type & PERF_SAMPLE_READ) + && evsel->attr.inherit + && evsel->attr.inherit_stat; +} + void perf_counts_values__scale(struct perf_counts_values *count, bool scale, __s8 *pscaled) { diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/inclu= de/internal/evsel.h index 5cd220a61962..97658f1c9ca3 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -36,6 +36,11 @@ struct perf_sample_id { =20 /* Holds total ID period value for PERF_SAMPLE_READ processing. */ u64 period; + + /* When inherit+inherit_stat is combined with PERF_SAMPLE_READ, the + * period value is per (sample_id, thread) tuple, rather than per + * sample_id. 
*/ + bool period_per_thread; }; =20 struct perf_evsel { @@ -88,4 +93,6 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, co= nst char *filter); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads= ); void perf_evsel__free_id(struct perf_evsel *evsel); =20 +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evse= l); + #endif /* __LIBPERF_INTERNAL_EVSEL_H */ --=20 2.43.0 From nobody Sun Feb 8 08:48:07 2026 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by smtp.subspace.kernel.org (Postfix) with ESMTP id BFA2F76C75; Thu, 8 Feb 2024 13:11:18 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1707397880; cv=none; b=bt5i0EjJUS0if+24nmMIFdQvb4t7FWtmi24cs/uQ9f4+6xON7Pi7uG6U2lRFJcoIOJFy6ehjfCJZhHaDjU016s+sV+qfklD5RsiVAHVe68hbvo2OrJE16NFHjsAX1czQl4gC2DIQZLqRjxJ0leFN8Ilo8QbGv+mJCA39D66OOK8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1707397880; c=relaxed/simple; bh=Ry30vTKwYooMTl9Mf53QWiWzOCZr0ih4rljkclS4P3Y=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=mhQO2lSe1OQ9ZNE/3QRYNpUwbE5WbgFwDbJqhtYzTNfxC30OOzDMFz8EE+C9GWWPk5ymN3pAv29ZPb+NtavhkO3eFnPatLI6LkTyQqiEXYpKD+NqktViTMvfLcdkjrgeOZpgWM/RO43wqYqSoMX+3GLFkKYPAFbeUjmwFEc/RIY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com; spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=arm.com Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 7AA4C153B; Thu, 8 Feb 2024 05:12:00 -0800 (PST) Received: from e126817.cambridge.arm.com (e126817.cambridge.arm.com 
[10.2.3.5]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 88F893F5A1; Thu, 8 Feb 2024 05:11:16 -0800 (PST) From: Ben Gainey To: linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org Cc: peterz@infradead.org, mingo@redhat.com, acme@kernel.org, mark.rutland@arm.com, alexander.shishkin@linux.intel.com, jolsa@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, james.clark@arm.com, Ben Gainey Subject: [PATCH v2 3/4] tools/perf: Correctly calculate sample period for inherited SAMPLE_READ values Date: Thu, 8 Feb 2024 13:10:49 +0000 Message-ID: <20240208131050.2406183-4-ben.gainey@arm.com> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20240208131050.2406183-1-ben.gainey@arm.com> References: <20240208131050.2406183-1-ben.gainey@arm.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Calculate the per-thread period when using PERF_SAMPLE_READ with inherit+inherit_stat. Store a per-thread period per perf_sample_id, hashed by tid. For other configurations, maintain a global period per perf_sample_id. 
Signed-off-by: Ben Gainey --- tools/lib/perf/evsel.c | 41 +++++++++++++++++++++++++ tools/lib/perf/include/internal/evsel.h | 41 +++++++++++++++++++++++-- tools/perf/util/session.c | 11 +++++-- 3 files changed, 88 insertions(+), 5 deletions(-) diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index dd60ee0557d8..4e173151e183 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,7 @@ void perf_evsel__init(struct perf_evsel *evsel, struct pe= rf_event_attr *attr, int idx) { INIT_LIST_HEAD(&evsel->node); + INIT_LIST_HEAD(&evsel->period_per_thread_periods); evsel->attr =3D *attr; evsel->idx =3D idx; evsel->leader =3D evsel; @@ -531,10 +533,17 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, in= t ncpus, int nthreads) =20 void perf_evsel__free_id(struct perf_evsel *evsel) { + struct perf_sample_id_period *pos, *n; + xyarray__delete(evsel->sample_id); evsel->sample_id =3D NULL; zfree(&evsel->id); evsel->ids =3D 0; + + perf_evsel_for_each_per_thread_period_safe(evsel, n, pos) { + list_del_init(&pos->node); + zfree(pos); + } } =20 bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evse= l) @@ -544,6 +553,38 @@ bool perf_evsel__attr_has_per_thread_sample_period(str= uct perf_evsel *evsel) && evsel->attr.inherit_stat; } =20 +u64 * perf_sample_id__get_period_storage(struct perf_sample_id * sid, u32 = tid) +{ + struct hlist_head *head; + struct perf_sample_id_period *res; + int hash; + + if (!sid->period_per_thread) + return &sid->period; + + hash =3D hash_32(tid, PERF_SAMPLE_ID__HLIST_BITS); + head =3D &sid->periods[hash]; + + hlist_for_each_entry(res, head, hnode) + if (res->tid =3D=3D tid) + return &res->period; + + if (sid->evsel =3D=3D NULL) + return NULL; + + res =3D zalloc(sizeof(struct perf_sample_id_period)); + if (res =3D=3D NULL) + return NULL; + + INIT_LIST_HEAD(&res->node); + res->tid =3D tid; + + list_add_tail(&res->node, 
&sid->evsel->period_per_thread_periods); + hlist_add_head(&res->hnode, &sid->periods[hash]); + + return &res->period; +} + void perf_counts_values__scale(struct perf_counts_values *count, bool scale, __s8 *pscaled) { diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/inclu= de/internal/evsel.h index 97658f1c9ca3..0fd8597c1340 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -11,6 +11,31 @@ struct perf_thread_map; struct xyarray; =20 +/** + * The per-thread accumulated period storage node. + */ +struct perf_sample_id_period { + struct list_head node; + struct hlist_node hnode; + /* The thread that the values belongs to */ + u32 tid; + /* Holds total ID period value for PERF_SAMPLE_READ processing. */ + u64 period; +}; + +/** + * perf_evsel_for_each_per_thread_period_safe - safely iterate thru all th= e period_per_thread_periods + * @evlist:perf_evsel instance to iterate + * @item: struct perf_sample_id_period iterator + * @tmp: struct perf_sample_id_period temp iterator + */ +#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \ + list_for_each_entry_safe(item, tmp, &(evsel)->period_per_thread_periods, = node) + + +#define PERF_SAMPLE_ID__HLIST_BITS 4 +#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS) + /* * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there = are * more than one entry in the evlist. @@ -19,6 +44,7 @@ struct perf_sample_id { struct hlist_node node; u64 id; struct perf_evsel *evsel; + /* * 'idx' will be used for AUX area sampling. A sample will have AUX area * data that will be queued for decoding, where there are separate @@ -34,8 +60,14 @@ struct perf_sample_id { pid_t machine_pid; struct perf_cpu vcpu; =20 - /* Holds total ID period value for PERF_SAMPLE_READ processing. */ - u64 period; + union { + /* Holds total ID period value for PERF_SAMPLE_READ processing (when per= iod is not + * per-thread). 
*/ + u64 period; + /* Holds total ID period value for PERF_SAMPLE_READ processing (when per= iod is + * per-thread). */ + struct hlist_head periods[PERF_SAMPLE_ID__HLIST_SIZE]; + }; =20 /* When inherit+inherit_stat is combined with PERF_SAMPLE_READ, the * period value is per (sample_id, thread) tuple, rather than per @@ -63,6 +95,9 @@ struct perf_evsel { u32 ids; struct perf_evsel *leader; =20 + /* Where period_per_thread is true, stores the per-thread values */ + struct list_head period_per_thread_periods; + /* parse modifier helper */ int nr_members; /* @@ -95,4 +130,6 @@ void perf_evsel__free_id(struct perf_evsel *evsel); =20 bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evse= l); =20 +u64 * perf_sample_id__get_period_storage(struct perf_sample_id * sid, u32 = tid); + #endif /* __LIBPERF_INTERNAL_EVSEL_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 199d3e8df315..22a8598ee849 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1478,14 +1478,19 @@ static int deliver_sample_value(struct evlist *evli= st, { struct perf_sample_id *sid =3D evlist__id2sid(evlist, v->id); struct evsel *evsel; + u64 * storage =3D NULL; =20 if (sid) { + storage =3D perf_sample_id__get_period_storage(sid, sample->tid); + } + + if (storage) { sample->id =3D v->id; - sample->period =3D v->value - sid->period; - sid->period =3D v->value; + sample->period =3D v->value - *storage; + *storage =3D v->value; } =20 - if (!sid || sid->evsel =3D=3D NULL) { + if (!storage || sid->evsel =3D=3D NULL) { ++evlist->stats.nr_unknown_id; return 0; } --=20 2.43.0 From nobody Sun Feb 8 08:48:07 2026 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by smtp.subspace.kernel.org (Postfix) with ESMTP id BB4D3128393; Thu, 8 Feb 2024 13:11:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1707397882; 
cv=none; b=TwNWeYPdgzRiwpOdZLUupZ6gVXOK3m2DIFerDhywVtlRBxL15tQGcTzmdTZdLmow/mm2xUas9KDl8HM+JiIKVJY4R2b3FE8ETVGc8r+ovSGh6qWNlcPFs6SUdn2woXSxiI9hNV5Y23dqGIlvyLI8iTN4ba40WYmG0wffEuf2Fes= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1707397882; c=relaxed/simple; bh=NXmmRDt9OvFlC34NViJ9HXJ7Z7vXKtF1o7U8eiErB4o=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=OAygWMJih4Xq+spF9mKZkDxj4HG0VTWWyLA20M+pnIShjnyDTV32iNMq/USO88jhVn8CfJ5Akq/+eKHfDadGK94V0Epy/ThN5t57DrcgLNDBoOnmTafE5yAY4oqpSg9J4uZCgEofxWLCZhLCwwG5Krtswxar6HXsnqecWX0P/SM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com; spf=pass smtp.mailfrom=arm.com; arc=none smtp.client-ip=217.140.110.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=arm.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=arm.com Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 673201570; Thu, 8 Feb 2024 05:12:02 -0800 (PST) Received: from e126817.cambridge.arm.com (e126817.cambridge.arm.com [10.2.3.5]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 741163F5A1; Thu, 8 Feb 2024 05:11:18 -0800 (PST) From: Ben Gainey To: linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org Cc: peterz@infradead.org, mingo@redhat.com, acme@kernel.org, mark.rutland@arm.com, alexander.shishkin@linux.intel.com, jolsa@kernel.org, namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com, james.clark@arm.com, Ben Gainey Subject: [PATCH v2 4/4] tools/perf: Allow inherit + inherit_stat + PERF_SAMPLE_READ when opening events Date: Thu, 8 Feb 2024 13:10:50 +0000 Message-ID: <20240208131050.2406183-5-ben.gainey@arm.com> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20240208131050.2406183-1-ben.gainey@arm.com> References: <20240208131050.2406183-1-ben.gainey@arm.com> 
Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When PERF_SAMPLE_READ is used, enable inherit_stat whenever inherit is set. Provide a fallback path that disables inherit when this feature is not available, which is in line with the previous behaviour. Signed-off-by: Ben Gainey --- tools/perf/util/evsel.c | 15 +++++++++++++-- tools/perf/util/evsel.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6d7c9c58a9bc..dc74b39a2254 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1156,7 +1156,11 @@ void evsel__config(struct evsel *evsel, struct recor= d_opts *opts, */ if (leader->core.nr_members > 1) { attr->read_format |=3D PERF_FORMAT_GROUP; - attr->inherit =3D 0; + } + + /* Inherit + READ requires inherit_stat */ + if (attr->inherit) { + attr->inherit_stat =3D true; } } =20 @@ -1832,6 +1836,8 @@ static int __evsel__prepare_open(struct evsel *evsel,= struct perf_cpu_map *cpus, =20 static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.inherit_sample_read) + evsel->core.attr.inherit =3D 0; if (perf_missing_features.branch_counters) evsel->core.attr.branch_sample_type &=3D ~PERF_SAMPLE_BRANCH_COUNTERS; if (perf_missing_features.read_lost) @@ -1887,7 +1893,12 @@ bool evsel__detect_missing_features(struct evsel *ev= sel) * Must probe features in the order they were added to the * perf_event_attr interface. 
*/ - if (!perf_missing_features.branch_counters && + if (!perf_missing_features.inherit_sample_read && + evsel->core.attr.inherit && (evsel->core.attr.sample_type & PERF_SAMP= LE_READ)) { + perf_missing_features.inherit_sample_read =3D true; + pr_debug2("Using PERF_SAMPLE_READ / :S modifier is not compatible with i= nherit, falling back to no-inherit.\n"); + return true; + } else if (!perf_missing_features.branch_counters && (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { perf_missing_features.branch_counters =3D true; pr_debug2("switching off branch counters support\n"); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index efbb6e848287..11cc9b8bee27 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -192,6 +192,7 @@ struct perf_missing_features { bool weight_struct; bool read_lost; bool branch_counters; + bool inherit_sample_read; }; =20 extern struct perf_missing_features perf_missing_features; --=20 2.43.0