Message-Id: <20241104135518.760214287@infradead.org>
User-Agent: quilt/0.65
Date: Mon, 04 Nov 2024 14:39:20 +0100
From: Peter Zijlstra <peterz@infradead.org>
To: mingo@kernel.org, lucas.demarchi@intel.com
Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, willy@infradead.org,
 acme@kernel.org, namhyung@kernel.org, mark.rutland@arm.com,
 alexander.shishkin@linux.intel.com, jolsa@kernel.org, irogers@google.com,
 adrian.hunter@intel.com, kan.liang@linux.intel.com
Subject: [PATCH 11/19] perf: Detach perf_cpu_pmu_context and pmu lifetimes
References: <20241104133909.669111662@infradead.org>

In preparation for being able to unregister a pmu with existing events,
it becomes important to detach struct perf_cpu_pmu_context lifetimes
from that of struct pmu.

Notably, perf_cpu_pmu_context embeds a perf_event_pmu_context that can
stay referenced until the last event goes.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
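A note to make the lifetime rule above concrete: the refcount scheme can
be modelled in a few lines of userspace C. This is an illustrative sketch
only, not kernel code -- it uses a plain (non-atomic) refcount, omits the
RCU-deferred freeing the patch actually uses, and the epc/cpc names are
mere stand-ins for perf_event_pmu_context / perf_cpu_pmu_context:

/* Userspace model of the detached cpc lifetime; compile: gcc -o demo demo.c */
#include <stdio.h>
#include <stdlib.h>

struct epc {			/* models perf_event_pmu_context */
	int refcount;
	int embedded;		/* lives inside a cpc? */
};

struct cpc {			/* models perf_cpu_pmu_context */
	struct epc epc;		/* first member, so the cast below is valid
				   (the kernel uses container_of() instead) */
};

static void put_pmu_ctx(struct epc *epc)
{
	if (--epc->refcount)
		return;
	if (epc->embedded)
		free((struct cpc *)epc);	/* embedded: frees the whole cpc */
	else
		free(epc);
	printf("last reference dropped, context freed\n");
}

int main(void)
{
	struct cpc *cpc = calloc(1, sizeof(*cpc));

	if (!cpc)
		return 1;
	cpc->epc.embedded = 1;
	/* find_get_pmu_context(): one ref for the user, one for the pmu */
	cpc->epc.refcount = 2;

	put_pmu_ctx(&cpc->epc);	/* pmu unregister: perf_pmu_free()'s put */
	printf("pmu gone, context still alive for its remaining event\n");

	put_pmu_ctx(&cpc->epc);	/* the last event finally goes away */
	return 0;
}

Initializing the embedded epc's refcount to 2 rather than 1 is what gives
the pmu a reference of its own, so perf_pmu_free() can drop it without
pulling the context out from under any events that still hold theirs.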
 include/linux/perf_event.h |    4 +--
 kernel/events/core.c       |   56 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 49 insertions(+), 11 deletions(-)

--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -336,7 +336,7 @@ struct pmu {
 	 */
 	unsigned int			scope;
 
-	struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
+	struct perf_cpu_pmu_context __percpu **cpu_pmu_context;
 	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
 	int				task_ctx_nr;
 	int				hrtimer_interval_ms;
@@ -901,7 +901,7 @@ struct perf_event_pmu_context {
 	struct list_head		pinned_active;
 	struct list_head		flexible_active;
 
-	/* Used to avoid freeing per-cpu perf_event_pmu_context */
+	/* Used to identify the per-cpu perf_event_pmu_context */
 	unsigned int			embedded : 1;
 
 	unsigned int			nr_events;
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1178,7 +1178,7 @@ static int perf_mux_hrtimer_restart_ipi(
 
 static __always_inline struct perf_cpu_pmu_context *this_cpc(struct pmu *pmu)
 {
-	return this_cpu_ptr(pmu->cpu_pmu_context);
+	return *this_cpu_ptr(pmu->cpu_pmu_context);
 }
 
 void perf_pmu_disable(struct pmu *pmu)
@@ -4971,11 +4971,14 @@ find_get_pmu_context(struct pmu *pmu, st
 	 */
 	struct perf_cpu_pmu_context *cpc;
 
-	cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
+	cpc = *per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
 	epc = &cpc->epc;
 	raw_spin_lock_irq(&ctx->lock);
 	if (!epc->ctx) {
-		atomic_set(&epc->refcount, 1);
+		/*
+		 * One extra reference for the pmu; see perf_pmu_free().
+		 */
+		atomic_set(&epc->refcount, 2);
 		epc->embedded = 1;
 		list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
 		epc->ctx = ctx;
@@ -5044,6 +5047,15 @@ static void get_pmu_ctx(struct perf_even
 	WARN_ON_ONCE(!atomic_inc_not_zero(&epc->refcount));
 }
 
+static void free_cpc_rcu(struct rcu_head *head)
+{
+	struct perf_cpu_pmu_context *cpc =
+		container_of(head, typeof(*cpc), epc.rcu_head);
+
+	kfree(cpc->epc.task_ctx_data);
+	kfree(cpc);
+}
+
 static void free_epc_rcu(struct rcu_head *head)
 {
 	struct perf_event_pmu_context *epc = container_of(head, typeof(*epc), rcu_head);
@@ -5078,8 +5090,10 @@ static void put_pmu_ctx(struct perf_even
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
-	if (epc->embedded)
+	if (epc->embedded) {
+		call_rcu(&epc->rcu_head, free_cpc_rcu);
 		return;
+	}
 
 	call_rcu(&epc->rcu_head, free_epc_rcu);
 }
@@ -11595,7 +11609,7 @@ perf_event_mux_interval_ms_store(struct
 	cpus_read_lock();
 	for_each_online_cpu(cpu) {
 		struct perf_cpu_pmu_context *cpc;
-		cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+		cpc = *per_cpu_ptr(pmu->cpu_pmu_context, cpu);
 		cpc->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
 
 		cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpc);
@@ -11767,7 +11781,25 @@ static void perf_pmu_free(struct pmu *pm
 		device_del(pmu->dev);
 		put_device(pmu->dev);
 	}
-	free_percpu(pmu->cpu_pmu_context);
+
+	if (pmu->cpu_pmu_context) {
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			struct perf_cpu_pmu_context *cpc;
+
+			cpc = *per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+			if (!cpc)
+				continue;
+			if (cpc->epc.embedded) {
+				/* refcount managed */
+				put_pmu_ctx(&cpc->epc);
+				continue;
+			}
+			kfree(cpc);
+		}
+		free_percpu(pmu->cpu_pmu_context);
+	}
 }
 
 DEFINE_FREE(pmu_unregister, struct pmu *, if (_T) perf_pmu_free(_T))
@@ -11806,14 +11838,20 @@ int perf_pmu_register(struct pmu *_pmu,
 		return ret;
 	}
 
-	pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context);
+	pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context *);
 	if (!pmu->cpu_pmu_context)
 		return -ENOMEM;
 
 	for_each_possible_cpu(cpu) {
-		struct perf_cpu_pmu_context *cpc;
+		struct perf_cpu_pmu_context *cpc =
+			kmalloc_node(sizeof(struct perf_cpu_pmu_context),
+				     GFP_KERNEL | __GFP_ZERO,
+				     cpu_to_node(cpu));
+
+		if (!cpc)
+			return -ENOMEM;
 
-		cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+		*per_cpu_ptr(pmu->cpu_pmu_context, cpu) = cpc;
 		__perf_init_event_pmu_context(&cpc->epc, pmu);
 		__perf_mux_hrtimer_init(cpc, cpu);
	}
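
For completeness, the allocation pattern the registration path switches
to -- a percpu array of pointers, with each context kmalloc'd on its
CPU's memory node so it can later be freed on its own -- looks like this
in isolation. Illustrative module sketch only, not part of the patch;
demo_ctx and demo_ctxs are made-up names:

// SPDX-License-Identifier: GPL-2.0
/* Sketch of the "percpu array of pointers" pattern: the percpu
 * allocation holds one pointer per CPU, and each backing struct is
 * allocated separately with kmalloc_node() so its lifetime is no
 * longer tied to the percpu allocation itself.
 */
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/topology.h>

struct demo_ctx {		/* stand-in for perf_cpu_pmu_context */
	int cpu;
};

static struct demo_ctx __percpu **demo_ctxs;

static void demo_free(void)
{
	int cpu;

	/* alloc_percpu() zeroes the slots, so kfree(NULL) is safe here */
	for_each_possible_cpu(cpu)
		kfree(*per_cpu_ptr(demo_ctxs, cpu));
	free_percpu(demo_ctxs);
}

static int __init demo_init(void)
{
	int cpu;

	/* The percpu storage holds a pointer per CPU, not the struct. */
	demo_ctxs = alloc_percpu(struct demo_ctx *);
	if (!demo_ctxs)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct demo_ctx *ctx =
			kmalloc_node(sizeof(*ctx), GFP_KERNEL | __GFP_ZERO,
				     cpu_to_node(cpu));

		if (!ctx) {
			demo_free();	/* unlike the patch, unwind locally */
			return -ENOMEM;
		}
		ctx->cpu = cpu;
		*per_cpu_ptr(demo_ctxs, cpu) = ctx;
	}
	return 0;
}

static void __exit demo_exit(void)
{
	demo_free();
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

In the patch itself the embedded contexts are not kfree'd directly like
this: perf_pmu_free() puts the pmu's reference instead, and the actual
kfree() happens RCU-deferred in free_cpc_rcu() once the refcount hits
zero.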