From nobody Mon Feb 9 07:19:42 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 446C2196 for ; Fri, 29 Mar 2024 23:58:42 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1711756723; cv=none; b=mMUKxgrhfsgjg1qgNuyMDoxSspBv6uWBHXl4O7VHiNI4vOv3PoOmlyz2UwHdpY8FWV3BRuHhmPRr8CwxEdEf88eYCSFVZxGN3i2iQ8lrxEuC1mzWD4+eZw8wRfychA55FN592lXMU+9OsNKvx0DoBA0xUtpAe5m46fgWlBMrZec= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1711756723; c=relaxed/simple; bh=a8ZIwcOzFWpAazFJq80nYQpJgGvoSiKotzpuXLRiwFg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Ef5h4xjq9TeVEFlQ8/8NCv3NJtuyIViCcFwqxZhAaTRsa4HeYtRrLhAMWXdrKpp4Ej0Yqd/cwFuwXKmlCxgN6Mk5nVT0hj8mB1UVjTvqyIRXZKL7wAyg2dFvbAIjkS08DMGdFcKPNuaOd5WPr5WqbUZgMyf0IBuUQu6Wb40cFIw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=qLMiWGGD; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="qLMiWGGD" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B2EC4C433A6; Fri, 29 Mar 2024 23:58:39 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1711756722; bh=a8ZIwcOzFWpAazFJq80nYQpJgGvoSiKotzpuXLRiwFg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=qLMiWGGDt8GGp9SAk0E+qPUqHo0K+TIxfNmN0U7mkjYtVOKJbuvZ6+6xidYiZsrps jfn0wesESCoywGJODRx0vjSdS7LshuewgW0DAGyUtO1MclzQ9Rhrpjns66r1jH44QS A/+VLV966VfjOXt061IaoHt/ioQ/TxBo4myxEA05KBvvI+EPhfUN+xXGtngOFFxe+I lnj72Dqdglpw+CwLe7MV4ljUFVRqvv8xncQXer6pPFpVLJOa9d1DPrZm/Clmqa9uaQ 
w5JCVvXmz3l7sXMz10wxwbA942qYKXXaTJe2P5BDOxZQQePW9ff40piqYooenbIfBx 3UQWooTDwcTFw== From: Frederic Weisbecker To: LKML Cc: Frederic Weisbecker , Thomas Gleixner , David Howells , Jarkko Sakkinen , Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Namhyung Kim , Mark Rutland , Alexander Shishkin , Jiri Olsa , Ian Rogers , Adrian Hunter Subject: [PATCH 1/4] task_work: s/task_work_cancel()/task_work_cancel_func()/ Date: Sat, 30 Mar 2024 00:58:09 +0100 Message-ID: <20240329235812.18917-2-frederic@kernel.org> X-Mailer: git-send-email 2.44.0 In-Reply-To: <20240329235812.18917-1-frederic@kernel.org> References: <20240329235812.18917-1-frederic@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" A proper task_work_cancel() API that actually cancels a callback and not *any* callback pointing to a given function is going to be needed for perf events event freeing. Do the appropriate rename to prepare for that. 
Signed-off-by: Frederic Weisbecker --- include/linux/task_work.h | 2 +- kernel/irq/manage.c | 2 +- kernel/task_work.c | 10 +++++----- security/keys/keyctl.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 3564172dbc27..89ee2cbf044b 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -36,7 +36,7 @@ int task_work_add(struct task_struct *task, struct callba= ck_head *twork, =20 struct callback_head *task_work_cancel_match(struct task_struct *task, bool (*match)(struct callback_head *, void *data), void *data); -struct callback_head *task_work_cancel(struct task_struct *, task_work_fun= c_t); +struct callback_head *task_work_cancel_func(struct task_struct *, task_wor= k_func_t); void task_work_run(void); =20 static inline void exit_task_work(struct task_struct *task) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ad3eaf2ab959..2464e98879f3 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1333,7 +1333,7 @@ static int irq_thread(void *data) * synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the * oneshot mask bit can be set. */ - task_work_cancel(current, irq_thread_dtor); + task_work_cancel_func(current, irq_thread_dtor); return 0; } =20 diff --git a/kernel/task_work.c b/kernel/task_work.c index d283f603d916..c1b4d3ba2590 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -120,9 +120,9 @@ static bool task_work_func_match(struct callback_head *= cb, void *data) } =20 /** - * task_work_cancel - cancel a pending work added by task_work_add() - * @task: the task which should execute the work - * @func: identifies the work to remove + * task_work_cancel_func - cancel a pending work matching a function added= by task_work_add() + * @task: the task which should execute the func's work + * @func: identifies the func to match with a work to remove * * Find the last queued pending work with ->func =3D=3D @func and remove * it from queue. 
@@ -131,7 +131,7 @@ static bool task_work_func_match(struct callback_head *= cb, void *data) * The found work or NULL if not found. */ struct callback_head * -task_work_cancel(struct task_struct *task, task_work_func_t func) +task_work_cancel_func(struct task_struct *task, task_work_func_t func) { return task_work_cancel_match(task, task_work_func_match, func); } @@ -168,7 +168,7 @@ void task_work_run(void) if (!work) break; /* - * Synchronize with task_work_cancel(). It can not remove + * Synchronize with task_work_cancel_match(). It can not remove * the first entry =3D=3D work, cmpxchg(task_works) must fail. * But it can remove another entry from the ->next list. */ diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 10ba439968f7..3aff32a2bcf3 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1693,7 +1693,7 @@ long keyctl_session_to_parent(void) goto unlock; =20 /* cancel an already pending keyring replacement */ - oldwork =3D task_work_cancel(parent, key_change_session_keyring); + oldwork =3D task_work_cancel_func(parent, key_change_session_keyring); =20 /* the replacement session keyring is applied just prior to userspace * restarting */ --=20 2.44.0 From nobody Mon Feb 9 07:19:42 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0A28413E895 for ; Fri, 29 Mar 2024 23:58:45 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1711756726; cv=none; b=aGuvIV3fldei8iu8jscqatCp5NrcHEyTdu5jIc+83iBSyGIkG6BQIj1WsBF3sc8s9gVVdKvI7wY/l/sqIe+6uTodFUBcccfruJzhF6CQf/D89a12XMjAMkDiTxJkiP6SlILFoMB0eNXEpoJMU/iXqMUszT5fZ3+ECiAgD6Anm9o= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1711756726; 
c=relaxed/simple; bh=leWar64/qGvuiTJiHqRj1593VB59JAei213MN3XaVAc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=a3OexwQxWwR8ioXwqjHKdtJl2dFbH18wz+fYSWWPaWZ67r2jCAJoWT9+ZvAB7uU1W3ywOyJDb/13GToj7xmLfIJ7Fp+15D9MDY1Yi25/Ing9rdK2JvIf0a+iKF6uiX5wLT+iFCwwwn+S5It46pFhMAT3HeOSjsj32pS8gcO03qY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=E0R6NEFu; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="E0R6NEFu" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2CA34C43394; Fri, 29 Mar 2024 23:58:43 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1711756725; bh=leWar64/qGvuiTJiHqRj1593VB59JAei213MN3XaVAc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=E0R6NEFu+sVSytMxL/aGko019dWB+IpyKhvcsyhy6u888IV5cNz7AEUdcDbVt4+0V poMdJvOb5448A1n/SYQ3/XYgI8WhLSrIkJrynCob9eWWvD6Mw5EqBFH5GFU7W8qM7A IQQmLiA+AJ1ZC420BmTRw+34jvkwt4G82ulvDxc/r/aJRK1lxFFqo0wrRODpecI4w7 ZxcYPXC4AzI7FcXr68ovbqmE0G4adlkgrwc+5PIqdMAKmBaQsx051H+61fMSDGcJY8 ZUBS6v884uBI0k10NZJK9uy46L90/ct6glVL8dyHUhu7VJXAZNU9nEdxGU7m/42RV7 IYdr5qWMDCcZw== From: Frederic Weisbecker To: LKML Cc: Frederic Weisbecker , Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Namhyung Kim , Mark Rutland , Alexander Shishkin , Jiri Olsa , Ian Rogers , Adrian Hunter Subject: [PATCH 2/4] task_work: Introduce task_work_cancel() again Date: Sat, 30 Mar 2024 00:58:10 +0100 Message-ID: <20240329235812.18917-3-frederic@kernel.org> X-Mailer: git-send-email 2.44.0 In-Reply-To: <20240329235812.18917-1-frederic@kernel.org> References: <20240329235812.18917-1-frederic@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: 
text/plain; charset="utf-8" Re-introduce task_work_cancel(), this time to cancel an actual callback and not *any* callback pointing to a given function. This is going to be needed for perf events event freeing. Signed-off-by: Frederic Weisbecker --- include/linux/task_work.h | 1 + kernel/task_work.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 89ee2cbf044b..58e42ef59580 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -37,6 +37,7 @@ int task_work_add(struct task_struct *task, struct callba= ck_head *twork, struct callback_head *task_work_cancel_match(struct task_struct *task, bool (*match)(struct callback_head *, void *data), void *data); struct callback_head *task_work_cancel_func(struct task_struct *, task_wor= k_func_t); +bool task_work_cancel(struct task_struct *, struct callback_head *twork); void task_work_run(void); =20 static inline void exit_task_work(struct task_struct *task) diff --git a/kernel/task_work.c b/kernel/task_work.c index c1b4d3ba2590..9e85ac7632ae 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -136,6 +136,30 @@ task_work_cancel_func(struct task_struct *task, task_w= ork_func_t func) return task_work_cancel_match(task, task_work_func_match, func); } =20 +static bool task_work_match(struct callback_head *cb, void *data) +{ + return cb =3D=3D data; +} + +/** + * task_work_cancel - cancel a pending work added by task_work_add() + * @task: the task which should execute the work + * @func: the work to remove if queued + * + * Remove a callback from a task's queue if queued. + * + * RETURNS: + * True if the callback was queued and got cancelled, false otherwise. 
+ */ +bool task_work_cancel(struct task_struct *task, struct callback_head *cb) +{ + struct callback_head *ret; + + ret =3D task_work_cancel_match(task, task_work_match, cb); + + return ret =3D=3D cb; +} + /** * task_work_run - execute the works added by task_work_add() * --=20 2.44.0 From nobody Mon Feb 9 07:19:42 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C54F713F01B for ; Fri, 29 Mar 2024 23:58:48 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1711756728; cv=none; b=lXlVF6xVvaFU8KLAuM2wvju+RGhK9Xlp485RF6x+gDdz3D5URNIzfwRhSf3/Cnrp3rBU9NWKQgjwsD6WFhnFNQ7XEKpcHyfcBTmiiUhCjYs8GG4Mis5r6K5KJZ/jFZi+Z5MZZBK1o9OqZJL149oeEFjKSFafi15gz7OuT40pAoM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1711756728; c=relaxed/simple; bh=jdiXquXAPPtPajTSj3ryNiWP4Z6gOwf8+7VBRKa5ETE=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=bRoLAycOvLeo16XTi9CUCWvcVKoKKd05G5U6PGqS8uUabaNwnAJSsQme6pW/tpaRE6lzvE5q04nR+GbNL5DTpiIC8Q6oEmdSyxVIArGTSwlRlKO8zzqRsbYpxP9jfQGZqSp4KTkshz41UHjnu6KEokRaP/mvwIqiB0K1/GlEwv8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=ZNXj0OHr; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="ZNXj0OHr" Received: by smtp.kernel.org (Postfix) with ESMTPSA id EE78AC433B1; Fri, 29 Mar 2024 23:58:45 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1711756728; bh=jdiXquXAPPtPajTSj3ryNiWP4Z6gOwf8+7VBRKa5ETE=; 
h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ZNXj0OHrqHocYTHj5KYi2Dvp4W1KeU6L284QF6RtRSMRQJnDVgK+IyAc1OQyx+7uc 9fgQDvLn1UmxLcaPxbyEpXaYyjV/3fv3Oven2GCWjiWfyn9uflPOzqTPpt9EPdG19M A70Pn5KvlNooPN1PoLiDr2UMqTxY+RAhPBoY1RHF3XoyLVtoiMN9Mg6ik1W/dnb0Ob QHrKDcejZSqvPNkes92IvfsXut5youx7rMSb4c6qKAIcf+lrIeEarhEWZn56/GkLqr /k+1DvDz+MQciUAvUuGEO2Gt4iFhchQJEzrhh24uB6dnFqT5wxpSqKSMjgFitKbUvA mTFMg45v2mr5Q== From: Frederic Weisbecker To: LKML Cc: Frederic Weisbecker , Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Namhyung Kim , Mark Rutland , Alexander Shishkin , Jiri Olsa , Ian Rogers , Adrian Hunter Subject: [PATCH 3/4] perf: Fix event leak upon exit Date: Sat, 30 Mar 2024 00:58:11 +0100 Message-ID: <20240329235812.18917-4-frederic@kernel.org> X-Mailer: git-send-email 2.44.0 In-Reply-To: <20240329235812.18917-1-frederic@kernel.org> References: <20240329235812.18917-1-frederic@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When a task is scheduled out, pending sigtrap deliveries are deferred to the target task upon resume to userspace via task_work. However, failures while adding an event's callback to the task_work engine are ignored. And since the last call for events exit happens after task work is eventually closed, there is a small window during which a pending sigtrap can be queued though ignored, leaking the event refcount addition, such as in the following scenario: TASK A ----- do_exit() exit_task_work(tsk); perf_event_overflow() event->pending_sigtrap =3D pending_id; irq_work_queue(&event->pending_irq); =3D=3D=3D=3D=3D=3D=3D=3D=3D> PREEMPTION: TASK A -> TASK B event_sched_out() event->pending_sigtrap =3D 0; atomic_long_inc_not_zero(&event->refcount) // FAILS: task work has exited task_work_add(&event->pending_task) [...] perf_pending_irq() // early return: event->oncpu =3D -1 [...] 
=3D=3D=3D=3D=3D=3D=3D=3D=3D> TASK B -> TASK A perf_event_exit_task(tsk) perf_event_exit_event() free_event() WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) !=3D 1) // leak event due to unexpected refcount =3D=3D 2 As a result the event is never released while the task exits. Fix this with appropriate task_work_add()'s error handling. Fixes: 517e6a301f34 ("perf: Fix perf_pending_task() UaF") Signed-off-by: Frederic Weisbecker --- kernel/events/core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 724e6d7e128f..c1632e69c69d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2289,10 +2289,11 @@ event_sched_out(struct perf_event *event, struct pe= rf_event_context *ctx) event->pending_sigtrap =3D 0; if (state !=3D PERF_EVENT_STATE_OFF && !event->pending_work) { - event->pending_work =3D 1; - dec =3D false; - WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); - task_work_add(current, &event->pending_task, TWA_RESUME); + if (task_work_add(current, &event->pending_task, TWA_RESUME) >=3D 0) { + WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); + dec =3D false; + event->pending_work =3D 1; + } } if (dec) local_dec(&event->ctx->nr_pending); --=20 2.44.0 From nobody Mon Feb 9 07:19:42 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 56C5B13F446 for ; Fri, 29 Mar 2024 23:58:51 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1711756731; cv=none; b=LY/nKBsux+/qzsncPDbscqatpoYaBomRjBQyqUiimZoMxl4n0nj/JcNdUM8mU6Y8dAamROOh8IeqNaXLcHBKm3cb9vD1a18axlw3OrEte34drlXscb1RxjesnSKYhoAJ/qifJXOEV1uJwXdpY8K9HZzZbx57+mwI3jlKK06kGeA= ARC-Message-Signature: i=1; 
a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1711756731; c=relaxed/simple; bh=GmB2wTQS4lTN4x1DBM9671s8jAtPbaalNinvZ1OGbQA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=eN5RPzWb0BYdE0+ntjUU4dSrmFHUov2NhSmRRJRdmn8gJE1XN/6BgHxZDyFqc4Xhf5eFZ1yJCUWuLRqKP3+9v8r6prO3ch0Z/OsvAOGw+ORAb3MWuKrCDGRCQJV9ZBgBGRvVHK4gIBN/y7sK5U4jZbociB6Xs555cSR6wje1Yow= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=U6HR25eo; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="U6HR25eo" Received: by smtp.kernel.org (Postfix) with ESMTPSA id BD650C433A6; Fri, 29 Mar 2024 23:58:48 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1711756731; bh=GmB2wTQS4lTN4x1DBM9671s8jAtPbaalNinvZ1OGbQA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=U6HR25eo1eOGai9gNwTe9QF6SBpR/4G4XgXEhXUmniRnlGCSCDAbDUNdfZyqKj3h+ k8PirAMVjA1LMBmgeoXnQ+KU+zMUN7UJ1vDNF1T/9xY3KT618Fu/MRydTQFdCrH0R7 BgGBWwKAOk1oxBbSAYqKkX8GFHu3duigfZ4foOyjpYWGXXRoqA56G/wBSWLky8ZIdd sH7BWt9iPbMYgRj2IvIblWxsRrvmwGJcNuST8XVa35S7WxdUWcXW0e3k52lPc0WELa Po4uyTOy6JZiKVi1nCjXUeJCVzhN23CWgXTVokGHgO3prtupPxTeIhS1oT4KK/+vlu HAq0zHdJmVlYQ== From: Frederic Weisbecker To: LKML Cc: Frederic Weisbecker , Peter Zijlstra , Ingo Molnar , Arnaldo Carvalho de Melo , Namhyung Kim , Mark Rutland , Alexander Shishkin , Jiri Olsa , Ian Rogers , Adrian Hunter Subject: [PATCH 4/4] perf: Fix event leak upon exec and file release Date: Sat, 30 Mar 2024 00:58:12 +0100 Message-ID: <20240329235812.18917-5-frederic@kernel.org> X-Mailer: git-send-email 2.44.0 In-Reply-To: <20240329235812.18917-1-frederic@kernel.org> References: <20240329235812.18917-1-frederic@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 
1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The perf pending task work is never waited for when the matching event is released. In the case of a child event, released via free_event() directly, this can potentially result in a leaked event, such as in the following scenario that doesn't even require a weak IRQ work implementation to trigger: schedule() prepare_task_switch() =3D=3D=3D=3D=3D=3D=3D> perf_event_overflow() event->pending_sigtrap =3D ... irq_work_queue(&event->pending_irq) <=3D=3D=3D=3D=3D=3D=3D perf_event_task_sched_out() event_sched_out() event->pending_sigtrap =3D 0; atomic_long_inc_not_zero(&event->refcount) task_work_add(&event->pending_task) finish_lock_switch() =3D=3D=3D=3D=3D=3D=3D> perf_pending_irq() //do nothing, rely on pending task work <=3D=3D=3D=3D=3D=3D=3D begin_new_exec() perf_event_exit_task() perf_event_exit_event() // If is child event free_event() WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) !=3D 1) // event is leaked Similar scenarios can also happen with perf_event_remove_on_exec() or simply against concurrent perf_event_release(). Fix this by synchronizing against the possibly remaining pending task work while freeing the event, just as is done with remaining pending IRQ work. This means that the pending task callback neither needs to nor should hold a reference to the event, since holding one would prevent the event from ever being freed. 
Fixes: 517e6a301f34 ("perf: Fix perf_pending_task() UaF") Signed-off-by: Frederic Weisbecker --- include/linux/perf_event.h | 1 + kernel/events/core.c | 33 +++++++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d2a15c0c6f8a..2f2e222e1003 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -786,6 +786,7 @@ struct perf_event { struct irq_work pending_irq; struct callback_head pending_task; unsigned int pending_work; + wait_queue_head_t pending_work_wq; =20 atomic_t event_limit; =20 diff --git a/kernel/events/core.c b/kernel/events/core.c index c1632e69c69d..588e35848a4e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2290,7 +2290,6 @@ event_sched_out(struct perf_event *event, struct perf= _event_context *ctx) if (state !=3D PERF_EVENT_STATE_OFF && !event->pending_work) { if (task_work_add(current, &event->pending_task, TWA_RESUME) >=3D 0) { - WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); dec =3D false; event->pending_work =3D 1; } @@ -5188,9 +5187,35 @@ static bool exclusive_event_installable(struct perf_= event *event, static void perf_addr_filters_splice(struct perf_event *event, struct list_head *head); =20 +static void perf_pending_task_sync(struct perf_event *event) +{ + struct callback_head *head =3D &event->pending_task; + + if (!event->pending_work) + return; + /* + * If the task is queued to the current task's queue, we + * obviously can't wait for it to complete. Simply cancel it. + */ + if (task_work_cancel(current, head)) { + event->pending_work =3D 0; + local_dec(&event->ctx->nr_pending); + return; + } + + /* + * All accesses related to the event are within the same + * non-preemptible section in perf_pending_task(). The RCU + * grace period before the event is freed will make sure all + * those accesses are complete by then. 
+ */ + wait_event(event->pending_work_wq, !event->pending_work); +} + static void _free_event(struct perf_event *event) { irq_work_sync(&event->pending_irq); + perf_pending_task_sync(event); =20 unaccount_event(event); =20 @@ -6808,24 +6833,23 @@ static void perf_pending_task(struct callback_head = *head) struct perf_event *event =3D container_of(head, struct perf_event, pendin= g_task); int rctx; =20 + preempt_disable_notrace(); /* * If we 'fail' here, that's OK, it means recursion is already disabled * and we won't recurse 'further'. */ - preempt_disable_notrace(); rctx =3D perf_swevent_get_recursion_context(); =20 if (event->pending_work) { event->pending_work =3D 0; perf_sigtrap(event); local_dec(&event->ctx->nr_pending); + wake_up(&event->pending_work_wq); } =20 if (rctx >=3D 0) perf_swevent_put_recursion_context(rctx); preempt_enable_notrace(); - - put_event(event); } =20 #ifdef CONFIG_GUEST_PERF_EVENTS @@ -11933,6 +11957,7 @@ perf_event_alloc(struct perf_event_attr *attr, int = cpu, init_waitqueue_head(&event->waitq); init_irq_work(&event->pending_irq, perf_pending_irq); init_task_work(&event->pending_task, perf_pending_task); + init_waitqueue_head(&event->pending_work_wq); =20 mutex_init(&event->mmap_mutex); raw_spin_lock_init(&event->addr_filters.lock); --=20 2.44.0