From nobody Tue Apr 7 07:05:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3AA01ECAAD3 for ; Tue, 30 Aug 2022 01:44:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229737AbiH3BoK (ORCPT ); Mon, 29 Aug 2022 21:44:10 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48598 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229550AbiH3BoH (ORCPT ); Mon, 29 Aug 2022 21:44:07 -0400 Received: from mail-pj1-x1034.google.com (mail-pj1-x1034.google.com [IPv6:2607:f8b0:4864:20::1034]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B3F157C1C2 for ; Mon, 29 Aug 2022 18:44:06 -0700 (PDT) Received: by mail-pj1-x1034.google.com with SMTP id p8-20020a17090ad30800b001fdfc8c7567so294905pju.1 for ; Mon, 29 Aug 2022 18:44:06 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc; bh=4G45J5/wbH8CJ9lkE1Iii44KzFmB6q2uZP6UhKMt5F0=; b=hme/xCx/PdWfI+daRrW3BOD4RhkV7SbDD/dzQtAIJm3N9y6A0v2eTJXbzhQRklZ33N MNb27S+7PWwZ+54cwchHFCS64G6NXu64y3i+aQuZIYPMffL0uSdT1ragDchYw2qk9+DO NirXWwVaPO1mu0DWrmpUVGRHbl1dnNKLCxMbJYk7NyiUh0HXBhLP8jw8expuxco0Hroi iJriviALZehlN7lEgZa/8X9jTN5vNX3NHJlHKV3d1/iWQMHQIwUTrhmVAtJn1SDZGH7F I5Up7tKTClddctA6Ad3bXIRGJJpxlKgKGUMa/1iZxUGxS6bpVNH5d2i+KWAYhMb1u5id 89zA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc; bh=4G45J5/wbH8CJ9lkE1Iii44KzFmB6q2uZP6UhKMt5F0=; b=bt11qfNg4EPJ37/whk+DHrCi0ovOEPeD0q7c1W0Lbj/9h4xcouCyxelUxNRdO/JGVt 
jWtnUXDb3b1v1JECwhXWyTlEXvdkVzv+UVV658OEDS1FzVHtU4aVjNJVnwo+0fQLHGHw QVha//E9xcdG0/PityOKr+ujNMnW67S2KgXRe051+QqlQGpQf1QA1I0qeleoq7Btf8xF 1MhampTduh7Cd97mQ/ObzKU6hNZ+i512HywGWxqgQnw20xQe/jjEaT/Towfsx6Dwv0m4 16ggwvvzwOZl49hmJ1BkV9DX+Lrk0IgsFAC3+JpB3IkTBUvoKOmylJ99TuIZnNEqXQjh YyEw== X-Gm-Message-State: ACgBeo2u7H2AmKXPkJOygHFBAY3DcXIpxvBxG6uD8N3tl6PbLqqkXuUm SpRbtdEeR0oZihx9rVFdC7TxXrJx+Oo= X-Google-Smtp-Source: AA6agR4GNeZem4U6gjMW7sBQ700KlnJNkMGpLWafUK2syMDcnXqz4HabgHHGrcE9a5qv1yfg4bRBjg== X-Received: by 2002:a17:902:d708:b0:172:f24e:2e59 with SMTP id w8-20020a170902d70800b00172f24e2e59mr19018781ply.169.1661823845456; Mon, 29 Aug 2022 18:44:05 -0700 (PDT) Received: from laptop.hsd1.wa.comcast.net ([2601:600:8500:5f14:d627:c51e:516e:a105]) by smtp.gmail.com with ESMTPSA id v14-20020aa799ce000000b00535faa9d6f2sm7976038pfi.53.2022.08.29.18.44.04 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Mon, 29 Aug 2022 18:44:04 -0700 (PDT) From: Andrei Vagin To: linux-kernel@vger.kernel.org Cc: Andrei Vagin , Andy Lutomirski , Christian Brauner , Dietmar Eggemann , Kees Cook , Ingo Molnar , Juri Lelli , Peter Oskolkov , Peter Zijlstra , Tycho Andersen , Will Drewry , Vincent Guittot Subject: [PATCH 1/4] seccomp: don't use semaphore and wait_queue together Date: Mon, 29 Aug 2022 18:43:53 -0700 Message-Id: <20220830014356.5364-2-avagin@gmail.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220830014356.5364-1-avagin@gmail.com> References: <20220830014356.5364-1-avagin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" There is no reason to use two different primitives that do similar things. 
Signed-off-by: Andrei Vagin --- kernel/seccomp.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index e9852d1b4a5e..667fd2d89464 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -145,7 +145,7 @@ struct seccomp_kaddfd { * @notifications: A list of struct seccomp_knotif elements. */ struct notification { - struct semaphore request; + atomic_t requests; u64 next_id; struct list_head notifications; }; @@ -1116,7 +1116,7 @@ static int seccomp_do_user_notification(int this_sysc= all, list_add_tail(&n.list, &match->notif->notifications); INIT_LIST_HEAD(&n.addfd); =20 - up(&match->notif->request); + atomic_add(1, &match->notif->requests); wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM); =20 /* @@ -1388,8 +1388,10 @@ static long seccomp_set_mode_strict(void) #ifdef CONFIG_SECCOMP_FILTER static void seccomp_notify_free(struct seccomp_filter *filter) { - kfree(filter->notif); - filter->notif =3D NULL; + struct notification *notif =3D filter->notif; + + WRITE_ONCE(filter->notif, NULL); + kfree_rcu(notif); } =20 static void seccomp_notify_detach(struct seccomp_filter *filter) @@ -1450,6 +1452,16 @@ find_notification(struct seccomp_filter *filter, u64= id) return NULL; } =20 +static bool notify_wakeup(struct seccomp_filter *filter) +{ + bool ret; + + rcu_read_lock(); + ret =3D atomic_add_unless(&filter->notif->requests, -1, 0); + rcu_read_unlock(); + + return ret; +} =20 static long seccomp_notify_recv(struct seccomp_filter *filter, void __user *buf) @@ -1467,7 +1479,7 @@ static long seccomp_notify_recv(struct seccomp_filter= *filter, =20 memset(&unotif, 0, sizeof(unotif)); =20 - ret =3D down_interruptible(&filter->notif->request); + ret =3D wait_event_interruptible(filter->wqh, notify_wakeup(filter)); if (ret < 0) return ret; =20 @@ -1515,7 +1527,8 @@ static long seccomp_notify_recv(struct seccomp_filter= *filter, if (should_sleep_killable(filter, knotif)) 
complete(&knotif->ready); knotif->state =3D SECCOMP_NOTIFY_INIT; - up(&filter->notif->request); + atomic_add(1, &filter->notif->requests); + wake_up_poll(&filter->wqh, EPOLLIN | EPOLLRDNORM); } mutex_unlock(&filter->notify_lock); } @@ -1771,15 +1784,15 @@ static const struct file_operations seccomp_notify_= ops =3D { static struct file *init_listener(struct seccomp_filter *filter) { struct file *ret; + struct notification *notif; =20 ret =3D ERR_PTR(-ENOMEM); - filter->notif =3D kzalloc(sizeof(*(filter->notif)), GFP_KERNEL); - if (!filter->notif) + notif =3D kzalloc(sizeof(*notif), GFP_KERNEL); + if (!notif) goto out; =20 - sema_init(&filter->notif->request, 0); - filter->notif->next_id =3D get_random_u64(); - INIT_LIST_HEAD(&filter->notif->notifications); + notif->next_id =3D get_random_u64(); + INIT_LIST_HEAD(¬if->notifications); =20 ret =3D anon_inode_getfile("seccomp notify", &seccomp_notify_ops, filter, O_RDWR); @@ -1788,10 +1801,11 @@ static struct file *init_listener(struct seccomp_fi= lter *filter) =20 /* The file has a reference to it now */ __get_seccomp_filter(filter); + WRITE_ONCE(filter->notif, notif); =20 out_notif: if (IS_ERR(ret)) - seccomp_notify_free(filter); + kfree(notif); out: return ret; } --=20 2.37.2 From nobody Tue Apr 7 07:05:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E7FBEECAAD2 for ; Tue, 30 Aug 2022 01:44:46 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229852AbiH3Bog (ORCPT ); Mon, 29 Aug 2022 21:44:36 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48604 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229602AbiH3BoI (ORCPT ); Mon, 29 Aug 2022 21:44:08 -0400 Received: from mail-pf1-x431.google.com (mail-pf1-x431.google.com 
[IPv6:2607:f8b0:4864:20::431]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id F0F7F7C305 for ; Mon, 29 Aug 2022 18:44:07 -0700 (PDT) Received: by mail-pf1-x431.google.com with SMTP id 145so9549500pfw.4 for ; Mon, 29 Aug 2022 18:44:07 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc; bh=G+5+t9wS3j8loUJEHuF9oZ643Kgj2nRnoYQ+cdIzE4Q=; b=p8rDSiod8Q6m2f7lk+vGYHRbMdp8G+R142a05Y91cEYOFDAIz4HqPR/p3gogUm0Ylz 5l58AwitYF7ClY7/r1uh90AHZqPulavtZLGuBk1pX+s1yUacjmsdsKZW9neSzre0hvZu Uqsfq7HgJLDLvNwr/UmZiExWBFqy8ePwhgbx3JfoKZJD7K6x9c+27IKwVKo7mLHeE7wG GQFB0gz0ZkrPU2pWnPg3Hut15UYaAuEwNO8/Xg08tn3wqAdJOGwrmqc0DyJLQZfr2pah +8nFLLbrjKfx/EsQNSPRF64j6afoBibu8dS6S/XrL3fnl1dAICEfbtwxZwqkWGxhZJJO QOBA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc; bh=G+5+t9wS3j8loUJEHuF9oZ643Kgj2nRnoYQ+cdIzE4Q=; b=J0EvrtehfgzIJNEBxlPl8fMoiou5m06tvza4cSp8RVLoVI17di5TgBRbno9iaI2CtU 4/VHcX3eNZX4OAghZbcctmpCa7k/g1VeAW+haE9zw6P3gWCaylqEIZtB6G1zesz/Fm2o uF5GFxn8cmc9eav3B2THxkdjgUlyos9r+0jvc7dzAcKm79tHPao/3gS+4RyiyMi1L0eM QwuZmPkkS7uPw8EFVO3qfoBwHo1CjEgmY1t5R8erLuJsx6EqB4NG7BImkAmWBKW/eGh7 J/2/xA8trnLj1q14oEXFFE0ZLfbo+1lF1nVzDoZMr2jPwx9oYogK4NvsPb318rGqo/qd xBOA== X-Gm-Message-State: ACgBeo15P2t2EAkEp6DZbbzPk3VOb3oYCJCuLf1s3Y8sI6DyZhKM1fTZ N3bcwPNO3fU9adWVvKvZAgDCOlKFHcQ= X-Google-Smtp-Source: AA6agR5h5y0JHFyZVNXdoeR4s3tDd2r5LXjhoA3xugPlUhw3wrPSh1KuENIbd4Gu1ZSwl/Dg+K4pyA== X-Received: by 2002:a65:494b:0:b0:428:d68c:35bf with SMTP id q11-20020a65494b000000b00428d68c35bfmr15539520pgs.509.1661823846843; Mon, 29 Aug 2022 18:44:06 -0700 (PDT) Received: from laptop.hsd1.wa.comcast.net ([2601:600:8500:5f14:d627:c51e:516e:a105]) by smtp.gmail.com with ESMTPSA id 
v14-20020aa799ce000000b00535faa9d6f2sm7976038pfi.53.2022.08.29.18.44.05 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Mon, 29 Aug 2022 18:44:06 -0700 (PDT) From: Andrei Vagin To: linux-kernel@vger.kernel.org Cc: Andrei Vagin , Andy Lutomirski , Christian Brauner , Dietmar Eggemann , Kees Cook , Ingo Molnar , Juri Lelli , Peter Oskolkov , Peter Zijlstra , Tycho Andersen , Will Drewry , Vincent Guittot Subject: [PATCH 2/4] sched: add WF_CURRENT_CPU and externise ttwu Date: Mon, 29 Aug 2022 18:43:54 -0700 Message-Id: <20220830014356.5364-3-avagin@gmail.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220830014356.5364-1-avagin@gmail.com> References: <20220830014356.5364-1-avagin@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" From: Peter Oskolkov Add the WF_CURRENT_CPU wake flag that advises the scheduler to move the wakee to the current CPU. This is useful for fast on-CPU context switching use cases such as UMCG. In addition, make ttwu external rather than static so that the flag can be passed to it from outside of sched/core.c. Signed-off-by: Peter Oskolkov Signed-off-by: Andrei Vagin --- kernel/sched/core.c | 3 +-- kernel/sched/fair.c | 4 ++++ kernel/sched/sched.h | 13 ++++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ee28253c9ac0..008be12c31e6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4045,8 +4045,7 @@ bool ttwu_state_match(struct task_struct *p, unsigned= int state, int *success) * Return: %true if @p->state changes (an actual wakeup was done), * %false otherwise. 
*/ -static int -try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) +int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_fla= gs) { unsigned long flags; int cpu, success =3D 0; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 914096c5b1ae..7b043870b634 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7028,6 +7028,10 @@ select_task_rq_fair(struct task_struct *p, int prev_= cpu, int wake_flags) if (wake_flags & WF_TTWU) { record_wakee(p); =20 + if ((wake_flags & WF_CURRENT_CPU) && + cpumask_test_cpu(cpu, p->cpus_ptr)) + return cpu; + if (sched_energy_enabled()) { new_cpu =3D find_energy_efficient_cpu(p, prev_cpu); if (new_cpu >=3D 0) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e26688d387ae..6e93e8808bfd 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2080,12 +2080,13 @@ static inline int task_on_rq_migrating(struct task_= struct *p) } =20 /* Wake flags. The first three directly map to some SD flag value */ -#define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */ -#define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */ -#define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE */ +#define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC= */ +#define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK= */ +#define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE= */ =20 -#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */ -#define WF_MIGRATED 0x20 /* Internal use, task got migrated */ +#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */ +#define WF_MIGRATED 0x20 /* Internal use, task got migrated */ +#define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CP= U. 
*/ =20 #ifdef CONFIG_SMP static_assert(WF_EXEC =3D=3D SD_BALANCE_EXEC); @@ -3151,6 +3152,8 @@ static inline bool is_per_cpu_kthread(struct task_str= uct *p) extern void swake_up_all_locked(struct swait_queue_head *q); extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_qu= eue *wait); =20 +extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int= wake_flags); + #ifdef CONFIG_PREEMPT_DYNAMIC extern int preempt_dynamic_mode; extern int sched_dynamic_mode(const char *str); --=20 2.37.2 From nobody Tue Apr 7 07:05:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 18C65ECAAD3 for ; Tue, 30 Aug 2022 01:44:27 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229840AbiH3BoT (ORCPT ); Mon, 29 Aug 2022 21:44:19 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48610 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229689AbiH3BoK (ORCPT ); Mon, 29 Aug 2022 21:44:10 -0400 Received: from mail-pg1-x532.google.com (mail-pg1-x532.google.com [IPv6:2607:f8b0:4864:20::532]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 70B257C1C2 for ; Mon, 29 Aug 2022 18:44:09 -0700 (PDT) Received: by mail-pg1-x532.google.com with SMTP id c24so9301314pgg.11 for ; Mon, 29 Aug 2022 18:44:09 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc; bh=BKqwlh2LYsyA0xlbuRIO9SBcDmgUfBuyVHnS5ZwNoOg=; b=Fv+/cLbwo1iO+WAVeCuZ1vvS2QJyW7JL6cHtZ1jdcabtqKyvDPTpIefGiVZ5yw5V5t aRbk+afsASV4S+b7UfR4lWcFtOAz33T8lREB0hH9Eaoufs4fGHmigLHl4dZ6C46QdByz KeMJPPi1tnK+mL2cN8TfcY5ErIS+WgVBhifznkUvVqlsoZlg7aXTGsT3/PjRXSen3TKB 
eV+zNnZFqa+0WmDOxv4sPstNKAlKb4Ok79IoDL8OEdcTDdfapLrMYEHGT88f5SX+TCBE /vS58uGQwYD2eZfXDSkqc4eiV5+W4nkUo/jQC5Ru9Dat/Kscu/19lfxB9vr4s8ilbtUq 93Fg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc; bh=BKqwlh2LYsyA0xlbuRIO9SBcDmgUfBuyVHnS5ZwNoOg=; b=WBfnWetj/dKa1kiveXda+AEFX6C28r62WyOHLdjZFls640vOsMpLQ95PsZlhdDDYEG ukRhyymdLxjDCY+s4n97stqjUv+TiUBSf4WdIohNcZkIICE/d3Wdam2fdIzEe2+ve+U8 pCAl1O1BzWMC/6P+oSkilaoK1iTsMVucJ7Mu3Gav11BAv3lYQMfaf7kPtowrXbr6PBPa t1oHVYzWcVEgQ/ilWy6/wG099zabPY4WlzVQQmWU33Wc+W9IR/gG2TVW5DXQ0nwW98HN 4kM0mrQSqlnU412nDlNymLFzo5cZGCAeHLQjw8FwcZiHPNUUi0mS+incIafxl+un/RvK lzxQ== X-Gm-Message-State: ACgBeo3wNh34tr3zqXSfie20KzUaBaN+6ns1j97A3yvbZOQP3Ptz6yZ0 4Lc8xJMhQVutgUj4CVf3CDTe4sYf/HI= X-Google-Smtp-Source: AA6agR7P4i0pBPdvGm0aHAx9/RlRD0/FDRYQzHti2KWDUbjF3jvIiPWmddCZYi+AkdvVVqj8eSeC0g== X-Received: by 2002:a63:5418:0:b0:42a:d773:cbd6 with SMTP id i24-20020a635418000000b0042ad773cbd6mr16214306pgb.106.1661823848381; Mon, 29 Aug 2022 18:44:08 -0700 (PDT) Received: from laptop.hsd1.wa.comcast.net ([2601:600:8500:5f14:d627:c51e:516e:a105]) by smtp.gmail.com with ESMTPSA id v14-20020aa799ce000000b00535faa9d6f2sm7976038pfi.53.2022.08.29.18.44.07 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Mon, 29 Aug 2022 18:44:07 -0700 (PDT) From: Andrei Vagin To: linux-kernel@vger.kernel.org Cc: Andrei Vagin , Andy Lutomirski , Christian Brauner , Dietmar Eggemann , Kees Cook , Ingo Molnar , Juri Lelli , Peter Oskolkov , Peter Zijlstra , Tycho Andersen , Will Drewry , Vincent Guittot Subject: [PATCH 3/4] sched: add a few helpers to wake up tasks on the current cpu Date: Mon, 29 Aug 2022 18:43:55 -0700 Message-Id: <20220830014356.5364-4-avagin@gmail.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220830014356.5364-1-avagin@gmail.com> References: <20220830014356.5364-1-avagin@gmail.com> 
MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Add complete_on_current_cpu, wake_up_poll_on_current_cpu helpers to wake up processes on the current CPU. Signed-off-by: Andrei Vagin --- include/linux/completion.h | 1 + include/linux/swait.h | 1 + include/linux/wait.h | 3 +++ kernel/sched/completion.c | 12 ++++++++++++ kernel/sched/core.c | 2 +- kernel/sched/swait.c | 11 +++++++++++ kernel/sched/wait.c | 5 +++++ 7 files changed, 34 insertions(+), 1 deletion(-) diff --git a/include/linux/completion.h b/include/linux/completion.h index 51d9ab079629..1699e697a225 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -115,6 +115,7 @@ extern bool try_wait_for_completion(struct completion *= x); extern bool completion_done(struct completion *x); =20 extern void complete(struct completion *); +extern void complete_on_current_cpu(struct completion *x); extern void complete_all(struct completion *); =20 #endif diff --git a/include/linux/swait.h b/include/linux/swait.h index 6a8c22b8c2a5..1f27b254adf5 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -147,6 +147,7 @@ static inline bool swq_has_sleeper(struct swait_queue_h= ead *wq) extern void swake_up_one(struct swait_queue_head *q); extern void swake_up_all(struct swait_queue_head *q); extern void swake_up_locked(struct swait_queue_head *q); +extern void swake_up_locked_on_current_cpu(struct swait_queue_head *q); =20 extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct = swait_queue *wait, int state); extern long prepare_to_swait_event(struct swait_queue_head *q, struct swai= t_queue *wait, int state); diff --git a/include/linux/wait.h b/include/linux/wait.h index 58cfbf81447c..dcd01dd4de3e 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -210,6 +210,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, st= ruct 
wait_queue_entry *wq } =20 void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr,= void *key); +void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned in= t mode, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mo= de, void *key); void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, unsigned int mode, void *key, wait_queue_entry_t *bookmark); @@ -237,6 +238,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head= ); #define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m)) #define wake_up_poll(x, m) \ __wake_up(x, TASK_NORMAL, 1, poll_to_key(m)) +#define wake_up_poll_on_current_cpu(x, m) \ + __wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m)) #define wake_up_locked_poll(x, m) \ __wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m)) #define wake_up_interruptible_poll(x, m) \ diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 35f15c26ed54..1ae9b34822ef 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -38,6 +38,18 @@ void complete(struct completion *x) } EXPORT_SYMBOL(complete); =20 +void complete_on_current_cpu(struct completion *x) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&x->wait.lock, flags); + + if (x->done !=3D UINT_MAX) + x->done++; + swake_up_locked_on_current_cpu(&x->wait); + raw_spin_unlock_irqrestore(&x->wait.lock, flags); +} + /** * complete_all: - signals all threads waiting on this completion * @x: holds the state of this particular completion diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 008be12c31e6..1e164d8fde1a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6813,7 +6813,7 @@ asmlinkage __visible void __sched preempt_schedule_ir= q(void) int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wak= e_flags, void *key) { - WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); + WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && 
wake_flags & ~(WF_SYNC|WF_= CURRENT_CPU)); return try_to_wake_up(curr->private, mode, wake_flags); } EXPORT_SYMBOL(default_wake_function); diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index 76b9b796e695..9ebe23868942 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c @@ -31,6 +31,17 @@ void swake_up_locked(struct swait_queue_head *q) } EXPORT_SYMBOL(swake_up_locked); =20 +void swake_up_locked_on_current_cpu(struct swait_queue_head *q) +{ + struct swait_queue *curr; + + if (list_empty(&q->task_list)) + return; + + curr =3D list_first_entry(&q->task_list, typeof(*curr), task_list); + try_to_wake_up(curr->task, TASK_NORMAL, WF_CURRENT_CPU); + list_del_init(&curr->task_list); +} /* * Wake up all waiters. This is an interface which is solely exposed for * completions and not for general usage. diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 9860bb9a847c..9a78bca79419 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -157,6 +157,11 @@ void __wake_up(struct wait_queue_head *wq_head, unsign= ed int mode, } EXPORT_SYMBOL(__wake_up); =20 +void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned in= t mode, void *key) +{ + __wake_up_common_lock(wq_head, mode, 1, WF_CURRENT_CPU, key); +} + /* * Same as __wake_up but called with the spinlock in wait_queue_head_t hel= d. 
*/ --=20 2.37.2 From nobody Tue Apr 7 07:05:54 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A1458ECAAD2 for ; Tue, 30 Aug 2022 01:44:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229785AbiH3BoP (ORCPT ); Mon, 29 Aug 2022 21:44:15 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:48616 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229550AbiH3BoM (ORCPT ); Mon, 29 Aug 2022 21:44:12 -0400 Received: from mail-pl1-x631.google.com (mail-pl1-x631.google.com [IPv6:2607:f8b0:4864:20::631]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 76CD47C305 for ; Mon, 29 Aug 2022 18:44:11 -0700 (PDT) Received: by mail-pl1-x631.google.com with SMTP id f12so9505914plb.11 for ; Mon, 29 Aug 2022 18:44:11 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc; bh=kOzfBRq5yYox21NuVDXabEwbFujCa1I2u3nF6XVE7w8=; b=pALGE31EpvVdTowSCgZZehs0ki4m1NmuCM1sr1es6ml8MaL/52724cy15yzQP5whVd 68iSA+kCAF7QSWQ8LXV+4CzxQqrURBMQenvhYTGAM/j50cvmQNQ95O8FsnDsCJQWUUZS 7P2P8i2loOMvXPBVzMF74+5w+460YR5fCMLiv0XbwLl3HCPQH0Chk3kpoEcFDYiCgZx0 cza7/UTXI3BO++IJY5iQ23tjCRotz1pY96OVtp+OjRZah694J+9f+oGtKCZNz1YOQlJW kd/vsmHxHbKU1nyf+tHWvvmDX7xPvEqhbDPGafupJVsrUAvmMQvBUkTDo3mqPE9OXOyE kzyA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc; bh=kOzfBRq5yYox21NuVDXabEwbFujCa1I2u3nF6XVE7w8=; b=T2B/r9Zs5go9iSD2qEyCRHfExXRBdSZ8a6inyVgKYIMaUwcoZsbfSmS+FdbWFAoiEX V6a5yiAlwXE7pmZO4QWrodKnMhIJzajx7Rjj6D2tqHHA1zRBtqJ1JKbSEWXp/rUjk4zX 
/zJWXiH8WdoSxOM5hcsWHCvepVN67BSoFxS0+cBjVl76nazt9YCR9bOEsIyGtDdU08f+ XRz1/Ex2eyWuppqclsyAsJ1jFhr9PVvv6FI+JlifR19vuc2di5tUCDJKUnB4xTE8T7Fq HONz5cfCNsJqnbi1Xj8Ko4r+aQzCr0BO8WT4kzDnZGHZUjFuAYA+Pd7CLpUCyklnS82K +mPQ== X-Gm-Message-State: ACgBeo22Qz3RoHZkhbDVC2b4JH6UliItxQ9BegEMRVh2+CfqwmyDOxIL l6eDiT5eH1RjWNh/uOrwQOPDkCEte58= X-Google-Smtp-Source: AA6agR7QiP0eFqOebRee1dD781w80C77xiKWy9iQB4xkY+dihUOKld3Mgvzh8S9FY+JhrkhghWbuVg== X-Received: by 2002:a17:90a:cb14:b0:1fd:c964:f708 with SMTP id z20-20020a17090acb1400b001fdc964f708mr8311210pjt.62.1661823849855; Mon, 29 Aug 2022 18:44:09 -0700 (PDT) Received: from laptop.hsd1.wa.comcast.net ([2601:600:8500:5f14:d627:c51e:516e:a105]) by smtp.gmail.com with ESMTPSA id v14-20020aa799ce000000b00535faa9d6f2sm7976038pfi.53.2022.08.29.18.44.08 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Mon, 29 Aug 2022 18:44:09 -0700 (PDT) From: Andrei Vagin To: linux-kernel@vger.kernel.org Cc: Andrei Vagin , Andy Lutomirski , Christian Brauner , Dietmar Eggemann , Kees Cook , Ingo Molnar , Juri Lelli , Peter Oskolkov , Peter Zijlstra , Tycho Andersen , Will Drewry , Vincent Guittot Subject: [PATCH 4/4] seccomp: add the synchronous mode for seccomp_unotify Date: Mon, 29 Aug 2022 18:43:56 -0700 Message-Id: <20220830014356.5364-5-avagin@gmail.com> X-Mailer: git-send-email 2.37.2 In-Reply-To: <20220830014356.5364-1-avagin@gmail.com> References: <20220830014356.5364-1-avagin@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org seccomp_unotify allows more privileged processes to perform actions on behalf of less privileged processes. In many cases, the workflow is fully synchronous. This means a target process triggers a system call and passes control to a supervisor process that handles the system call and returns control to the target process. 
In this context, "synchronous" means that only one process is running and another one is waiting. There is the WF_CURRENT_CPU flag that is used to advise the scheduler to move the wakee to the current CPU. For such synchronous workflows, it makes context switches a few times faster. Right now, each interaction takes 12=C2=B5s. With this patch, it takes about 3=C2=B5s. This change introduces the SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP flag that is used to enable the sync mode. Signed-off-by: Andrei Vagin --- include/uapi/linux/seccomp.h | 4 ++++ kernel/seccomp.c | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 0fdc6ef02b94..dbfc9b37fcae 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -115,6 +115,8 @@ struct seccomp_notif_resp { __u32 flags; }; =20 +#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) + /* valid flags for seccomp_notif_addfd */ #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ #define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomic= ally */ @@ -150,4 +152,6 @@ struct seccomp_notif_addfd { #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ struct seccomp_notif_addfd) =20 +#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) + #endif /* _UAPI_LINUX_SECCOMP_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 667fd2d89464..c24900eb8ced 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -143,11 +143,14 @@ struct seccomp_kaddfd { * filter->notify_lock. * @next_id: The id of the next request. * @notifications: A list of struct seccomp_knotif elements. + * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags. 
*/ + struct notification { atomic_t requests; u64 next_id; struct list_head notifications; + int flags; }; =20 #ifdef SECCOMP_ARCH_NATIVE @@ -1117,7 +1120,10 @@ static int seccomp_do_user_notification(int this_sys= call, INIT_LIST_HEAD(&n.addfd); =20 atomic_add(1, &match->notif->requests); - wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM); + if (match->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP) + wake_up_poll_on_current_cpu(&match->wqh, EPOLLIN | EPOLLRDNORM); + else + wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM); =20 /* * This is where we wait for a reply from userspace. @@ -1574,7 +1580,10 @@ static long seccomp_notify_send(struct seccomp_filte= r *filter, knotif->error =3D resp.error; knotif->val =3D resp.val; knotif->flags =3D resp.flags; - complete(&knotif->ready); + if (filter->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP) + complete_on_current_cpu(&knotif->ready); + else + complete(&knotif->ready); out: mutex_unlock(&filter->notify_lock); return ret; @@ -1604,6 +1613,26 @@ static long seccomp_notify_id_valid(struct seccomp_f= ilter *filter, return ret; } =20 +static long seccomp_notify_set_flags(struct seccomp_filter *filter, + void __user *buf) +{ + u64 flags; + long ret; + + if (copy_from_user(&flags, buf, sizeof(flags))) + return -EFAULT; + + if (flags & ~SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP) + return -EINVAL; + + ret =3D mutex_lock_interruptible(&filter->notify_lock); + if (ret < 0) + return ret; + filter->notif->flags =3D flags; + mutex_unlock(&filter->notify_lock); + return ret; +} + static long seccomp_notify_addfd(struct seccomp_filter *filter, struct seccomp_notif_addfd __user *uaddfd, unsigned int size) @@ -1733,6 +1762,8 @@ static long seccomp_notify_ioctl(struct file *file, u= nsigned int cmd, case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR: case SECCOMP_IOCTL_NOTIF_ID_VALID: return seccomp_notify_id_valid(filter, buf); + case SECCOMP_IOCTL_NOTIF_SET_FLAGS: + return seccomp_notify_set_flags(filter, buf); } =20 /* Extensible Argument 
ioctls */ --=20 2.37.2