From nobody Mon Sep 15 18:06:08 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 50390C54EBE for ; Tue, 10 Jan 2023 21:31:48 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234519AbjAJVb3 (ORCPT ); Tue, 10 Jan 2023 16:31:29 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:40804 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234768AbjAJVaj (ORCPT ); Tue, 10 Jan 2023 16:30:39 -0500 Received: from mail-pj1-x1049.google.com (mail-pj1-x1049.google.com [IPv6:2607:f8b0:4864:20::1049]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id EC8D163D3C for ; Tue, 10 Jan 2023 13:30:19 -0800 (PST) Received: by mail-pj1-x1049.google.com with SMTP id y2-20020a17090a784200b00225c0839b80so5313179pjl.5 for ; Tue, 10 Jan 2023 13:30:19 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=v9qYSQFUkDmaIwPSOBMoDxk/OX6P7cFe8UAy56cpW5Q=; b=PdzJiw/LwFz0AEVC5fhfbbgcYl/W+56IRf1pubwDGgzlCHDfwj/JoqCetiyqFWUHtd Ro4/Tyzn2sAiVpnrLQeNXuLbJik3qrlTVkVr+61viJFbLB2hxAl0dfuCcmcRWcs8sNPi nZ128TjngolTkzPUMcSEsbCEqXTAhqCLylANSfZxiTeKfAN0MBPeLOvYtf9OCBLhX/La 0pyaT8EC0s6DRvN1Hon3jy/IXXNp8rE24EsVxjPzzh44WjK8UlzTL0qbNK6leU5TaXfV LilatZ38mgnFPkhtDv+otL08/VlBlq7uoZS3/aWeeSm+Pi3NKKW3sg4tPQbWuyDGDWkg mNcg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=v9qYSQFUkDmaIwPSOBMoDxk/OX6P7cFe8UAy56cpW5Q=; b=KhedbA/Vy69WGzk2ZpUx/Gr38HiP8iTClvg6QDV6fsYcjw+WdXZZ4XhrhZTDqfr021 
FG6yHItX+L8ngCnW7QwlGuYQVYqVoOMZyIfFagDa4Xc6Y4Dl/QzbA0/FNWrvcNcnmJrC rZGpX/8IsC07i2t1Hs1vbr6EV+IONeG/x0kY6r18RWjAefxtPZw7DTt3JZmKsaysXBD8 t/SEb5wgEmQsAZZOUX7mQ4snlYt3Dw9t2SSv3UoQps9wCJYEJCU0Zrr+CyPVQvWMYDEz /2zmlv5nHNNXa3kAIj9aLi31wfilLNbFGUVFCIDQ8T1rnzokLH21YITRdJoL46Y7ZQ89 +Zcw== X-Gm-Message-State: AFqh2krVrwSdV7jkFUoNE9azBBLQNY22PD3QT+TpuSOPlhDdFXETXbZ4 Pq2ZNCf/ibdgNFhp3sBlB/+cXIh9dx8= X-Google-Smtp-Source: AMrXdXv0UO1DLWnD3xEnF2pGZeOJmyREmFwlqto5nxU5NFNojg5kSz8G4jjYFFz+bC3JK2T8Q2z5SUFV7G0= X-Received: from avagin.kir.corp.google.com ([2620:0:1008:11:6203:13b5:2d85:b75c]) (user=avagin job=sendgmr) by 2002:a62:e715:0:b0:58a:ebaa:9586 with SMTP id s21-20020a62e715000000b0058aebaa9586mr303794pfh.36.1673386219357; Tue, 10 Jan 2023 13:30:19 -0800 (PST) Date: Tue, 10 Jan 2023 13:30:06 -0800 In-Reply-To: <20230110213010.2683185-1-avagin@google.com> Mime-Version: 1.0 References: <20230110213010.2683185-1-avagin@google.com> X-Mailer: git-send-email 2.39.0.314.g84b9a713c41-goog Message-ID: <20230110213010.2683185-2-avagin@google.com> Subject: [PATCH 1/5] seccomp: don't use semaphore and wait_queue together From: Andrei Vagin To: Peter Zijlstra , Ingo Molnar , Vincent Guittot , Dietmar Eggemann Cc: linux-kernel@vger.kernel.org, Kees Cook , Christian Brauner , Andrei Vagin , Andy Lutomirski , Juri Lelli , Peter Oskolkov , Tycho Andersen , Will Drewry Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Andrei Vagin The main reason is to use new wake_up helpers that will be added in the following patches. But here are a few other reasons: * if we use two different ways, we always need to call them both. This patch fixes seccomp_notify_recv where we forgot to call wake_up_poll in the error path. * If we use one primitive, we can control how many waiters are woken up for each request. Our goal is to wake up just one that will handle a request. 
Right now, wake_up_poll can wake up one waiter and up(&match->notif->request) can wake up one more. Signed-off-by: Andrei Vagin --- kernel/seccomp.c | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index e9852d1b4a5e..876022e9c88c 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -145,7 +145,7 @@ struct seccomp_kaddfd { * @notifications: A list of struct seccomp_knotif elements. */ struct notification { - struct semaphore request; + atomic_t requests; u64 next_id; struct list_head notifications; }; @@ -1116,7 +1116,7 @@ static int seccomp_do_user_notification(int this_sysc= all, list_add_tail(&n.list, &match->notif->notifications); INIT_LIST_HEAD(&n.addfd); =20 - up(&match->notif->request); + atomic_add(1, &match->notif->requests); wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM); =20 /* @@ -1450,6 +1450,37 @@ find_notification(struct seccomp_filter *filter, u64= id) return NULL; } =20 +static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode,= int sync, + void *key) +{ + /* Avoid a wakeup if event not interesting for us. 
*/ + if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR))) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} + +static int recv_wait_event(struct seccomp_filter *filter) +{ + DEFINE_WAIT_FUNC(wait, recv_wake_function); + int ret; + + if (atomic_add_unless(&filter->notif->requests, -1, 0) !=3D 0) + return 0; + + for (;;) { + ret =3D prepare_to_wait_event(&filter->wqh, &wait, TASK_INTERRUPTIBLE); + + if (atomic_add_unless(&filter->notif->requests, -1, 0) !=3D 0) + break; + + if (ret) + return ret; + + schedule(); + } + finish_wait(&filter->wqh, &wait); + return 0; +} =20 static long seccomp_notify_recv(struct seccomp_filter *filter, void __user *buf) @@ -1467,7 +1498,7 @@ static long seccomp_notify_recv(struct seccomp_filter= *filter, =20 memset(&unotif, 0, sizeof(unotif)); =20 - ret =3D down_interruptible(&filter->notif->request); + ret =3D recv_wait_event(filter); if (ret < 0) return ret; =20 @@ -1515,7 +1546,8 @@ static long seccomp_notify_recv(struct seccomp_filter= *filter, if (should_sleep_killable(filter, knotif)) complete(&knotif->ready); knotif->state =3D SECCOMP_NOTIFY_INIT; - up(&filter->notif->request); + atomic_add(1, &filter->notif->requests); + wake_up_poll(&filter->wqh, EPOLLIN | EPOLLRDNORM); } mutex_unlock(&filter->notify_lock); } @@ -1777,7 +1809,6 @@ static struct file *init_listener(struct seccomp_filt= er *filter) if (!filter->notif) goto out; =20 - sema_init(&filter->notif->request, 0); filter->notif->next_id =3D get_random_u64(); INIT_LIST_HEAD(&filter->notif->notifications); =20 --=20 2.39.0.314.g84b9a713c41-goog From nobody Mon Sep 15 18:06:08 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 38F6DC54EBC for ; Tue, 10 Jan 2023 21:32:20 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232715AbjAJVbv 
(ORCPT ); Tue, 10 Jan 2023 16:31:51 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:40562 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234862AbjAJVbC (ORCPT ); Tue, 10 Jan 2023 16:31:02 -0500 Received: from mail-pj1-x104a.google.com (mail-pj1-x104a.google.com [IPv6:2607:f8b0:4864:20::104a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C36C36534B for ; Tue, 10 Jan 2023 13:30:21 -0800 (PST) Received: by mail-pj1-x104a.google.com with SMTP id r5-20020a17090a1bc500b00227067dde1eso3614448pjr.0 for ; Tue, 10 Jan 2023 13:30:21 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=er80A9Er8wBpxR2BIE0p2D2/xkiobfzzZD4tbBwSUuc=; b=k7kio1FL9Eg0JJeN23HIDqJzpDd/aXo24nWXA8QtE26rzS8NIcltXZhsRIlZMJ/v4y FFL90KgnLTpkw1lLYymV6ADXdp5aTBQFGoW/wwuZWYtoaI0yAtVlEWUoLZyRcnsjdNBi JT3BNX2YSuH9BnOBoE/oeMVzlB/ShvfW9zWuGCfcHHZJGFMQ+IHSYZwpZbmHCUMzb27R ej317WRq8YkKUX6P8M+rM3r5REU7oFpoNuUkoGCut433ssPdvnJ8ILCQi9Ks2QR+GZWj yYdrXk1tL6llthn89LX846SNzgDrI4Z2ToHsY9Vr3IhA/1ymq2oE6tNHQhaHGzgxsXQ2 M2Xg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=er80A9Er8wBpxR2BIE0p2D2/xkiobfzzZD4tbBwSUuc=; b=TTaZS+K84JXkm1xMvI2M+1JC4EBVlVuoXE6L1xjBU0GaSoRiBi1G+xGXO9AvLIQGgZ I8qABCMIf1PkAP0u0QFm0WNqdPmDeEzIzE9ApS9RJ82capkb+7Kf+UUOPmOuJweD6HKD 4rTaOwTMI8qNd+3QOT8WWJrx6g/c+ec2H4gnn73A7dI3g/ocU3AHLkfORNHtBa/lH7ka FwDKwZvjqBFnIUDGeykI46S+ug9jdw5LxdHoFRmM5AmFzsaXMMUAvpN72ziv669CSMYK 7bpquvM6YpfNv6Ug8OUcx3qJhg9PyygK6ddyQlR5cotd84t6Pnvwso7XxjAH3a/7C4Ay br5g== X-Gm-Message-State: AFqh2koKtqts7scJmxzwNMfoXWTYmm7qizf8lDDNWFcSr91JQcoLJi8S HXxTHEDLDBfwlajjXF0U3BeH6IMVh6Y= X-Google-Smtp-Source: 
AMrXdXtj0wrADAOgf+zF9QujslNiLfEm/Juh4EPsEFD/mE7R9rd6zhbudUg4fgg8yDp8PqaGX8+6nFDBnOY= X-Received: from avagin.kir.corp.google.com ([2620:0:1008:11:6203:13b5:2d85:b75c]) (user=avagin job=sendgmr) by 2002:a62:3683:0:b0:578:47f4:e0ec with SMTP id d125-20020a623683000000b0057847f4e0ecmr5429470pfa.60.1673386221162; Tue, 10 Jan 2023 13:30:21 -0800 (PST) Date: Tue, 10 Jan 2023 13:30:07 -0800 In-Reply-To: <20230110213010.2683185-1-avagin@google.com> Mime-Version: 1.0 References: <20230110213010.2683185-1-avagin@google.com> X-Mailer: git-send-email 2.39.0.314.g84b9a713c41-goog Message-ID: <20230110213010.2683185-3-avagin@google.com> Subject: [PATCH 2/5] sched: add WF_CURRENT_CPU and externise ttwu From: Andrei Vagin To: Peter Zijlstra , Ingo Molnar , Vincent Guittot , Dietmar Eggemann Cc: linux-kernel@vger.kernel.org, Kees Cook , Christian Brauner , Andrei Vagin , Andy Lutomirski , Juri Lelli , Peter Oskolkov , Tycho Andersen , Will Drewry Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Peter Oskolkov Add WF_CURRENT_CPU wake flag that advices the scheduler to move the wakee to the current CPU. This is useful for fast on-CPU context switching use cases. In addition, make ttwu external rather than static so that the flag could be passed to it from outside of sched/core.c. Signed-off-by: Peter Oskolkov Signed-off-by: Andrei Vagin --- kernel/sched/core.c | 3 +-- kernel/sched/fair.c | 4 ++++ kernel/sched/sched.h | 13 ++++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 25b582b6ee5f..6478e819eb99 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4063,8 +4063,7 @@ bool ttwu_state_match(struct task_struct *p, unsigned= int state, int *success) * Return: %true if @p->state changes (an actual wakeup was done), * %false otherwise. 
*/ -static int -try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) +int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_fla= gs) { unsigned long flags; int cpu, success =3D 0; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c36aa54ae071..d6f76bead3c5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7380,6 +7380,10 @@ select_task_rq_fair(struct task_struct *p, int prev_= cpu, int wake_flags) if (wake_flags & WF_TTWU) { record_wakee(p); =20 + if ((wake_flags & WF_CURRENT_CPU) && + cpumask_test_cpu(cpu, p->cpus_ptr)) + return cpu; + if (sched_energy_enabled()) { new_cpu =3D find_energy_efficient_cpu(p, prev_cpu); if (new_cpu >=3D 0) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 771f8ddb7053..34b4c54b2a2a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2088,12 +2088,13 @@ static inline int task_on_rq_migrating(struct task_= struct *p) } =20 /* Wake flags. The first three directly map to some SD flag value */ -#define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */ -#define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */ -#define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE */ +#define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC= */ +#define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK= */ +#define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE= */ =20 -#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */ -#define WF_MIGRATED 0x20 /* Internal use, task got migrated */ +#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */ +#define WF_MIGRATED 0x20 /* Internal use, task got migrated */ +#define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CP= U. 
*/ =20 #ifdef CONFIG_SMP static_assert(WF_EXEC =3D=3D SD_BALANCE_EXEC); @@ -3245,6 +3246,8 @@ static inline bool is_per_cpu_kthread(struct task_str= uct *p) extern void swake_up_all_locked(struct swait_queue_head *q); extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_qu= eue *wait); =20 +extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int= wake_flags); + #ifdef CONFIG_PREEMPT_DYNAMIC extern int preempt_dynamic_mode; extern int sched_dynamic_mode(const char *str); --=20 2.39.0.314.g84b9a713c41-goog From nobody Mon Sep 15 18:06:08 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 52608C46467 for ; Tue, 10 Jan 2023 21:32:20 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232643AbjAJVb4 (ORCPT ); Tue, 10 Jan 2023 16:31:56 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41092 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234966AbjAJVbL (ORCPT ); Tue, 10 Jan 2023 16:31:11 -0500 Received: from mail-pf1-x44a.google.com (mail-pf1-x44a.google.com [IPv6:2607:f8b0:4864:20::44a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id CC5256718A for ; Tue, 10 Jan 2023 13:30:23 -0800 (PST) Received: by mail-pf1-x44a.google.com with SMTP id bq10-20020a056a000e0a00b00581221976c0so5810223pfb.10 for ; Tue, 10 Jan 2023 13:30:23 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=168RoWZtH8izLuFq/qvBuIIwN3CImpi6a5tV66j3lJE=; b=M/U4lN6P2nNVPkbo2DMsRtb9YjHDgL7JgQBN0SOCGpKWJ4Hp5JP9+ou5aT9092WuMB tgM+VDO8LDjQiaO5NKk9wyQXWoPGpjGV72fjOYO0PsEjA8N+vzHujKp02feFpHhyffkm 
a8aBm/VbAPyZOCT0+pU4Z1c+Rp92g9F0AImVTiSOi7imJ5ZSammmPPMnzYVAj0LEPy8T /k+zmFp3NHhJm//uRR8MkD6N7MM/FPA/pAFMRsZXpKwsYfSDn8Dqc15pwPoJvX0SJ+E5 uLCKtOyr8OaQuT9glDWtN6H8KOhBI5vCbCvEsggEtSzqriwrdkOHP7yLvLu+OBGsduBj Z4lg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=168RoWZtH8izLuFq/qvBuIIwN3CImpi6a5tV66j3lJE=; b=LwO7h0AgUkGYdDJchSau0gX8CzPq1ENKvbIXxorhGW81j4OBRAw+ZhDlyWP0rGeDnI aXHzhElGe78+7ylEUmtc6ikaNOLMs0LkwQ6LyZCBxkUfP3yWjhRAERV9afZ4TtXNfCKH 4JQuzDHwl/yiOnhm7r9fKwSkT4nQJhIegQlYKdFwz3ZAPyhunUaEQFs6OOpeG9Cnwyko tOMRC8tC8lZ8/nqTGV9RGMzjHv1/AbgeZA3+JjtikuZJ4DcnC28xi+ImR/w8XWDU22Nc sjOux2oCAqNDSboSffOuDRQMuUua75aT1yNmSdiJJ1ic5hbQZzQDIZ5X9UY+yRXw9Roo YlnQ== X-Gm-Message-State: AFqh2kpHBY0VZWmWVP9q8BokkXcLaEOH1Ke+hLo9C3/kIBfDrn0eKtx2 4VGd4JrrqwhXpxfX9e/IAsxU0cDbV7M= X-Google-Smtp-Source: AMrXdXuCrUxyyCG3lMb5F4T7c25ZCFef+GV4+b0QTGExhGw0/1FdwhfME1TSR+Ee0OiOls9xJmMXNv691FI= X-Received: from avagin.kir.corp.google.com ([2620:0:1008:11:6203:13b5:2d85:b75c]) (user=avagin job=sendgmr) by 2002:a17:90a:206:b0:226:9980:67f3 with SMTP id c6-20020a17090a020600b00226998067f3mr8656pjc.1.1673386223090; Tue, 10 Jan 2023 13:30:23 -0800 (PST) Date: Tue, 10 Jan 2023 13:30:08 -0800 In-Reply-To: <20230110213010.2683185-1-avagin@google.com> Mime-Version: 1.0 References: <20230110213010.2683185-1-avagin@google.com> X-Mailer: git-send-email 2.39.0.314.g84b9a713c41-goog Message-ID: <20230110213010.2683185-4-avagin@google.com> Subject: [PATCH 3/5] sched: add a few helpers to wake up tasks on the current cpu From: Andrei Vagin To: Peter Zijlstra , Ingo Molnar , Vincent Guittot , Dietmar Eggemann Cc: linux-kernel@vger.kernel.org, Kees Cook , Christian Brauner , Andrei Vagin , Andy Lutomirski , Juri Lelli , Peter Oskolkov , Tycho Andersen , Will Drewry Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org 
Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Andrei Vagin Add the complete_on_current_cpu and wake_up_poll_on_current_cpu helpers to wake up tasks on the current CPU. These two helpers are useful when the task needs to make a synchronous context switch to another task. In this context, synchronous means it wakes up the target task and falls asleep right after that. One example of such workloads is seccomp user notifications. This mechanism allows the supervisor process to handle system calls on behalf of a target process. While the supervisor is handling an intercepted system call, the target process will be blocked in the kernel, waiting for a response to come back. On-CPU context switches are much faster than regular ones. Signed-off-by: Andrei Vagin --- include/linux/completion.h | 1 + include/linux/swait.h | 1 + include/linux/wait.h | 3 +++ kernel/sched/completion.c | 12 ++++++++++++ kernel/sched/core.c | 2 +- kernel/sched/swait.c | 11 +++++++++++ kernel/sched/wait.c | 5 +++++ 7 files changed, 34 insertions(+), 1 deletion(-) diff --git a/include/linux/completion.h b/include/linux/completion.h index 62b32b19e0a8..fb2915676574 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -116,6 +116,7 @@ extern bool try_wait_for_completion(struct completion *= x); extern bool completion_done(struct completion *x); =20 extern void complete(struct completion *); +extern void complete_on_current_cpu(struct completion *x); extern void complete_all(struct completion *); =20 #endif diff --git a/include/linux/swait.h b/include/linux/swait.h index 6a8c22b8c2a5..1f27b254adf5 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -147,6 +147,7 @@ static inline bool swq_has_sleeper(struct swait_queue_h= ead *wq) extern void swake_up_one(struct swait_queue_head *q); extern void swake_up_all(struct swait_queue_head *q); extern void swake_up_locked(struct swait_queue_head *q); +extern void
swake_up_locked_on_current_cpu(struct swait_queue_head *q); =20 extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct = swait_queue *wait, int state); extern long prepare_to_swait_event(struct swait_queue_head *q, struct swai= t_queue *wait, int state); diff --git a/include/linux/wait.h b/include/linux/wait.h index a0307b516b09..5ec7739400f4 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -210,6 +210,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, st= ruct wait_queue_entry *wq } =20 int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, = void *key); +void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned in= t mode, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mo= de, void *key); void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, unsigned int mode, void *key, wait_queue_entry_t *bookmark); @@ -237,6 +238,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head= ); #define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m)) #define wake_up_poll(x, m) \ __wake_up(x, TASK_NORMAL, 1, poll_to_key(m)) +#define wake_up_poll_on_current_cpu(x, m) \ + __wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m)) #define wake_up_locked_poll(x, m) \ __wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m)) #define wake_up_interruptible_poll(x, m) \ diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index d57a5c1c1cd9..a1931a79c05a 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -38,6 +38,18 @@ void complete(struct completion *x) } EXPORT_SYMBOL(complete); =20 +void complete_on_current_cpu(struct completion *x) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&x->wait.lock, flags); + + if (x->done !=3D UINT_MAX) + x->done++; + swake_up_locked_on_current_cpu(&x->wait); + raw_spin_unlock_irqrestore(&x->wait.lock, flags); +} + /** * complete_all: - signals all threads waiting on this completion * 
@x: holds the state of this particular completion diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6478e819eb99..c81866821139 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6874,7 +6874,7 @@ asmlinkage __visible void __sched preempt_schedule_ir= q(void) int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wak= e_flags, void *key) { - WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); + WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC|WF_= CURRENT_CPU)); return try_to_wake_up(curr->private, mode, wake_flags); } EXPORT_SYMBOL(default_wake_function); diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index 76b9b796e695..9ebe23868942 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c @@ -31,6 +31,17 @@ void swake_up_locked(struct swait_queue_head *q) } EXPORT_SYMBOL(swake_up_locked); =20 +void swake_up_locked_on_current_cpu(struct swait_queue_head *q) +{ + struct swait_queue *curr; + + if (list_empty(&q->task_list)) + return; + + curr =3D list_first_entry(&q->task_list, typeof(*curr), task_list); + try_to_wake_up(curr->task, TASK_NORMAL, WF_CURRENT_CPU); + list_del_init(&curr->task_list); +} /* * Wake up all waiters. This is an interface which is solely exposed for * completions and not for general usage. diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 133b74730738..47803a0b8d5d 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -161,6 +161,11 @@ int __wake_up(struct wait_queue_head *wq_head, unsigne= d int mode, } EXPORT_SYMBOL(__wake_up); =20 +void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned in= t mode, void *key) +{ + __wake_up_common_lock(wq_head, mode, 1, WF_CURRENT_CPU, key); +} + /* * Same as __wake_up but called with the spinlock in wait_queue_head_t hel= d. 
*/ --=20 2.39.0.314.g84b9a713c41-goog From nobody Mon Sep 15 18:06:08 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 73542C54EBE for ; Tue, 10 Jan 2023 21:32:20 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234862AbjAJVcP (ORCPT ); Tue, 10 Jan 2023 16:32:15 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41192 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S235077AbjAJVbP (ORCPT ); Tue, 10 Jan 2023 16:31:15 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4893D687AF for ; Tue, 10 Jan 2023 13:30:26 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id r7-20020a25c107000000b006ff55ac0ee7so14015119ybf.15 for ; Tue, 10 Jan 2023 13:30:25 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=content-transfer-encoding:cc:to:from:subject:message-id:references :mime-version:in-reply-to:date:from:to:cc:subject:date:message-id :reply-to; bh=FgcxAE8iGG15WhvuAYkbURArrO/+on6Wx5AvlcLvta0=; b=leQdTYFdesAubkyBEKP4EGB8ajqVdgQISQqYPoEnT4j3qT2fDYjMXTQf7D9X0QtgyW iu6lh7wwkhKof9tuX5F8NR5c2e2zAy44O0cYUjjzdgWE8W2Rx3vcdQsJBexc6eR5vzlA ZXcacO0h8NcLqxDZOaSqjHbfjTJRkbwB6xy1W6R8le92hNg4V7iHGGqEtSPkBwPiz9cl 1FsATCzRCkGyfsUGwOq9xZzX3QAK4fm7KjTdbUtGZanZDPIMuLqCdhlqFrFO/9RnOl42 62JhQfumK/6MwxKQ+Kotogs3voCIG1rOdjDIGY2pxnTI3s0MlsAZAMML3L/Y8DhTIWO9 jNLQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:cc:to:from:subject:message-id:references :mime-version:in-reply-to:date:x-gm-message-state:from:to:cc:subject :date:message-id:reply-to; bh=FgcxAE8iGG15WhvuAYkbURArrO/+on6Wx5AvlcLvta0=; 
b=KJmAeYIuiTsPn7D9P5iG92EWtE+h2dvYervCFQNkcRESz6T5aBq+SZ3jcBCwStuiip I9iC8fjiDEvtH/btC1CdyocgXYXSiplziElkylgdQa9TJFNoaRIRCOVlG4Hr/Y39JgL0 Iat+U10i03i8ShnMYNdsEzP0jq7Jqivz6S20UZqGEjIlYKiPV6o5pBcsTAc8khuqPtim BUQIgWSTc87ZwFHMDV2njhpekFKfTLQ2t0IBkdCr89r1esRQQFxUzjDU7c+eRGrOFcGr iOzexuNZLBN1M2oYLN0zvK7kuad9B0r2R6OuOQv9c3ShTwNk4ocM9cFp3cFk6MJ9F2di 8RVQ== X-Gm-Message-State: AFqh2kpaRAjfefEm1XaW5OIidTtfpr95+cdzanDA8aOfGKj1i2hvD5xR G+tRhEYzGsSxh9L24xJc8jTzyQMF+yk= X-Google-Smtp-Source: AMrXdXt4sillpyrhcyoyGEWUU4IQTVShzCbFUy55/MCxI+FFS/AsF5aHBZazdJjIT9mWZr1W1DXt8RQkO3k= X-Received: from avagin.kir.corp.google.com ([2620:0:1008:11:6203:13b5:2d85:b75c]) (user=avagin job=sendgmr) by 2002:a5b:9cc:0:b0:722:f042:1036 with SMTP id y12-20020a5b09cc000000b00722f0421036mr8150828ybq.34.1673386225217; Tue, 10 Jan 2023 13:30:25 -0800 (PST) Date: Tue, 10 Jan 2023 13:30:09 -0800 In-Reply-To: <20230110213010.2683185-1-avagin@google.com> Mime-Version: 1.0 References: <20230110213010.2683185-1-avagin@google.com> X-Mailer: git-send-email 2.39.0.314.g84b9a713c41-goog Message-ID: <20230110213010.2683185-5-avagin@google.com> Subject: [PATCH 4/5] seccomp: add the synchronous mode for seccomp_unotify From: Andrei Vagin To: Peter Zijlstra , Ingo Molnar , Vincent Guittot , Dietmar Eggemann Cc: linux-kernel@vger.kernel.org, Kees Cook , Christian Brauner , Andrei Vagin , Andy Lutomirski , Juri Lelli , Peter Oskolkov , Tycho Andersen , Will Drewry Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" From: Andrei Vagin seccomp_unotify allows more privileged processes do actions on behalf of less privileged processes. In many cases, the workflow is fully synchronous. It means a target process triggers a system call and passes controls to a supervisor process that handles the system call and returns controls to the target process. 
In this context, "synchronous" means that only one process is running and another one is waiting. There is the WF_CURRENT_CPU flag that is used to advise the scheduler to move the wakee to the current CPU. For such synchronous workflows, it makes context switches a few times faster. Right now, each interaction takes 12=C2=B5s. With this patch, it takes about 3=C2=B5s. This change introduces the SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP flag, which is used to enable the sync mode. Signed-off-by: Andrei Vagin --- include/uapi/linux/seccomp.h | 4 ++++ kernel/seccomp.c | 31 +++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 0fdc6ef02b94..dbfc9b37fcae 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -115,6 +115,8 @@ struct seccomp_notif_resp { __u32 flags; }; =20 +#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) + /* valid flags for seccomp_notif_addfd */ #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ #define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomic= ally */ @@ -150,4 +152,6 @@ struct seccomp_notif_addfd { #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ struct seccomp_notif_addfd) =20 +#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) + #endif /* _UAPI_LINUX_SECCOMP_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 876022e9c88c..0a62d44f4898 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -143,9 +143,12 @@ struct seccomp_kaddfd { * filter->notify_lock. * @next_id: The id of the next request. * @notifications: A list of struct seccomp_knotif elements. + * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
*/ + struct notification { atomic_t requests; + u32 flags; u64 next_id; struct list_head notifications; }; @@ -1117,7 +1120,10 @@ static int seccomp_do_user_notification(int this_sys= call, INIT_LIST_HEAD(&n.addfd); =20 atomic_add(1, &match->notif->requests); - wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM); + if (match->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP) + wake_up_poll_on_current_cpu(&match->wqh, EPOLLIN | EPOLLRDNORM); + else + wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM); =20 /* * This is where we wait for a reply from userspace. @@ -1593,7 +1599,10 @@ static long seccomp_notify_send(struct seccomp_filte= r *filter, knotif->error =3D resp.error; knotif->val =3D resp.val; knotif->flags =3D resp.flags; - complete(&knotif->ready); + if (filter->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP) + complete_on_current_cpu(&knotif->ready); + else + complete(&knotif->ready); out: mutex_unlock(&filter->notify_lock); return ret; @@ -1623,6 +1632,22 @@ static long seccomp_notify_id_valid(struct seccomp_f= ilter *filter, return ret; } =20 +static long seccomp_notify_set_flags(struct seccomp_filter *filter, + unsigned long flags) +{ + long ret; + + if (flags & ~SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP) + return -EINVAL; + + ret =3D mutex_lock_interruptible(&filter->notify_lock); + if (ret < 0) + return ret; + filter->notif->flags =3D flags; + mutex_unlock(&filter->notify_lock); + return 0; +} + static long seccomp_notify_addfd(struct seccomp_filter *filter, struct seccomp_notif_addfd __user *uaddfd, unsigned int size) @@ -1752,6 +1777,8 @@ static long seccomp_notify_ioctl(struct file *file, u= nsigned int cmd, case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR: case SECCOMP_IOCTL_NOTIF_ID_VALID: return seccomp_notify_id_valid(filter, buf); + case SECCOMP_IOCTL_NOTIF_SET_FLAGS: + return seccomp_notify_set_flags(filter, arg); } =20 /* Extensible Argument ioctls */ --=20 2.39.0.314.g84b9a713c41-goog From nobody Mon Sep 15 18:06:08 2025 Return-Path: 
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 63315C61DB3 for ; Tue, 10 Jan 2023 21:32:20 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232144AbjAJVcC (ORCPT ); Tue, 10 Jan 2023 16:32:02 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:40812 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234103AbjAJVbT (ORCPT ); Tue, 10 Jan 2023 16:31:19 -0500 Received: from mail-pg1-x54a.google.com (mail-pg1-x54a.google.com [IPv6:2607:f8b0:4864:20::54a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 592BC6953A for ; Tue, 10 Jan 2023 13:30:27 -0800 (PST) Received: by mail-pg1-x54a.google.com with SMTP id i70-20020a638749000000b004b2b09ec530so3351765pge.3 for ; Tue, 10 Jan 2023 13:30:27 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=bkELdbPnw9+w61A8fpDNrJsicK3/uTJLSpCSm5duGgo=; b=dgV9j6PjmL+d29HbfIHOLglcm3Ui8wA2gq+75miT4vD/C/CLZ3DxYtlOdM+D9OVFpB H9vIp/JvWx8LOffaSAl7VtdVcPRUZR7NVGaxUgyaeyWic1iPgn7eYsnTSpCui8gRGZX4 vfmeD/UrL1t7W13Y27A4PTyyuVwWk7eCwh7oczcNObs3oJzom7c2ZsleZeYL3ph0vzU+ Ubb8+lSuyGj5zyy6sLo/5Njz6sdjL8jb2QIMImuaTbisLMhvX/PM6MP2ZJoKXymtf/g9 IvTdjiFwZSeQz3VkOK81Mhac4fNtL1yEEfKqmdDuqiMVPK0Jf8j+Yd+bjTHn6YTC2jq+ lAKQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=bkELdbPnw9+w61A8fpDNrJsicK3/uTJLSpCSm5duGgo=; b=cXOHM2cRc7s3KgDf8pv12dXth76xLtvDPYvC2ejFG8yqXn5yVxnR+6MQb+Y1Tnh5CE LhTRjT2oMdzOFEZtXzQUmWMrQ66a0yH2ziLp1yz+ZqGKcQHBaE2QazrFTtUWqBAuGSWG 
9IVtjUbIb4kSlWapJVghaUvZ+uSWBInTQOtV1+YX1zwSv3d75of2HR2mMT6b5AMDsKOW VeYJPfWDxA6lFMBU6wQ+K4H5ylLoymz2yqcMgFUYc4SFCcGxxz9XOVqzosqG1Jo7LPAT 9Nea0JQJtHtTsBYpJNEsjlgql1glssykv3+2S8GmFi7AKHJJxMuNmZXDAuDDJO9A15Jw kqfg== X-Gm-Message-State: AFqh2kp/VA6lnmclDnUytFR6SY8DfNk0TMhD9S7cAr97QRvDNN9atYRr uJgD9Z0ToLKIRmsTrty7GgGuInhfvVo= X-Google-Smtp-Source: AMrXdXtaEuSi7cf5KVjZXoaz5qjgVykb8pKVmq0/Xn3l3FPcNyjnVlP58xa6RMXCC+dzEXHZUF444DEeO6E= X-Received: from avagin.kir.corp.google.com ([2620:0:1008:11:6203:13b5:2d85:b75c]) (user=avagin job=sendgmr) by 2002:a17:90a:f315:b0:225:b164:8886 with SMTP id ca21-20020a17090af31500b00225b1648886mr4979473pjb.65.1673386226828; Tue, 10 Jan 2023 13:30:26 -0800 (PST) Date: Tue, 10 Jan 2023 13:30:10 -0800 In-Reply-To: <20230110213010.2683185-1-avagin@google.com> Mime-Version: 1.0 References: <20230110213010.2683185-1-avagin@google.com> X-Mailer: git-send-email 2.39.0.314.g84b9a713c41-goog Message-ID: <20230110213010.2683185-6-avagin@google.com> Subject: [PATCH 5/5] selftest/seccomp: add a new test for the sync mode of seccomp_user_notify From: Andrei Vagin To: Peter Zijlstra , Ingo Molnar , Vincent Guittot , Dietmar Eggemann Cc: linux-kernel@vger.kernel.org, Kees Cook , Christian Brauner , Andrei Vagin , Andy Lutomirski , Juri Lelli , Peter Oskolkov , Tycho Andersen , Will Drewry Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Andrei Vagin Test output: RUN global.user_notification_sync ... 
seccomp_bpf.c:4279:user_notification_sync:basic: 8655 nsec/syscall seccomp_bpf.c:4279:user_notification_sync:sync: 2919 nsec/syscall OK global.user_notification_sync Signed-off-by: Andrei Vagin --- tools/testing/selftests/seccomp/seccomp_bpf.c | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/= selftests/seccomp/seccomp_bpf.c index 9c2f448bb3a9..e4207cddd668 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -4243,6 +4243,94 @@ TEST(user_notification_addfd_rlimit) close(memfd); } =20 +/* USER_NOTIF_BENCH_TIMEOUT is 100 milliseconds. */ +#define USER_NOTIF_BENCH_TIMEOUT 100000000ULL +#define NSECS_PER_SEC 1000000000ULL + +#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP +#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) +#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) +#endif + +static uint64_t user_notification_sync_loop(struct __test_metadata *_metad= ata, + char *test_name, int listener) +{ + struct timespec ts; + uint64_t start, end, nr; + struct seccomp_notif req =3D {}; + struct seccomp_notif_resp resp =3D {}; + + clock_gettime(CLOCK_MONOTONIC, &ts); + start =3D ts.tv_nsec + ts.tv_sec * NSECS_PER_SEC; + for (end =3D start, nr =3D 0; end - start < USER_NOTIF_BENCH_TIMEOUT; nr+= +) { + memset(&req, 0, sizeof(req)); + req.pid =3D 0; + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + ASSERT_EQ(req.data.nr, __NR_getppid); + + resp.id =3D req.id; + resp.error =3D 0; + resp.val =3D USER_NOTIF_MAGIC; + resp.flags =3D 0; + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + clock_gettime(CLOCK_MONOTONIC, &ts); + end =3D ts.tv_nsec + ts.tv_sec * NSECS_PER_SEC; + } + TH_LOG("%s:\t%lld nsec/syscall", test_name, USER_NOTIF_BENCH_TIMEOUT / nr= ); + return nr; +} + +TEST(user_notification_sync) +{ + pid_t pid; + long ret; + int status, listener; + unsigned long calls, sync_calls; + + ret 
=3D prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + listener =3D user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid =3D fork(); + ASSERT_GE(pid, 0); + + if (pid =3D=3D 0) { + while (1) { + ret =3D syscall(__NR_getppid); + if (ret =3D=3D USER_NOTIF_MAGIC) + continue; + break; + } + _exit(1); + } + + calls =3D user_notification_sync_loop(_metadata, "basic", listener); + + /* Try to set invalid flags. */ + EXPECT_SYSCALL_RETURN(-EINVAL, + ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0)); + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, + SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0); + + sync_calls =3D user_notification_sync_loop(_metadata, "sync", listener); + + EXPECT_GT(sync_calls, calls); + + kill(pid, SIGKILL); + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_EQ(true, WIFSIGNALED(status)); + ASSERT_EQ(SIGKILL, WTERMSIG(status)); +} + + /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */ FIXTURE(O_SUSPEND_SECCOMP) { pid_t pid; --=20 2.39.0.314.g84b9a713c41-goog