From nobody Wed Jul 1 00:46:34 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id B1D6FC433F5 for ; Wed, 5 Jan 2022 21:31:36 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S244454AbiAEVbe (ORCPT ); Wed, 5 Jan 2022 16:31:34 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54022 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S244426AbiAEVb3 (ORCPT ); Wed, 5 Jan 2022 16:31:29 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B569FC061201 for ; Wed, 5 Jan 2022 13:31:29 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id e137-20020a25378f000000b0060c1f2f4939so1232442yba.3 for ; Wed, 05 Jan 2022 13:31:29 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=OXcWrdqzZ/LJa8MsNyqLit5QE2f2wr1XJUDpKBKFn9M=; b=k3hEATgcFTjuowvt9G4NvFYfAZjtw9yYqDQ5sEX9rEwss6AjrhomFWXG4FZYaewvrx 4V64TcMjowwztsnK0+hSjXm+0uKlEhkSCoowr50//xgN7o3g1CD1qR/EaFf2DTXfvi2j h4VF/GsbVO+s/WeCU3YvcaqHIjFUyWsQUDlbndnnJe2ho9S4lSs+sjPo9RNJwlszBhjk F2UWvKRdxMSr6t8o/rDdW1DLzncOTkpY6b/UqWftfdp+QgOfDbwW05+3VyPGwL6h1+Aw lMEJc2yknlR8baEKk7/lfrYoBQ59g4BdTCgoF5kQ6dNSz/XB9EYH4SHjJxB7woKt+I1d zQHA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=OXcWrdqzZ/LJa8MsNyqLit5QE2f2wr1XJUDpKBKFn9M=; b=SG5FwH95j8wqo7qs3FOGsndGo1zWnxE5+cuijO0k3d0GIgTZCWR76RMvr/U6CM3AKy V1a/huORRogL1wEb/lPTgoD368TzZMz+ZDBSEHh1ahG0CusJErQV50wuZQjc/nt7e28V 
FHwNdE8iOhSeaGsqtxk0UNfOz5+Vj0kv+db/HPfwNf68gr4DFpDvkepSDACMm9MQJqZQ FnfjzJ7Cyc7s75N0LUTrl0I9QpDBjjtnbD9/glVjEfFZ94FbUvxhHG2g74RLHd6O4OZK cyoY2bRiNR7HHQCKYX4HtOPW8oDIPTxUx1WP0E/EgHS8qDib4WxBeGd/wkJtHDOVxjAk 8sgg== X-Gm-Message-State: AOAM531YUYu32vtDHFM7DD0InHhv93Zg9+r65x2hVSJZR0o1qdLeWhd7 D8gj3gLE6vO1+IeLXGggvnXCTS1g X-Google-Smtp-Source: ABdhPJwk8/Y5JYC+V7mDVIU826YiSllixU9D0FiG854iq5FSVYplFs8FDFRg3eOicPrSA9zsN2U/nnuU X-Received: from gnomeregan.cam.corp.google.com ([2620:15c:6:412:3ab7:a23:e07a:5e22]) (user=brho job=sendgmr) by 2002:a25:40d7:: with SMTP id n206mr55266535yba.466.1641418289003; Wed, 05 Jan 2022 13:31:29 -0800 (PST) Date: Wed, 5 Jan 2022 16:28:26 -0500 In-Reply-To: <20220105212828.197013-1-brho@google.com> Message-Id: <20220105212828.197013-2-brho@google.com> Mime-Version: 1.0 References: <20220105212828.197013-1-brho@google.com> X-Mailer: git-send-email 2.34.1.448.ga2b2bfdf31-goog Subject: [PATCH v2 1/3] setpriority: only grab the tasklist_lock for PRIO_PGRP From: Barret Rhoden To: ebiederm@xmission.com Cc: Christian Brauner , Andrew Morton , Alexey Gladkov , William Cohen , Viresh Kumar , Alexey Dobriyan , Chris Hyser , Peter Collingbourne , Xiaofeng Cao , David Hildenbrand , Cyrill Gorcunov , linux-kernel@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The tasklist_lock is necessary only for PRIO_PGRP for both setpriority() and getpriority(). Unnecessarily grabbing the tasklist_lock can be a scalability bottleneck for workloads that also must grab the tasklist_lock for waiting, killing, and cloning. This change resulted in a 12% speedup on a microbenchmark where parents kill and wait on their children, and children getpriority, setpriority, and getrlimit. 
Signed-off-by: Barret Rhoden --- kernel/sys.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 8fdac0d90504..558e52fa5bbd 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -220,7 +220,6 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int,= niceval) niceval =3D MAX_NICE; =20 rcu_read_lock(); - read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: if (who) @@ -231,6 +230,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int,= niceval) error =3D set_one_prio(p, niceval, error); break; case PRIO_PGRP: + read_lock(&tasklist_lock); if (who) pgrp =3D find_vpid(who); else @@ -238,6 +238,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int,= niceval) do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { error =3D set_one_prio(p, niceval, error); } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); + read_unlock(&tasklist_lock); break; case PRIO_USER: uid =3D make_kuid(cred->user_ns, who); @@ -258,7 +259,6 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int,= niceval) break; } out_unlock: - read_unlock(&tasklist_lock); rcu_read_unlock(); out: return error; @@ -283,7 +283,6 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) return -EINVAL; =20 rcu_read_lock(); - read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: if (who) @@ -297,6 +296,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) } break; case PRIO_PGRP: + read_lock(&tasklist_lock); if (who) pgrp =3D find_vpid(who); else @@ -306,6 +306,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) if (niceval > retval) retval =3D niceval; } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); + read_unlock(&tasklist_lock); break; case PRIO_USER: uid =3D make_kuid(cred->user_ns, who); @@ -329,7 +330,6 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) break; } out_unlock: - read_unlock(&tasklist_lock); rcu_read_unlock(); =20 return retval; --=20 2.34.1.448.ga2b2bfdf31-goog From nobody Wed Jul 1 00:46:34 2026 Return-Path: 
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id B73B0C433EF for ; Wed, 5 Jan 2022 21:31:37 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S244464AbiAEVbg (ORCPT ); Wed, 5 Jan 2022 16:31:36 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54030 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S244442AbiAEVbb (ORCPT ); Wed, 5 Jan 2022 16:31:31 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5F2F6C061245 for ; Wed, 5 Jan 2022 13:31:31 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id i65-20020a252244000000b0060b2e5fd54cso1167763ybi.13 for ; Wed, 05 Jan 2022 13:31:31 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=VaVcglQQWQyd+D8xYsm++asUm3d2zSMstiXGMzeYRRg=; b=kj1vfpt24GWSpAOGOWS2UytnuxnopwczjTyCX+z1ZrjgUheyWTWtg6bWynnkzj2fdm UV/+xl5xP7XCi3+G620jWUFNsXvmh6yWWJUy+T8DPzCplzJF/jcKxtj6sYGqhhZZPQ2u bwSrItZeQTwHivQqCl0xHBjbhSexP5bvjfQo2uQVMIHWdsUCavQ0dOic24pxkbFR4/oQ lnzYS2teYKOSK0hzWIXv/yjLgvZD9YFNFoj+8OOcrRSBbLSHZIBbdjB9u34C048e7sjV odg2R5dJ25nrJ2DVgJSp8IOhnZHskaPs4w2cPfd/L0DkR6BwogtRzbkP2KPJUziJ86Bh B0gg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=VaVcglQQWQyd+D8xYsm++asUm3d2zSMstiXGMzeYRRg=; b=b91NrzVsSZuBddvhugPQ9S15iSzx7sC849x/dM034rDh3gkICvZeoPS1uIhl3G9+bz xm7A/Bn3VGoe6c+Kq444B0BqjjPbgP2ykKVa3QG07ttFGzhDOryerEYrzWTDIX1FuWS4 EgVJU4OPSwux0EKphC0LUeWHYLUCcrtj3SnUlWCylbTQRcao/T/SE638yTI8pG32odTo 
zoU1+F4zhRAhNwuEIMoG7tYtuageDfKRaWY/jDwAA5bjL02gw0QPRCfhegjerz6L43uJ cU6aziCBiwWCqSl+j7ScFa9DW9S8k/4HghDhDU/mDccf4SjbxnB1q+VHdrRE3UJfpZWz 5adQ== X-Gm-Message-State: AOAM531f5HFRBrnSdfp2mvb8Ys96Iwum65LyO2REXmtTc4GwN9emyUPI 8KRAndXCd3oYy9fUh22mon7cRWHn X-Google-Smtp-Source: ABdhPJyC91nLMMhW+wKBzgEuZ5ccUrlcKq4EblaVq/f9QvMsMSM37Xu5zlM1VyNMQqEBZIkJOt3ocfaP X-Received: from gnomeregan.cam.corp.google.com ([2620:15c:6:412:3ab7:a23:e07a:5e22]) (user=brho job=sendgmr) by 2002:a25:c385:: with SMTP id t127mr41305535ybf.562.1641418290619; Wed, 05 Jan 2022 13:31:30 -0800 (PST) Date: Wed, 5 Jan 2022 16:28:27 -0500 In-Reply-To: <20220105212828.197013-1-brho@google.com> Message-Id: <20220105212828.197013-3-brho@google.com> Mime-Version: 1.0 References: <20220105212828.197013-1-brho@google.com> X-Mailer: git-send-email 2.34.1.448.ga2b2bfdf31-goog Subject: [PATCH v2 2/3] prlimit: make do_prlimit() static From: Barret Rhoden To: ebiederm@xmission.com Cc: Christian Brauner , Andrew Morton , Alexey Gladkov , William Cohen , Viresh Kumar , Alexey Dobriyan , Chris Hyser , Peter Collingbourne , Xiaofeng Cao , David Hildenbrand , Cyrill Gorcunov , linux-kernel@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" There are no other callers in the kernel. Fixed up a comment format and whitespace issue when moving do_prlimit() higher in sys.c. 
Signed-off-by: Barret Rhoden --- include/linux/resource.h | 2 - kernel/sys.c | 116 ++++++++++++++++++++------------------- 2 files changed, 59 insertions(+), 59 deletions(-) diff --git a/include/linux/resource.h b/include/linux/resource.h index bdf491cbcab7..4fdbc0c3f315 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -8,7 +8,5 @@ struct task_struct; =20 void getrusage(struct task_struct *p, int who, struct rusage *ru); -int do_prlimit(struct task_struct *tsk, unsigned int resource, - struct rlimit *new_rlim, struct rlimit *old_rlim); =20 #endif diff --git a/kernel/sys.c b/kernel/sys.c index 558e52fa5bbd..fb2a5e7c0589 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1415,6 +1415,65 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, = int, len) return errno; } =20 +/* make sure you are allowed to change @tsk limits before calling this */ +static int do_prlimit(struct task_struct *tsk, unsigned int resource, + struct rlimit *new_rlim, struct rlimit *old_rlim) +{ + struct rlimit *rlim; + int retval =3D 0; + + if (resource >=3D RLIM_NLIMITS) + return -EINVAL; + if (new_rlim) { + if (new_rlim->rlim_cur > new_rlim->rlim_max) + return -EINVAL; + if (resource =3D=3D RLIMIT_NOFILE && + new_rlim->rlim_max > sysctl_nr_open) + return -EPERM; + } + + /* protect tsk->signal and tsk->sighand from disappearing */ + read_lock(&tasklist_lock); + if (!tsk->sighand) { + retval =3D -ESRCH; + goto out; + } + + rlim =3D tsk->signal->rlim + resource; + task_lock(tsk->group_leader); + if (new_rlim) { + /* + * Keep the capable check against init_user_ns until cgroups can + * contain all limits. + */ + if (new_rlim->rlim_max > rlim->rlim_max && + !capable(CAP_SYS_RESOURCE)) + retval =3D -EPERM; + if (!retval) + retval =3D security_task_setrlimit(tsk, resource, new_rlim); + } + if (!retval) { + if (old_rlim) + *old_rlim =3D *rlim; + if (new_rlim) + *rlim =3D *new_rlim; + } + task_unlock(tsk->group_leader); + + /* + * RLIMIT_CPU handling. 
Arm the posix CPU timer if the limit is not + * infinite. In case of RLIM_INFINITY the posix CPU timer code + * ignores the rlimit. + */ + if (!retval && new_rlim && resource =3D=3D RLIMIT_CPU && + new_rlim->rlim_cur !=3D RLIM_INFINITY && + IS_ENABLED(CONFIG_POSIX_TIMERS)) + update_rlimit_cpu(tsk, new_rlim->rlim_cur); +out: + read_unlock(&tasklist_lock); + return retval; +} + SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *,= rlim) { struct rlimit value; @@ -1558,63 +1617,6 @@ static void rlim64_to_rlim(const struct rlimit64 *rl= im64, struct rlimit *rlim) rlim->rlim_max =3D (unsigned long)rlim64->rlim_max; } =20 -/* make sure you are allowed to change @tsk limits before calling this */ -int do_prlimit(struct task_struct *tsk, unsigned int resource, - struct rlimit *new_rlim, struct rlimit *old_rlim) -{ - struct rlimit *rlim; - int retval =3D 0; - - if (resource >=3D RLIM_NLIMITS) - return -EINVAL; - if (new_rlim) { - if (new_rlim->rlim_cur > new_rlim->rlim_max) - return -EINVAL; - if (resource =3D=3D RLIMIT_NOFILE && - new_rlim->rlim_max > sysctl_nr_open) - return -EPERM; - } - - /* protect tsk->signal and tsk->sighand from disappearing */ - read_lock(&tasklist_lock); - if (!tsk->sighand) { - retval =3D -ESRCH; - goto out; - } - - rlim =3D tsk->signal->rlim + resource; - task_lock(tsk->group_leader); - if (new_rlim) { - /* Keep the capable check against init_user_ns until - cgroups can contain all limits */ - if (new_rlim->rlim_max > rlim->rlim_max && - !capable(CAP_SYS_RESOURCE)) - retval =3D -EPERM; - if (!retval) - retval =3D security_task_setrlimit(tsk, resource, new_rlim); - } - if (!retval) { - if (old_rlim) - *old_rlim =3D *rlim; - if (new_rlim) - *rlim =3D *new_rlim; - } - task_unlock(tsk->group_leader); - - /* - * RLIMIT_CPU handling. Arm the posix CPU timer if the limit is not - * infinite. In case of RLIM_INFINITY the posix CPU timer code - * ignores the rlimit. 
- */ - if (!retval && new_rlim && resource =3D=3D RLIMIT_CPU && - new_rlim->rlim_cur !=3D RLIM_INFINITY && - IS_ENABLED(CONFIG_POSIX_TIMERS)) - update_rlimit_cpu(tsk, new_rlim->rlim_cur); -out: - read_unlock(&tasklist_lock); - return retval; -} - /* rcu lock must be held */ static int check_prlimit_permission(struct task_struct *task, unsigned int flags) --=20 2.34.1.448.ga2b2bfdf31-goog From nobody Wed Jul 1 00:46:34 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id D825EC433F5 for ; Wed, 5 Jan 2022 21:31:48 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S244490AbiAEVbo (ORCPT ); Wed, 5 Jan 2022 16:31:44 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54038 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S244446AbiAEVbd (ORCPT ); Wed, 5 Jan 2022 16:31:33 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D5824C061245 for ; Wed, 5 Jan 2022 13:31:32 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id e2-20020a25d302000000b0060c57942183so1136148ybf.18 for ; Wed, 05 Jan 2022 13:31:32 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=e02bjD6pRKTWNu3KvN2MtMLMNFzM1qFsgH72gSuIkl4=; b=g2LCcjXXaW7gzE4dCTvUSuTgcCA53czMalkKvkChm6TBE4b+r6X9NJWmpuA55yNesS QrMBzdp784HO2SshmhwbZdZGW6EgWwGp5fI/LWT4ApiqTkr3s0zsnAW2pWGbzZRTxvWS 6Sgn3DMaxdU5px2s/SRPeNQY+7VUv0dH0HwMGjjTllzTT+junL1sGuyc0y0JtCiJ4YXE j10KhriKENIYwbu232IvSkly2qnCdPU4qe+SCzEBV61mbmgGWyiyK/gPGEzsbCwpHgHQ Ew8E4W226r7Uz8JI4dJTzvli2WIgmmoQjUHxBcO9xenxIS9WZDH1qbsr7weCSUYjAUy+ DBSA== X-Google-DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=e02bjD6pRKTWNu3KvN2MtMLMNFzM1qFsgH72gSuIkl4=; b=XZm4JElAIpxM7y6lGZzkl4K0Fp6H+AChKERUTkgzfavxzaUCJs/YdS6jSy4BiD74Bt 56lpXxuvXFsbgIADrqjRPt3emqDv7dyUja3M1r1u0affuqY4ccjG9xIs3xWXBityUsRc SLsPciAtQZ+d2rNNhfvjwyZnLk59yJQD16u/kb//aNRl44sBSGarIzgoNQji+u8RvwiB 8LzbeUixScioBq/YbjA/3+QM/a8K+SduCaw9swOpM0ioPfJiFKtn//AaAK2bia3J+OHj R3LWfzHECUQVaAQTkY8v0TEXcM6obrnSFBdnKcwHYG8ttnIZPIv+0N2ncKjahFeLGhhV W2cw== X-Gm-Message-State: AOAM533AEwCM66VkJ95sd7iX3ZBt4sw4VvspdLamCN1u2NDwRvgL12rj 0w2kPoCNpCZpwExej8qSreI7LFUB X-Google-Smtp-Source: ABdhPJwvIDThxVqCBdibl4805Y+ll3+2PEgxcX3feRUyW/5ke6vBMekJwngIcIi2u5eLiWbbIU/tNQLj X-Received: from gnomeregan.cam.corp.google.com ([2620:15c:6:412:3ab7:a23:e07a:5e22]) (user=brho job=sendgmr) by 2002:a25:bfc6:: with SMTP id q6mr64374645ybm.709.1641418292040; Wed, 05 Jan 2022 13:31:32 -0800 (PST) Date: Wed, 5 Jan 2022 16:28:28 -0500 In-Reply-To: <20220105212828.197013-1-brho@google.com> Message-Id: <20220105212828.197013-4-brho@google.com> Mime-Version: 1.0 References: <20220105212828.197013-1-brho@google.com> X-Mailer: git-send-email 2.34.1.448.ga2b2bfdf31-goog Subject: [PATCH v2 3/3] prlimit: do not grab the tasklist_lock From: Barret Rhoden To: ebiederm@xmission.com Cc: Christian Brauner , Andrew Morton , Alexey Gladkov , William Cohen , Viresh Kumar , Alexey Dobriyan , Chris Hyser , Peter Collingbourne , Xiaofeng Cao , David Hildenbrand , Cyrill Gorcunov , linux-kernel@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Unnecessarily grabbing the tasklist_lock can be a scalability bottleneck for workloads that also must grab the tasklist_lock for waiting, killing, and cloning. The tasklist_lock was grabbed to protect tsk->sighand from disappearing (becoming NULL). 
tsk->signal was already protected by holding a reference to tsk. update_rlimit_cpu() assumed tsk->sighand !=3D NULL. With this commit, it instead takes the siglock via lock_task_sighand(). However, this means that update_rlimit_cpu() can fail. This only happens when a task is exiting. Note that during exec, sighand may *change*, but it will not be NULL. Prior to this commit, do_prlimit() ensured that update_rlimit_cpu() would not fail by read-locking the tasklist_lock and checking tsk->sighand !=3D NULL. If update_rlimit_cpu() fails, there may be other tasks that are not exiting that share tsk->signal. We need to run update_rlimit_cpu() on one of them. We can't "back out" the new rlim - once we unlock task_lock(group_leader), the rlim is essentially changed. The only other caller of update_rlimit_cpu() is selinux_bprm_committing_creds(). It has tsk =3D=3D current, so update_rlimit_cpu() cannot fail (current->sighand cannot disappear until current exits). This change resulted in a 14% speedup on a microbenchmark where parents kill and wait on their children, and children call getpriority(), setpriority(), and getrlimit(). 
Signed-off-by: Barret Rhoden --- include/linux/posix-timers.h | 2 +- kernel/sys.c | 32 +++++++++++++++++++++----------- kernel/time/posix-cpu-timers.c | 12 +++++++++--- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 5bbcd280bfd2..9cf126c3b27f 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -253,7 +253,7 @@ void posix_cpu_timers_exit_group(struct task_struct *ta= sk); void set_process_cpu_timer(struct task_struct *task, unsigned int clock_id= x, u64 *newval, u64 *oldval); =20 -void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); +int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); =20 void posixtimer_rearm(struct kernel_siginfo *info); #endif diff --git a/kernel/sys.c b/kernel/sys.c index fb2a5e7c0589..073ae9db192f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1432,13 +1432,7 @@ static int do_prlimit(struct task_struct *tsk, unsig= ned int resource, return -EPERM; } =20 - /* protect tsk->signal and tsk->sighand from disappearing */ - read_lock(&tasklist_lock); - if (!tsk->sighand) { - retval =3D -ESRCH; - goto out; - } - + /* Holding a refcount on tsk protects tsk->signal from disappearing. */ rlim =3D tsk->signal->rlim + resource; task_lock(tsk->group_leader); if (new_rlim) { @@ -1467,10 +1461,26 @@ static int do_prlimit(struct task_struct *tsk, unsi= gned int resource, */ if (!retval && new_rlim && resource =3D=3D RLIMIT_CPU && new_rlim->rlim_cur !=3D RLIM_INFINITY && - IS_ENABLED(CONFIG_POSIX_TIMERS)) - update_rlimit_cpu(tsk, new_rlim->rlim_cur); -out: - read_unlock(&tasklist_lock); + IS_ENABLED(CONFIG_POSIX_TIMERS)) { + if (update_rlimit_cpu(tsk, new_rlim->rlim_cur)) { + /* + * update_rlimit_cpu can fail if the task is exiting. + * We already set the task group's rlim, so we need to + * update_rlimit_cpu for some other task in the process. 
+ * If all of the tasks are exiting, then we don't need + * to update_rlimit_cpu. + */ + struct task_struct *t_i; + + rcu_read_lock(); + for_each_thread(tsk, t_i) { + if (!update_rlimit_cpu(t_i, new_rlim->rlim_cur)) + break; + } + rcu_read_unlock(); + } + } + return retval; } =20 diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 96b4e7810426..e13e628509fb 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -34,14 +34,20 @@ void posix_cputimers_group_init(struct posix_cputimers = *pct, u64 cpu_limit) * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if * necessary. Needs siglock protection since other code may update the * expiration cache as well. + * + * Returns 0 on success, -ESRCH on failure. Can fail if the task is exiti= ng and + * we cannot lock_task_sighand. Cannot fail if task is current. */ -void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) +int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) { u64 nsecs =3D rlim_new * NSEC_PER_SEC; + unsigned long irq_fl; =20 - spin_lock_irq(&task->sighand->siglock); + if (!lock_task_sighand(task, &irq_fl)) + return -ESRCH; set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL); - spin_unlock_irq(&task->sighand->siglock); + unlock_task_sighand(task, &irq_fl); + return 0; } =20 /* --=20 2.34.1.448.ga2b2bfdf31-goog