From nobody Tue Jun 30 23:27:53 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4E375C433F5 for ; Thu, 6 Jan 2022 17:21:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S241712AbiAFRVA (ORCPT ); Thu, 6 Jan 2022 12:21:00 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:39108 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S241666AbiAFRUz (ORCPT ); Thu, 6 Jan 2022 12:20:55 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 612D3C061201 for ; Thu, 6 Jan 2022 09:20:55 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id s83-20020a255e56000000b0060c46cc1890so6074767ybb.16 for ; Thu, 06 Jan 2022 09:20:55 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=OXcWrdqzZ/LJa8MsNyqLit5QE2f2wr1XJUDpKBKFn9M=; b=kZSdwWIqyPhTSuB53f4VjBfc7Xb76bIC+Is8zxCJcb0bKgm4qXs/pPlLVkozbH0ki+ YBw/3CbGAfccFjdVnf8cw73rjHtSrXxQFTfF4SMLVgx1u48S4NugDxA2PhtuVmcF4oxB S4CJuGgLhtf9zUe44UG//df3smOzAkYjUzh+Z9HFDwUEsdooMHcwLn2BXs13YVHOgXwb ocmHFXMGmcXvWinYOJ/okP0BPWAFmfhHU1T4qsqYLozuDfm339uaOPALhg3MU/56ZDGc jYFERPu11DuNLCvhx/juf+UYXsO95GlKf0GdLNT82XSYO/5qbVttkUOnz4YFdpWRRgsv Z1DQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=OXcWrdqzZ/LJa8MsNyqLit5QE2f2wr1XJUDpKBKFn9M=; b=yLqVO+D9s+jmm6xz2N9qh+zSHh4d1IHo9D9e0M+82fjhoZDQ6ILgJ9EEPZ7bUx4b6j g5/pfwNMsk0oYegO7RFzrOPqDi7FwZNQitzJshicGhPMxNIw1ra7xSNvO411ME3M+ilQ 
aUZlg2vqwJjT1p8AOUFAIBYCpc61OqFNHqtZQkoFGaBTJwB4hnD+2sdtBoS5Ku4NVV2Q 3dJj7VO+Ic7xOQMQww6zoBE3bxFffaXI3gghLh/cVmNJQIzbwuCJTV6g0DhBLYO83D8F WZ88+9uwv2dJ19lofx3D4Q8z8m5Rr+lkI9T5+W+5MJg1d7HErb8Fz0mluAPzuS3F3DfD WxiA== X-Gm-Message-State: AOAM533BjOfhFlbeBYDwRSI7/nTfUjLiBVZbW45Tx6a9aVXJp0w7FMgg G4vvx+yvJ8gWaxyKgsM4+02eTBlf X-Google-Smtp-Source: ABdhPJwoMQLAddFJICdKDWy+ImRQXv8fmsh4pVwQzqlsIgtdlFR7aiM6B3T7Hs9Q3Ig032HhLHTaQzcg X-Received: from gnomeregan.cam.corp.google.com ([2620:15c:6:412:f130:19b6:8678:c9d1]) (user=brho job=sendgmr) by 2002:a25:c245:: with SMTP id s66mr32796965ybf.243.1641489654639; Thu, 06 Jan 2022 09:20:54 -0800 (PST) Date: Thu, 6 Jan 2022 12:20:39 -0500 In-Reply-To: <20220106172041.522167-1-brho@google.com> Message-Id: <20220106172041.522167-2-brho@google.com> Mime-Version: 1.0 References: <20220106172041.522167-1-brho@google.com> X-Mailer: git-send-email 2.34.1.448.ga2b2bfdf31-goog Subject: [PATCH v3 1/3] setpriority: only grab the tasklist_lock for PRIO_PGRP From: Barret Rhoden To: ebiederm@xmission.com Cc: Christian Brauner , Andrew Morton , Alexey Gladkov , William Cohen , Viresh Kumar , Alexey Dobriyan , Chris Hyser , Peter Collingbourne , Xiaofeng Cao , David Hildenbrand , Cyrill Gorcunov , linux-kernel@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The tasklist_lock is necessary only for PRIO_PGRP for both setpriority() and getpriority(). Unnecessarily grabbing the tasklist_lock can be a scalability bottleneck for workloads that also must grab the tasklist_lock for waiting, killing, and cloning. This change resulted in a 12% speedup on a microbenchmark where parents kill and wait on their children, and children getpriority, setpriority, and getrlimit. 
Signed-off-by: Barret Rhoden --- kernel/sys.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 8fdac0d90504..558e52fa5bbd 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -220,7 +220,6 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int,= niceval) niceval =3D MAX_NICE; =20 rcu_read_lock(); - read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: if (who) @@ -231,6 +230,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int,= niceval) error =3D set_one_prio(p, niceval, error); break; case PRIO_PGRP: + read_lock(&tasklist_lock); if (who) pgrp =3D find_vpid(who); else @@ -238,6 +238,7 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int,= niceval) do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { error =3D set_one_prio(p, niceval, error); } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); + read_unlock(&tasklist_lock); break; case PRIO_USER: uid =3D make_kuid(cred->user_ns, who); @@ -258,7 +259,6 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int,= niceval) break; } out_unlock: - read_unlock(&tasklist_lock); rcu_read_unlock(); out: return error; @@ -283,7 +283,6 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) return -EINVAL; =20 rcu_read_lock(); - read_lock(&tasklist_lock); switch (which) { case PRIO_PROCESS: if (who) @@ -297,6 +296,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) } break; case PRIO_PGRP: + read_lock(&tasklist_lock); if (who) pgrp =3D find_vpid(who); else @@ -306,6 +306,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) if (niceval > retval) retval =3D niceval; } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); + read_unlock(&tasklist_lock); break; case PRIO_USER: uid =3D make_kuid(cred->user_ns, who); @@ -329,7 +330,6 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who) break; } out_unlock: - read_unlock(&tasklist_lock); rcu_read_unlock(); =20 return retval; --=20 2.34.1.448.ga2b2bfdf31-goog From nobody Tue Jun 30 23:27:53 2026 Return-Path: 
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 98526C433FE for ; Thu, 6 Jan 2022 17:21:06 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S241731AbiAFRVC (ORCPT ); Thu, 6 Jan 2022 12:21:02 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:39126 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S241684AbiAFRU5 (ORCPT ); Thu, 6 Jan 2022 12:20:57 -0500 Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id DD9BCC0611FF for ; Thu, 6 Jan 2022 09:20:56 -0800 (PST) Received: by mail-yb1-xb49.google.com with SMTP id b10-20020a251b0a000000b0060a7fbb7a64so6133586ybb.23 for ; Thu, 06 Jan 2022 09:20:56 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=VaVcglQQWQyd+D8xYsm++asUm3d2zSMstiXGMzeYRRg=; b=Hr6WeOoUTwdN4ffE+1b6oJc7c04qmGPJdYFf0sYZD1i9sdfRJczM+1F+Kfgnf+lg/W KWlsdvHQbnmd7MH4BT3RkNEKKZYntVTPT2vWczi/C+KRN7wYleegIKyEjw+V7sQ9DTBa Eg6nvFvrgO0PMFFEd5aDpVLm6mBK07R6SsKQQzoV/+RsiE8tWb+NBfcUV/hAWE+3zEx2 tZ7aPdpYNL8KZlXU0fQcvKIOfWQXA0WWV781zdRtCa0c5RdpvbjZW51Bq8BVgzBfbDwd bzNlxfzC0n95SzUBwU0LS2nAh0CCxYM2dUBmyIxP+hOQu+au14MCb2hcr+Usk9MR9D1c HWoA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=VaVcglQQWQyd+D8xYsm++asUm3d2zSMstiXGMzeYRRg=; b=6jBD/rSP5xfENgYG9VfbETb/IEDh8ELe+cC0lK0Dg67PR5LX42rZvkKt8k73TeoVPy 3q/65BDg217nnPx3mB4pb2qPKBW5LN36a3pVsPMuIxw8IujeYx4HnOtDgz9VWnqP14CZ AVLlYptHanNGQPEjyTI6moitLRXl4Pn4A40esD6NiAjPOOrc99c4W6Ll7L7puvfaIj57 
nv/cwm/sUmYLEOW87ItZPD+A8WVgs/8dkkCFa1NdfdAsbzhqwDLeIEg9nbrN4G2i/MEA BBm7hMUqQMRpXxQqSRAWt6397Ryuf8MVHmp9oCXelfhHXylRWJ4QK31YPs3XZbNGuFis QlhQ== X-Gm-Message-State: AOAM531J9/I2alsGrco5eE65evbEkh2tfbPtaAOqu1KqSbVQiqbCVFSW qtKO1BkSSl8v1p1+kThlfinR6gp+ X-Google-Smtp-Source: ABdhPJxH4wialPDTtm2tQJ8QBYOiePs4jx8BQYI3Lan0gm90jVSj+a/A8LSxPT+6XdO30SQsiWBFyYSN X-Received: from gnomeregan.cam.corp.google.com ([2620:15c:6:412:f130:19b6:8678:c9d1]) (user=brho job=sendgmr) by 2002:a5b:74b:: with SMTP id s11mr22180493ybq.90.1641489656140; Thu, 06 Jan 2022 09:20:56 -0800 (PST) Date: Thu, 6 Jan 2022 12:20:40 -0500 In-Reply-To: <20220106172041.522167-1-brho@google.com> Message-Id: <20220106172041.522167-3-brho@google.com> Mime-Version: 1.0 References: <20220106172041.522167-1-brho@google.com> X-Mailer: git-send-email 2.34.1.448.ga2b2bfdf31-goog Subject: [PATCH v3 2/3] prlimit: make do_prlimit() static From: Barret Rhoden To: ebiederm@xmission.com Cc: Christian Brauner , Andrew Morton , Alexey Gladkov , William Cohen , Viresh Kumar , Alexey Dobriyan , Chris Hyser , Peter Collingbourne , Xiaofeng Cao , David Hildenbrand , Cyrill Gorcunov , linux-kernel@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" There are no other callers in the kernel. Fixed up a comment format and whitespace issue when moving do_prlimit() higher in sys.c. 
Signed-off-by: Barret Rhoden --- include/linux/resource.h | 2 - kernel/sys.c | 116 ++++++++++++++++++++------------------- 2 files changed, 59 insertions(+), 59 deletions(-) diff --git a/include/linux/resource.h b/include/linux/resource.h index bdf491cbcab7..4fdbc0c3f315 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -8,7 +8,5 @@ struct task_struct; =20 void getrusage(struct task_struct *p, int who, struct rusage *ru); -int do_prlimit(struct task_struct *tsk, unsigned int resource, - struct rlimit *new_rlim, struct rlimit *old_rlim); =20 #endif diff --git a/kernel/sys.c b/kernel/sys.c index 558e52fa5bbd..fb2a5e7c0589 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1415,6 +1415,65 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, = int, len) return errno; } =20 +/* make sure you are allowed to change @tsk limits before calling this */ +static int do_prlimit(struct task_struct *tsk, unsigned int resource, + struct rlimit *new_rlim, struct rlimit *old_rlim) +{ + struct rlimit *rlim; + int retval =3D 0; + + if (resource >=3D RLIM_NLIMITS) + return -EINVAL; + if (new_rlim) { + if (new_rlim->rlim_cur > new_rlim->rlim_max) + return -EINVAL; + if (resource =3D=3D RLIMIT_NOFILE && + new_rlim->rlim_max > sysctl_nr_open) + return -EPERM; + } + + /* protect tsk->signal and tsk->sighand from disappearing */ + read_lock(&tasklist_lock); + if (!tsk->sighand) { + retval =3D -ESRCH; + goto out; + } + + rlim =3D tsk->signal->rlim + resource; + task_lock(tsk->group_leader); + if (new_rlim) { + /* + * Keep the capable check against init_user_ns until cgroups can + * contain all limits. + */ + if (new_rlim->rlim_max > rlim->rlim_max && + !capable(CAP_SYS_RESOURCE)) + retval =3D -EPERM; + if (!retval) + retval =3D security_task_setrlimit(tsk, resource, new_rlim); + } + if (!retval) { + if (old_rlim) + *old_rlim =3D *rlim; + if (new_rlim) + *rlim =3D *new_rlim; + } + task_unlock(tsk->group_leader); + + /* + * RLIMIT_CPU handling. 
Arm the posix CPU timer if the limit is not + * infinite. In case of RLIM_INFINITY the posix CPU timer code + * ignores the rlimit. + */ + if (!retval && new_rlim && resource =3D=3D RLIMIT_CPU && + new_rlim->rlim_cur !=3D RLIM_INFINITY && + IS_ENABLED(CONFIG_POSIX_TIMERS)) + update_rlimit_cpu(tsk, new_rlim->rlim_cur); +out: + read_unlock(&tasklist_lock); + return retval; +} + SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *,= rlim) { struct rlimit value; @@ -1558,63 +1617,6 @@ static void rlim64_to_rlim(const struct rlimit64 *rl= im64, struct rlimit *rlim) rlim->rlim_max =3D (unsigned long)rlim64->rlim_max; } =20 -/* make sure you are allowed to change @tsk limits before calling this */ -int do_prlimit(struct task_struct *tsk, unsigned int resource, - struct rlimit *new_rlim, struct rlimit *old_rlim) -{ - struct rlimit *rlim; - int retval =3D 0; - - if (resource >=3D RLIM_NLIMITS) - return -EINVAL; - if (new_rlim) { - if (new_rlim->rlim_cur > new_rlim->rlim_max) - return -EINVAL; - if (resource =3D=3D RLIMIT_NOFILE && - new_rlim->rlim_max > sysctl_nr_open) - return -EPERM; - } - - /* protect tsk->signal and tsk->sighand from disappearing */ - read_lock(&tasklist_lock); - if (!tsk->sighand) { - retval =3D -ESRCH; - goto out; - } - - rlim =3D tsk->signal->rlim + resource; - task_lock(tsk->group_leader); - if (new_rlim) { - /* Keep the capable check against init_user_ns until - cgroups can contain all limits */ - if (new_rlim->rlim_max > rlim->rlim_max && - !capable(CAP_SYS_RESOURCE)) - retval =3D -EPERM; - if (!retval) - retval =3D security_task_setrlimit(tsk, resource, new_rlim); - } - if (!retval) { - if (old_rlim) - *old_rlim =3D *rlim; - if (new_rlim) - *rlim =3D *new_rlim; - } - task_unlock(tsk->group_leader); - - /* - * RLIMIT_CPU handling. Arm the posix CPU timer if the limit is not - * infinite. In case of RLIM_INFINITY the posix CPU timer code - * ignores the rlimit. 
- */ - if (!retval && new_rlim && resource =3D=3D RLIMIT_CPU && - new_rlim->rlim_cur !=3D RLIM_INFINITY && - IS_ENABLED(CONFIG_POSIX_TIMERS)) - update_rlimit_cpu(tsk, new_rlim->rlim_cur); -out: - read_unlock(&tasklist_lock); - return retval; -} - /* rcu lock must be held */ static int check_prlimit_permission(struct task_struct *task, unsigned int flags) --=20 2.34.1.448.ga2b2bfdf31-goog From nobody Tue Jun 30 23:27:53 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id BE5BBC433EF for ; Thu, 6 Jan 2022 17:21:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S241728AbiAFRVE (ORCPT ); Thu, 6 Jan 2022 12:21:04 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:39144 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S241708AbiAFRVA (ORCPT ); Thu, 6 Jan 2022 12:21:00 -0500 Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 84DF9C06118A for ; Thu, 6 Jan 2022 09:20:58 -0800 (PST) Received: by mail-yb1-xb4a.google.com with SMTP id b10-20020a251b0a000000b0060a7fbb7a64so6133691ybb.23 for ; Thu, 06 Jan 2022 09:20:58 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=DdZQv1doZLKYKHs3Sz1XKfB4tATz7avSalIIxv9XLzY=; b=DEesoxeTzQUQHNtWQiXl06nmGTd7LbJlQJALRjDgCmwAExVNv4Zxh6X/UQYKaEE5zZ 7PhV3ti8GM3/52z4+OU7pqo8w2Jt64CBSuMkK0RduVg4jQM/9VCXRsgVeFX7jDzeOTD0 oTrzf/VL+y84wRVuT4eMaFGCKWJt/vqva7N4s7SflExTNSJJa6gmGZdywaa7nmy3qCEc g9sC8n4/TuTf1tCgySOkccy567f0AT0gXn2xc5fWvbkJjyX/vzFjVfYPSnb/39gQsSnQ iLC/Mb355rYWFvmOnnt7mnAyHMSPNGGpV4jQRZzySHmM554CHZ3An7BV5xPmB8eI10D6 5i5A== X-Google-DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=DdZQv1doZLKYKHs3Sz1XKfB4tATz7avSalIIxv9XLzY=; b=Og/2aYUADsVczKTG5Mg+GQ3A6yf7lzi3bULbifYtMnivbBoD/xorEBtHy7MLtQrORN AhAR0A5FcDpVtaz7VnHbEHKM3scVQKyk8uRFil/7Dcp29gz6h1hRi5ymx5b+QIcg2NqE wah5qoiKua8b6JgHx2NxLQBme0WevR/vCK2pF++omJW3YjhMOTlj2zNMWracw1/luqfR UmDRb/DBDic2JUJ+DWi4/aHUDm/smyKWgNv0MA6AkiIHgNlgnCgYJCfoPGDTGH1c/i2V naIEVNtHTeB67G8EGglFv9Xkb5bhhcKNWpLKX5CwqNRlA0VEW5ipWnnb3MRtktuqHUj3 dXOQ== X-Gm-Message-State: AOAM530KVe7G3WTATr2GNitDM64pb4yhyjarzK5wrgSAuVUsHCltu8Ns vYvqs0ZSJhycdKKHMcC1660Rqx5Z X-Google-Smtp-Source: ABdhPJw15q+XRsuRZYgyKZUfyKjmP2jedsN3meW+MT79Q4lLZzYHxFIEEoR12+DVM9ZenKNXT2I467HV X-Received: from gnomeregan.cam.corp.google.com ([2620:15c:6:412:f130:19b6:8678:c9d1]) (user=brho job=sendgmr) by 2002:a25:260d:: with SMTP id m13mr61028627ybm.29.1641489657787; Thu, 06 Jan 2022 09:20:57 -0800 (PST) Date: Thu, 6 Jan 2022 12:20:41 -0500 In-Reply-To: <20220106172041.522167-1-brho@google.com> Message-Id: <20220106172041.522167-4-brho@google.com> Mime-Version: 1.0 References: <20220106172041.522167-1-brho@google.com> X-Mailer: git-send-email 2.34.1.448.ga2b2bfdf31-goog Subject: [PATCH v3 3/3] prlimit: do not grab the tasklist_lock From: Barret Rhoden To: ebiederm@xmission.com Cc: Christian Brauner , Andrew Morton , Alexey Gladkov , William Cohen , Viresh Kumar , Alexey Dobriyan , Chris Hyser , Peter Collingbourne , Xiaofeng Cao , David Hildenbrand , Cyrill Gorcunov , linux-kernel@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Unnecessarily grabbing the tasklist_lock can be a scalability bottleneck for workloads that also must grab the tasklist_lock for waiting, killing, and cloning. The tasklist_lock was grabbed to protect tsk->sighand from disappearing (becoming NULL). 
tsk->signal was already protected by holding a reference to tsk. update_rlimit_cpu() assumed tsk->sighand !=3D NULL. With this commit, it attempts to take the sighand lock with lock_task_sighand(). However, this means that update_rlimit_cpu() can fail. This only happens when a task is exiting. Note that during exec, sighand may *change*, but it will not be NULL. Prior to this commit, do_prlimit() ensured that update_rlimit_cpu() would not fail by read-locking the tasklist_lock and checking tsk->sighand !=3D NULL. If update_rlimit_cpu() fails, there may be other tasks that are not exiting that share tsk->signal. However, the group_leader is the last task to be released, so if we cannot update_rlimit_cpu(group_leader), then the entire process is exiting. The only other caller of update_rlimit_cpu() is selinux_bprm_committing_creds(). It has tsk =3D=3D current, so update_rlimit_cpu() cannot fail (current->sighand cannot disappear until current exits). This change resulted in a 14% speedup on a microbenchmark where parents kill and wait on their children, and children call getpriority(), setpriority(), and getrlimit(). 
Signed-off-by: Barret Rhoden --- include/linux/posix-timers.h | 2 +- kernel/sys.c | 25 ++++++++++++++----------- kernel/time/posix-cpu-timers.c | 12 +++++++++--- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 5bbcd280bfd2..9cf126c3b27f 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -253,7 +253,7 @@ void posix_cpu_timers_exit_group(struct task_struct *ta= sk); void set_process_cpu_timer(struct task_struct *task, unsigned int clock_id= x, u64 *newval, u64 *oldval); =20 -void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); +int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); =20 void posixtimer_rearm(struct kernel_siginfo *info); #endif diff --git a/kernel/sys.c b/kernel/sys.c index fb2a5e7c0589..d155b21f4ba1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1432,13 +1432,7 @@ static int do_prlimit(struct task_struct *tsk, unsig= ned int resource, return -EPERM; } =20 - /* protect tsk->signal and tsk->sighand from disappearing */ - read_lock(&tasklist_lock); - if (!tsk->sighand) { - retval =3D -ESRCH; - goto out; - } - + /* Holding a refcount on tsk protects tsk->signal from disappearing. */ rlim =3D tsk->signal->rlim + resource; task_lock(tsk->group_leader); if (new_rlim) { @@ -1467,10 +1461,19 @@ static int do_prlimit(struct task_struct *tsk, unsi= gned int resource, */ if (!retval && new_rlim && resource =3D=3D RLIMIT_CPU && new_rlim->rlim_cur !=3D RLIM_INFINITY && - IS_ENABLED(CONFIG_POSIX_TIMERS)) - update_rlimit_cpu(tsk, new_rlim->rlim_cur); -out: - read_unlock(&tasklist_lock); + IS_ENABLED(CONFIG_POSIX_TIMERS)) { + /* + * update_rlimit_cpu can fail if the task is exiting, but there + * may be other tasks in the thread group that are not exiting, + * and they need their cpu timers adjusted. 
+ * + * The group_leader is the last task to be released, so if we + * cannot update_rlimit_cpu on it, then the entire process is + * exiting and we do not need to update at all. + */ + update_rlimit_cpu(tsk->group_leader, new_rlim->rlim_cur); + } + return retval; } =20 diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 96b4e7810426..e13e628509fb 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -34,14 +34,20 @@ void posix_cputimers_group_init(struct posix_cputimers = *pct, u64 cpu_limit) * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if * necessary. Needs siglock protection since other code may update the * expiration cache as well. + * + * Returns 0 on success, -ESRCH on failure. Can fail if the task is exiti= ng and + * we cannot lock_task_sighand. Cannot fail if task is current. */ -void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) +int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) { u64 nsecs =3D rlim_new * NSEC_PER_SEC; + unsigned long irq_fl; =20 - spin_lock_irq(&task->sighand->siglock); + if (!lock_task_sighand(task, &irq_fl)) + return -ESRCH; set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL); - spin_unlock_irq(&task->sighand->siglock); + unlock_task_sighand(task, &irq_fl); + return 0; } =20 /* --=20 2.34.1.448.ga2b2bfdf31-goog