From: Waiman Long <longman@redhat.com>
To: Tejun Heo, Zefan Li, Johannes Weiner, Andrew Morton, Michal Hocko,
	Frederic Weisbecker
Cc: cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, Mrunal Patel,
	Ryan Phillips, Brent Rowsell, Peter Hunt, Waiman Long
Subject: [PATCH-cgroup 1/2] cgroup/cpuset: Make callback_lock a raw_spinlock_t
Date: Sun, 26 Nov 2023 23:19:55 -0500
Message-Id: <20231127041956.266026-2-longman@redhat.com>
In-Reply-To: <20231127041956.266026-1-longman@redhat.com>
References: <20231127041956.266026-1-longman@redhat.com>

All the callback_lock critical sections are pretty small and there
shouldn't be much contention on that lock. Make it a raw_spinlock_t to
avoid additional locking overhead on a PREEMPT_RT kernel.
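For background, a minimal sketch of why the lock type matters (illustrative
only, not taken from the patch; the lock and function names below are made
up): on a PREEMPT_RT kernel, spinlock_t is substituted with a sleeping,
rtmutex-based lock, so every lock/unlock can involve scheduler work, while
raw_spinlock_t keeps the classic busy-wait semantics that short critical
sections such as these want.

	/* Illustrative sketch only; names are hypothetical, not from cpuset.c. */
	static DEFINE_RAW_SPINLOCK(example_lock);	/* stays a true spinlock on PREEMPT_RT */

	static void tiny_critical_section(void)
	{
		/* spin_lock_irq() on a spinlock_t would be a sleeping rtmutex here */
		raw_spin_lock_irq(&example_lock);
		/* a handful of loads/stores, as in the callback_lock sections */
		raw_spin_unlock_irq(&example_lock);
	}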
Signed-off-by: Waiman Long <longman@redhat.com>
---
 kernel/cgroup/cpuset.c | 102 ++++++++++++++++++++---------------------
 1 file changed, 51 insertions(+), 51 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 2a16df86c55c..e34bbb0e2f24 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -445,7 +445,7 @@ void cpuset_unlock(void)
 	mutex_unlock(&cpuset_mutex);
 }
 
-static DEFINE_SPINLOCK(callback_lock);
+static DEFINE_RAW_SPINLOCK(callback_lock);
 
 static struct workqueue_struct *cpuset_migrate_mm_wq;
 
@@ -1588,7 +1588,7 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
 	    cpumask_subset(top_cpuset.effective_cpus, tmp->new_cpus))
 		return 0;
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus);
 	list_add(&cs->remote_sibling, &remote_children);
 	if (cs->use_parent_ecpus) {
@@ -1597,7 +1597,7 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs,
 		cs->use_parent_ecpus = false;
 		parent->child_ecpus_count--;
 	}
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 	update_unbound_workqueue_cpumask(isolcpus_updated);
 
 	/*
@@ -1625,7 +1625,7 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
 	WARN_ON_ONCE(!is_remote_partition(cs));
 	WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus));
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	list_del_init(&cs->remote_sibling);
 	isolcpus_updated = partition_xcpus_del(cs->partition_root_state,
 					       NULL, tmp->new_cpus);
@@ -1633,7 +1633,7 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp)
 	if (!cs->prs_err)
 		cs->prs_err = PERR_INVCPUS;
 	reset_partition_data(cs);
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 	update_unbound_workqueue_cpumask(isolcpus_updated);
 
 	/*
@@ -1680,12 +1680,12 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask,
 	    cpumask_subset(top_cpuset.effective_cpus, tmp->addmask)))
 		goto invalidate;
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	if (adding)
 		isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask);
 	if (deleting)
 		isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask);
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 	update_unbound_workqueue_cpumask(isolcpus_updated);
 
 	/*
@@ -2034,7 +2034,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
 	 * Newly added CPUs will be removed from effective_cpus and
 	 * newly deleted ones will be added back to effective_cpus.
 	 */
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	if (old_prs != new_prs) {
 		cs->partition_root_state = new_prs;
 		if (new_prs <= 0)
@@ -2055,7 +2055,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
 		parent->nr_subparts += subparts_delta;
 		WARN_ON_ONCE(parent->nr_subparts < 0);
 	}
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 	update_unbound_workqueue_cpumask(isolcpus_updated);
 
 	if ((old_prs != new_prs) && (cmd == partcmd_update))
@@ -2134,11 +2134,11 @@ static void compute_partition_effective_cpumask(struct cpuset *cs,
 			/*
 			 * Invalidate child partition
 			 */
-			spin_lock_irq(&callback_lock);
+			raw_spin_lock_irq(&callback_lock);
 			make_partition_invalid(child);
 			cs->nr_subparts--;
 			child->nr_subparts = 0;
-			spin_unlock_irq(&callback_lock);
+			raw_spin_unlock_irq(&callback_lock);
 			notify_partition_change(child, old_prs);
 			continue;
 		}
@@ -2195,9 +2195,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
 		 * The case when exclusive_cpus isn't set is handled later.
 		 */
 		if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs)) {
-			spin_lock_irq(&callback_lock);
+			raw_spin_lock_irq(&callback_lock);
 			compute_effective_exclusive_cpumask(cp, NULL);
-			spin_unlock_irq(&callback_lock);
+			raw_spin_unlock_irq(&callback_lock);
 		}
 
 		old_prs = new_prs = cp->partition_root_state;
@@ -2295,7 +2295,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
 			new_prs = cp->partition_root_state;
 		}
 
-		spin_lock_irq(&callback_lock);
+		raw_spin_lock_irq(&callback_lock);
 		cpumask_copy(cp->effective_cpus, tmp->new_cpus);
 		cp->partition_root_state = new_prs;
 		/*
@@ -2307,7 +2307,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
 				cp->cpus_allowed, parent->effective_xcpus);
 		else if (new_prs < 0)
 			reset_partition_data(cp);
-		spin_unlock_irq(&callback_lock);
+		raw_spin_unlock_irq(&callback_lock);
 
 		notify_partition_change(cp, old_prs);
 
@@ -2536,12 +2536,12 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 				       trialcs->effective_cpus, &tmp);
 	}
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
 	cpumask_copy(cs->effective_xcpus, trialcs->effective_xcpus);
 	if ((old_prs > 0) && !is_partition_valid(cs))
 		reset_partition_data(cs);
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 
 	/* effective_cpus/effective_xcpus will be updated here */
 	update_cpumasks_hier(cs, &tmp, hier_flags);
@@ -2636,12 +2636,12 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 		remote_partition_check(cs, trialcs->effective_xcpus,
 				       trialcs->effective_cpus, &tmp);
 	}
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->exclusive_cpus, trialcs->exclusive_cpus);
 	cpumask_copy(cs->effective_xcpus, trialcs->effective_xcpus);
 	if ((old_prs > 0) && !is_partition_valid(cs))
 		reset_partition_data(cs);
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 
 	/*
 	 * Call update_cpumasks_hier() to update effective_cpus/effective_xcpus
@@ -2841,9 +2841,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
 			continue;
 		rcu_read_unlock();
 
-		spin_lock_irq(&callback_lock);
+		raw_spin_lock_irq(&callback_lock);
 		cp->effective_mems = *new_mems;
-		spin_unlock_irq(&callback_lock);
+		raw_spin_unlock_irq(&callback_lock);
 
 		WARN_ON(!is_in_v2_mode() &&
 			!nodes_equal(cp->mems_allowed, cp->effective_mems));
@@ -2913,9 +2913,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 
 	check_insane_mems_config(&trialcs->mems_allowed);
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	cs->mems_allowed = trialcs->mems_allowed;
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->mems_allowed as a temp variable */
 	update_nodemasks_hier(cs, &trialcs->mems_allowed);
@@ -3006,9 +3006,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
 			|| (is_spread_page(cs) != is_spread_page(trialcs)));
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	cs->flags = trialcs->flags;
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 
 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
 		rebuild_sched_domains_locked();
@@ -3052,10 +3052,10 @@ static int update_prstate(struct cpuset *cs, int new_prs)
 	 * later if partition becomes invalid.
 	 */
 	if ((new_prs > 0) && cpumask_empty(cs->exclusive_cpus)) {
-		spin_lock_irq(&callback_lock);
+		raw_spin_lock_irq(&callback_lock);
 		cpumask_and(cs->effective_xcpus,
 			    cs->cpus_allowed, parent->effective_xcpus);
-		spin_unlock_irq(&callback_lock);
+		raw_spin_unlock_irq(&callback_lock);
 	}
 
 	err = update_partition_exclusive(cs, new_prs);
@@ -3112,14 +3112,14 @@ static int update_prstate(struct cpuset *cs, int new_prs)
 		update_partition_exclusive(cs, new_prs);
 	}
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	cs->partition_root_state = new_prs;
 	WRITE_ONCE(cs->prs_err, err);
 	if (!is_partition_valid(cs))
 		reset_partition_data(cs);
 	else if (new_xcpus_state)
 		partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus);
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 	update_unbound_workqueue_cpumask(new_xcpus_state);
 
 	/* Force update if switching back to member */
@@ -3650,7 +3650,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 	cpuset_filetype_t type = seq_cft(sf)->private;
 	int ret = 0;
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 
 	switch (type) {
 	case FILE_CPULIST:
@@ -3681,7 +3681,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 		ret = -EINVAL;
 	}
 
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 	return ret;
 }
 
@@ -4042,7 +4042,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 
 	cpuset_inc();
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	if (is_in_v2_mode()) {
 		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
 		cs->effective_mems = parent->effective_mems;
@@ -4062,7 +4062,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	    !is_sched_load_balance(parent))
 		clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 
 	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
 		goto out_unlock;
@@ -4089,12 +4089,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	}
 	rcu_read_unlock();
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	cs->mems_allowed = parent->mems_allowed;
 	cs->effective_mems = parent->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
 	cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
 	cpus_read_unlock();
@@ -4150,7 +4150,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
 static void cpuset_bind(struct cgroup_subsys_state *root_css)
 {
 	mutex_lock(&cpuset_mutex);
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 
 	if (is_in_v2_mode()) {
 		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
@@ -4162,7 +4162,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
 		top_cpuset.mems_allowed = top_cpuset.effective_mems;
 	}
 
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 	mutex_unlock(&cpuset_mutex);
 }
 
@@ -4349,12 +4349,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
 {
 	bool is_empty;
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, new_cpus);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->mems_allowed = *new_mems;
 	cs->effective_mems = *new_mems;
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 
 	/*
 	 * Don't call update_tasks_cpumask() if the cpuset becomes empty,
@@ -4391,10 +4391,10 @@ hotplug_update_tasks(struct cpuset *cs,
 	if (nodes_empty(*new_mems))
 		*new_mems = parent_cs(cs)->effective_mems;
 
-	spin_lock_irq(&callback_lock);
+	raw_spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->effective_mems = *new_mems;
-	spin_unlock_irq(&callback_lock);
+	raw_spin_unlock_irq(&callback_lock);
 
 	if (cpus_updated)
 		update_tasks_cpumask(cs, new_cpus);
@@ -4597,7 +4597,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
 	/* For v1, synchronize cpus_allowed to cpu_active_mask */
 	if (cpus_updated) {
-		spin_lock_irq(&callback_lock);
+		raw_spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
 		/*
@@ -4616,17 +4616,17 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 			}
 		}
 		cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
-		spin_unlock_irq(&callback_lock);
+		raw_spin_unlock_irq(&callback_lock);
 		/* we don't mess with cpumasks of tasks in top_cpuset */
 	}
 
 	/* synchronize mems_allowed to N_MEMORY */
 	if (mems_updated) {
-		spin_lock_irq(&callback_lock);
+		raw_spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			top_cpuset.mems_allowed = new_mems;
 		top_cpuset.effective_mems = new_mems;
-		spin_unlock_irq(&callback_lock);
+		raw_spin_unlock_irq(&callback_lock);
 		update_tasks_nodemask(&top_cpuset);
 	}
 
@@ -4726,7 +4726,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 	unsigned long flags;
 	struct cpuset *cs;
 
-	spin_lock_irqsave(&callback_lock, flags);
+	raw_spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 
 	cs = task_cs(tsk);
@@ -4750,7 +4750,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 	}
 
 	rcu_read_unlock();
-	spin_unlock_irqrestore(&callback_lock, flags);
+	raw_spin_unlock_irqrestore(&callback_lock, flags);
 }
 
 /**
@@ -4821,11 +4821,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 	nodemask_t mask;
 	unsigned long flags;
 
-	spin_lock_irqsave(&callback_lock, flags);
+	raw_spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 	guarantee_online_mems(task_cs(tsk), &mask);
 	rcu_read_unlock();
-	spin_unlock_irqrestore(&callback_lock, flags);
+	raw_spin_unlock_irqrestore(&callback_lock, flags);
 
 	return mask;
 }
@@ -4917,14 +4917,14 @@ bool cpuset_node_allowed(int node, gfp_t gfp_mask)
 		return true;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
-	spin_lock_irqsave(&callback_lock, flags);
+	raw_spin_lock_irqsave(&callback_lock, flags);
 
 	rcu_read_lock();
 	cs = nearest_hardwall_ancestor(task_cs(current));
 	allowed = node_isset(node, cs->mems_allowed);
 	rcu_read_unlock();
 
-	spin_unlock_irqrestore(&callback_lock, flags);
+	raw_spin_unlock_irqrestore(&callback_lock, flags);
 	return allowed;
 }
 
--
2.39.3

From: Waiman Long <longman@redhat.com>
To: Tejun Heo, Zefan Li, Johannes Weiner, Andrew Morton, Michal Hocko,
	Frederic Weisbecker
Cc: cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, Mrunal Patel,
	Ryan Phillips, Brent Rowsell, Peter Hunt, Waiman Long
Subject: [PATCH-cgroup 2/2] cgroup/cpuset: Include isolated cpuset CPUs in cpu_is_isolated() check
Date: Sun, 26 Nov 2023 23:19:56 -0500
Message-Id: <20231127041956.266026-3-longman@redhat.com>
In-Reply-To: <20231127041956.266026-1-longman@redhat.com>
References: <20231127041956.266026-1-longman@redhat.com>

Currently, the cpu_is_isolated() function checks only the statically
isolated CPUs specified via the "isolcpus" and "nohz_full" kernel
command line options.
This function is used by vmstat and memcg to reduce interference with
isolated CPUs by not doing stat flushing or scheduling works on those
CPUs. Workloads running on isolated CPUs within isolated cpuset
partitions should receive the same treatment to reduce unnecessary
interference.

This patch introduces a new cpuset_cpu_is_isolated() function to be
called by cpu_is_isolated() so that the set of dynamically created
cpuset isolated CPUs will be included in the check. To minimize the
overhead of calling cpuset_cpu_is_isolated(), a seqcount is used to
protect read access to the isolated cpumask without taking the
cpuset_mutex or callback_lock.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 include/linux/cpuset.h          |  6 ++++++
 include/linux/sched/isolation.h |  4 +++-
 kernel/cgroup/cpuset.c          | 25 +++++++++++++++++++++++++
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index d629094fac6e..875d12598bd2 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -77,6 +77,7 @@ extern void cpuset_lock(void);
 extern void cpuset_unlock(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
+extern bool cpuset_cpu_is_isolated(int cpu);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
@@ -207,6 +208,11 @@ static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p)
 	return false;
 }
 
+static inline bool cpuset_cpu_is_isolated(int cpu)
+{
+	return false;
+}
+
 static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 {
 	return node_possible_map;
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index fe1a46f30d24..2b461129d1fa 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -2,6 +2,7 @@
 #define _LINUX_SCHED_ISOLATION_H
 
 #include <linux/cpumask.h>
+#include <linux/cpuset.h>
 #include <linux/init.h>
 #include <linux/tick.h>
 
@@ -67,7 +68,8 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type)
 static inline bool cpu_is_isolated(int cpu)
 {
 	return !housekeeping_test_cpu(cpu, HK_TYPE_DOMAIN) ||
-	       !housekeeping_test_cpu(cpu, HK_TYPE_TICK);
+	       !housekeeping_test_cpu(cpu, HK_TYPE_TICK) ||
+	       cpuset_cpu_is_isolated(cpu);
 }
 
 #endif /* _LINUX_SCHED_ISOLATION_H */
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index e34bbb0e2f24..4adb6d2209ca 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -208,8 +208,13 @@ static cpumask_var_t subpartitions_cpus;
 
 /*
  * Exclusive CPUs in isolated partitions
+ *
+ * The isolcpus_seq is used to protect read access to isolated_cpus without
+ * taking callback_lock or cpuset_mutex while write access requires taking
+ * both cpuset_mutex and callback_lock.
  */
 static cpumask_var_t isolated_cpus;
+static seqcount_t isolcpus_seq = SEQCNT_ZERO(isolcpus_seq);
 
 /* List of remote partition root children */
 static struct list_head remote_children;
@@ -1435,10 +1440,12 @@ static void reset_partition_data(struct cpuset *cs)
 static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *xcpus)
 {
 	WARN_ON_ONCE(old_prs == new_prs);
+	write_seqcount_begin(&isolcpus_seq);
 	if (new_prs == PRS_ISOLATED)
 		cpumask_or(isolated_cpus, isolated_cpus, xcpus);
 	else
 		cpumask_andnot(isolated_cpus, isolated_cpus, xcpus);
+	write_seqcount_end(&isolcpus_seq);
 }
 
 /*
@@ -1518,6 +1525,24 @@ static void update_unbound_workqueue_cpumask(bool isolcpus_updated)
 	WARN_ON_ONCE(ret < 0);
 }
 
+/**
+ * cpuset_cpu_is_isolated - Check if the given CPU is isolated
+ * @cpu: the CPU number to be checked
+ * Return: true if CPU is used in an isolated partition, false otherwise
+ */
+bool cpuset_cpu_is_isolated(int cpu)
+{
+	unsigned int seq;
+	bool ret;
+
+	do {
+		seq = read_seqcount_begin(&isolcpus_seq);
+		ret = cpumask_test_cpu(cpu, isolated_cpus);
+	} while (read_seqcount_retry(&isolcpus_seq, seq));
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cpuset_cpu_is_isolated);
+
 /*
  * compute_effective_exclusive_cpumask - compute effective exclusive CPUs
  * @cs: cpuset
--
2.39.3
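For illustration only (a sketch under assumed names, not part of either
patch): with cpuset_cpu_is_isolated() wired into cpu_is_isolated(), a
periodic housekeeping job of the vmstat/memcg variety can skip CPUs that
sit in isolated cpuset partitions without taking cpuset_mutex or
callback_lock; the per-CPU flush helper below is hypothetical and stands
in for whatever work the caller would normally queue.

	#include <linux/cpumask.h>
	#include <linux/sched/isolation.h>

	/* Hypothetical per-CPU work, standing in for vmstat/memcg stat flushing. */
	static void do_per_cpu_flush(int cpu)
	{
		/* ... per-CPU flushing work would go here ... */
	}

	static void flush_all_housekeeping_cpus(void)
	{
		int cpu;

		for_each_online_cpu(cpu) {
			/* isolcpus=, nohz_full= or an isolated cpuset partition */
			if (cpu_is_isolated(cpu))
				continue;
			do_per_cpu_flush(cpu);
		}
	}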