[PATCH -next 4/4] cpuset: replace cpuset_lock() with guard_cpuset()

From: Chen Ridong <chenridong@huawei.com>

Now that guard_cpuset() has been introduced, replace all cpuset_lock()
and cpuset_unlock() usage with it and remove both helpers.
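
For reference, the scope-based locking pattern from <linux/cleanup.h>
that guard_cpuset() builds on looks roughly like this (a minimal
sketch; example_mutex and example_update() are hypothetical names used
only for illustration):

	#include <linux/cleanup.h>
	#include <linux/mutex.h>

	static DEFINE_MUTEX(example_mutex);

	static void example_update(void)
	{
		/*
		 * guard(mutex) acquires example_mutex here and releases it
		 * automatically when this scope is left, on every return path.
		 */
		guard(mutex)(&example_mutex);

		/* ... critical section protected by example_mutex ... */
	}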

Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
 include/linux/cpuset.h  |  2 --
 kernel/cgroup/cpuset.c  | 10 ----------
 kernel/sched/syscalls.c | 15 +++++----------
 3 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 6153de28acf0..1baf12f4be19 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -72,8 +72,6 @@ extern void cpuset_force_rebuild(void);
 extern void cpuset_update_active_cpus(void);
 extern void inc_dl_tasks_cs(struct task_struct *task);
 extern void dec_dl_tasks_cs(struct task_struct *task);
-extern void cpuset_lock(void);
-extern void cpuset_unlock(void);
 extern void guard_cpuset(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 110d2b93ff96..04ed73d0887e 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -250,16 +250,6 @@ static struct cpuset top_cpuset = {
 
 static DEFINE_MUTEX(cpuset_mutex);
 
-void cpuset_lock(void)
-{
-	mutex_lock(&cpuset_mutex);
-}
-
-void cpuset_unlock(void)
-{
-	mutex_unlock(&cpuset_mutex);
-}
-
 void guard_cpuset(void)
 {
 	guard(mutex)(&cpuset_mutex);
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 77ae87f36e84..954f6e9af41b 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -577,8 +577,10 @@ int __sched_setscheduler(struct task_struct *p,
 	 * information.
 	 */
 	if (dl_policy(policy) || dl_policy(p->policy)) {
-		cpuset_locked = true;
-		cpuset_lock();
+		if (!cpuset_locked) {
+			guard_cpuset();
+			cpuset_locked = true;
+		}
 	}
 
 	/*
@@ -660,8 +662,6 @@ int __sched_setscheduler(struct task_struct *p,
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
 		task_rq_unlock(rq, p, &rf);
-		if (cpuset_locked)
-			cpuset_unlock();
 		goto recheck;
 	}
 
@@ -733,11 +733,8 @@ int __sched_setscheduler(struct task_struct *p,
 	head = splice_balance_callbacks(rq);
 	task_rq_unlock(rq, p, &rf);
 
-	if (pi) {
-		if (cpuset_locked)
-			cpuset_unlock();
+	if (pi)
 		rt_mutex_adjust_pi(p);
-	}
 
 	/* Run balance callbacks after we've adjusted the PI chain: */
 	balance_callbacks(rq, head);
@@ -747,8 +744,6 @@ int __sched_setscheduler(struct task_struct *p,
 
 unlock:
 	task_rq_unlock(rq, p, &rf);
-	if (cpuset_locked)
-		cpuset_unlock();
 	return retval;
 }
 
-- 
2.34.1

Re: [PATCH -next 4/4] cpuset: replace cpuset_lock() with guard_cpuset()
Posted by kernel test robot 4 months, 1 week ago
Hi Chen,

kernel test robot noticed the following build errors:

[auto build test ERROR on next-20250808]

url:    https://github.com/intel-lab-lkp/linux/commits/Chen-Ridong/cpuset-remove-redundant-CS_ONLINE-flag/20250808-174245
base:   next-20250808
patch link:    https://lore.kernel.org/r/20250808092515.764820-5-chenridong%40huaweicloud.com
patch subject: [PATCH -next 4/4] cpuset: replace cpuset_lock() with guard_cpuset()
config: x86_64-buildonly-randconfig-001-20250809 (https://download.01.org/0day-ci/archive/20250809/202508090557.XjdGVjX4-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14+deb12u1) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250809/202508090557.XjdGVjX4-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202508090557.XjdGVjX4-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from kernel/sched/build_policy.c:65:
   kernel/sched/syscalls.c: In function '__sched_setscheduler':
>> kernel/sched/syscalls.c:581:25: error: implicit declaration of function 'guard_cpuset' [-Werror=implicit-function-declaration]
     581 |                         guard_cpuset();
         |                         ^~~~~~~~~~~~
   In file included from kernel/sched/build_policy.c:52:
   kernel/sched/rt.c: At top level:
   kernel/sched/rt.c:12:18: warning: 'max_rt_runtime' defined but not used [-Wunused-const-variable=]
      12 | static const u64 max_rt_runtime = MAX_BW;
         |                  ^~~~~~~~~~~~~~
   cc1: some warnings being treated as errors
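
A plausible cause, assuming this randconfig has CONFIG_CPUSETS disabled:
the series declares guard_cpuset() only in the CONFIG_CPUSETS section of
include/linux/cpuset.h (visible as context in the hunk above), while the
!CONFIG_CPUSETS branch keeps its empty cpuset_lock()/cpuset_unlock()
stubs and gains no guard_cpuset() counterpart, so the call site in
__sched_setscheduler() sees no declaration. A sketch of what such a stub
could look like (its placement in the !CONFIG_CPUSETS branch is an
assumption, not part of the posted series):

	static inline void guard_cpuset(void) { }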


vim +/guard_cpuset +581 kernel/sched/syscalls.c

   512	
   513	int __sched_setscheduler(struct task_struct *p,
   514				 const struct sched_attr *attr,
   515				 bool user, bool pi)
   516	{
   517		int oldpolicy = -1, policy = attr->sched_policy;
   518		int retval, oldprio, newprio, queued, running;
   519		const struct sched_class *prev_class, *next_class;
   520		struct balance_callback *head;
   521		struct rq_flags rf;
   522		int reset_on_fork;
   523		int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
   524		struct rq *rq;
   525		bool cpuset_locked = false;
   526	
   527		/* The pi code expects interrupts enabled */
   528		BUG_ON(pi && in_interrupt());
   529	recheck:
   530		/* Double check policy once rq lock held: */
   531		if (policy < 0) {
   532			reset_on_fork = p->sched_reset_on_fork;
   533			policy = oldpolicy = p->policy;
   534		} else {
   535			reset_on_fork = !!(attr->sched_flags & SCHED_FLAG_RESET_ON_FORK);
   536	
   537			if (!valid_policy(policy))
   538				return -EINVAL;
   539		}
   540	
   541		if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV))
   542			return -EINVAL;
   543	
   544		/*
   545		 * Valid priorities for SCHED_FIFO and SCHED_RR are
   546		 * 1..MAX_RT_PRIO-1, valid priority for SCHED_NORMAL,
   547		 * SCHED_BATCH and SCHED_IDLE is 0.
   548		 */
   549		if (attr->sched_priority > MAX_RT_PRIO-1)
   550			return -EINVAL;
   551		if ((dl_policy(policy) && !__checkparam_dl(attr)) ||
   552		    (rt_policy(policy) != (attr->sched_priority != 0)))
   553			return -EINVAL;
   554	
   555		if (user) {
   556			retval = user_check_sched_setscheduler(p, attr, policy, reset_on_fork);
   557			if (retval)
   558				return retval;
   559	
   560			if (attr->sched_flags & SCHED_FLAG_SUGOV)
   561				return -EINVAL;
   562	
   563			retval = security_task_setscheduler(p);
   564			if (retval)
   565				return retval;
   566		}
   567	
   568		/* Update task specific "requested" clamps */
   569		if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
   570			retval = uclamp_validate(p, attr);
   571			if (retval)
   572				return retval;
   573		}
   574	
   575		/*
   576		 * SCHED_DEADLINE bandwidth accounting relies on stable cpusets
   577		 * information.
   578		 */
   579		if (dl_policy(policy) || dl_policy(p->policy)) {
   580			if (!cpuset_locked) {
 > 581				guard_cpuset();
   582				cpuset_locked = true;
   583			}
   584		}
   585	
   586		/*
   587		 * Make sure no PI-waiters arrive (or leave) while we are
   588		 * changing the priority of the task:
   589		 *
   590		 * To be able to change p->policy safely, the appropriate
   591		 * runqueue lock must be held.
   592		 */
   593		rq = task_rq_lock(p, &rf);
   594		update_rq_clock(rq);
   595	
   596		/*
   597		 * Changing the policy of the stop threads its a very bad idea:
   598		 */
   599		if (p == rq->stop) {
   600			retval = -EINVAL;
   601			goto unlock;
   602		}
   603	
   604		retval = scx_check_setscheduler(p, policy);
   605		if (retval)
   606			goto unlock;
   607	
   608		/*
   609		 * If not changing anything there's no need to proceed further,
   610		 * but store a possible modification of reset_on_fork.
   611		 */
   612		if (unlikely(policy == p->policy)) {
   613			if (fair_policy(policy) &&
   614			    (attr->sched_nice != task_nice(p) ||
   615			     (attr->sched_runtime != p->se.slice)))
   616				goto change;
   617			if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
   618				goto change;
   619			if (dl_policy(policy) && dl_param_changed(p, attr))
   620				goto change;
   621			if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
   622				goto change;
   623	
   624			p->sched_reset_on_fork = reset_on_fork;
   625			retval = 0;
   626			goto unlock;
   627		}
   628	change:
   629	
   630		if (user) {
   631	#ifdef CONFIG_RT_GROUP_SCHED
   632			/*
   633			 * Do not allow real-time tasks into groups that have no runtime
   634			 * assigned.
   635			 */
   636			if (rt_group_sched_enabled() &&
   637					rt_bandwidth_enabled() && rt_policy(policy) &&
   638					task_group(p)->rt_bandwidth.rt_runtime == 0 &&
   639					!task_group_is_autogroup(task_group(p))) {
   640				retval = -EPERM;
   641				goto unlock;
   642			}
   643	#endif /* CONFIG_RT_GROUP_SCHED */
   644			if (dl_bandwidth_enabled() && dl_policy(policy) &&
   645					!(attr->sched_flags & SCHED_FLAG_SUGOV)) {
   646				cpumask_t *span = rq->rd->span;
   647	
   648				/*
   649				 * Don't allow tasks with an affinity mask smaller than
   650				 * the entire root_domain to become SCHED_DEADLINE. We
   651				 * will also fail if there's no bandwidth available.
   652				 */
   653				if (!cpumask_subset(span, p->cpus_ptr) ||
   654				    rq->rd->dl_bw.bw == 0) {
   655					retval = -EPERM;
   656					goto unlock;
   657				}
   658			}
   659		}
   660	
   661		/* Re-check policy now with rq lock held: */
   662		if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
   663			policy = oldpolicy = -1;
   664			task_rq_unlock(rq, p, &rf);
   665			goto recheck;
   666		}
   667	
   668		/*
   669		 * If setscheduling to SCHED_DEADLINE (or changing the parameters
   670		 * of a SCHED_DEADLINE task) we need to check if enough bandwidth
   671		 * is available.
   672		 */
   673		if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) {
   674			retval = -EBUSY;
   675			goto unlock;
   676		}
   677	
   678		p->sched_reset_on_fork = reset_on_fork;
   679		oldprio = p->prio;
   680	
   681		newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice);
   682		if (pi) {
   683			/*
   684			 * Take priority boosted tasks into account. If the new
   685			 * effective priority is unchanged, we just store the new
   686			 * normal parameters and do not touch the scheduler class and
   687			 * the runqueue. This will be done when the task deboost
   688			 * itself.
   689			 */
   690			newprio = rt_effective_prio(p, newprio);
   691			if (newprio == oldprio)
   692				queue_flags &= ~DEQUEUE_MOVE;
   693		}
   694	
   695		prev_class = p->sched_class;
   696		next_class = __setscheduler_class(policy, newprio);
   697	
   698		if (prev_class != next_class && p->se.sched_delayed)
   699			dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
   700	
   701		queued = task_on_rq_queued(p);
   702		running = task_current_donor(rq, p);
   703		if (queued)
   704			dequeue_task(rq, p, queue_flags);
   705		if (running)
   706			put_prev_task(rq, p);
   707	
   708		if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
   709			__setscheduler_params(p, attr);
   710			p->sched_class = next_class;
   711			p->prio = newprio;
   712		}
   713		__setscheduler_uclamp(p, attr);
   714		check_class_changing(rq, p, prev_class);
   715	
   716		if (queued) {
   717			/*
   718			 * We enqueue to tail when the priority of a task is
   719			 * increased (user space view).
   720			 */
   721			if (oldprio < p->prio)
   722				queue_flags |= ENQUEUE_HEAD;
   723	
   724			enqueue_task(rq, p, queue_flags);
   725		}
   726		if (running)
   727			set_next_task(rq, p);
   728	
   729		check_class_changed(rq, p, prev_class, oldprio);
   730	
   731		/* Avoid rq from going away on us: */
   732		preempt_disable();
   733		head = splice_balance_callbacks(rq);
   734		task_rq_unlock(rq, p, &rf);
   735	
   736		if (pi)
   737			rt_mutex_adjust_pi(p);
   738	
   739		/* Run balance callbacks after we've adjusted the PI chain: */
   740		balance_callbacks(rq, head);
   741		preempt_enable();
   742	
   743		return 0;
   744	
   745	unlock:
   746		task_rq_unlock(rq, p, &rf);
   747		return retval;
   748	}
   749	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki