[PATCH -next RFC 09/16] cpuset: introduce local_partition_update()

Chen Ridong posted 16 patches 4 months, 2 weeks ago
There is a newer version of this series
[PATCH -next RFC 09/16] cpuset: introduce local_partition_update()
Posted by Chen Ridong 4 months, 2 weeks ago
From: Chen Ridong <chenridong@huawei.com>

Extend the partition_update() infrastructure to handle local partition
updates.

The local_partition_update() function replaces the command partcmd_update
previously handled within update_parent_effective_cpumask(). The update
logic follows a state-based approach:

1. Validation check: First verify if the local partition is currently valid
2. Invalidation handling: If the partition is invalid, trigger invalidation
3. State transition: If an invalid partition has no errors, transition to
   valid
4. cpus updates: For local partition that only cpu maks changes, use
   partition_update() to handle partition change.

With the introduction of this function, update_parent_effective_cpumask()
function is removed, simplifying the partition update code path and
creating a cleaner separation between local and remote partition
operations.

Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
 kernel/cgroup/cpuset.c | 376 +++++++++++++----------------------------
 1 file changed, 122 insertions(+), 254 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index e460d03286ba..d0217db04b69 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1622,12 +1622,14 @@ static void partition_update(struct cpuset *cs, int prs, struct cpumask *xcpus,
 	bool isolcpus_updated;
 	bool excl_updated;
 	struct cpuset *parent;
+	int old_prs;
 
 	lockdep_assert_held(&cpuset_mutex);
 	WARN_ON_ONCE(!cpuset_v2());
 	WARN_ON_ONCE(prs <= 0);
 
 	parent = is_remote_partition(cs) ? NULL : parent_cs(cs);
+	old_prs = cs->partition_root_state;
 	excl_updated = !cpumask_empty(tmp->addmask) ||
 			!cpumask_empty(tmp->delmask);
 
@@ -1645,6 +1647,8 @@ static void partition_update(struct cpuset *cs, int prs, struct cpumask *xcpus,
 	update_unbound_workqueue_cpumask(isolcpus_updated);
 	if (excl_updated)
 		cpuset_force_rebuild();
+	update_partition_exclusive_flag(cs, prs);
+	notify_partition_change(cs, old_prs);
 }
 
 /*
@@ -1790,6 +1794,27 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
 	return false;
 }
 
+static bool cpuset_user_cpus_exclusive(struct cpuset *cs)
+{
+	struct cpuset *parent = parent_cs(cs);
+
+	struct cgroup_subsys_state *css;
+	struct cpuset *child;
+	bool exclusive = true;
+
+	rcu_read_lock();
+	cpuset_for_each_child(child, css, parent) {
+		if (child == cs)
+			continue;
+		if (!cpusets_are_exclusive(cs, child)) {
+			exclusive = false;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return exclusive;
+}
+
 /**
  * validate_partition - Validate a cpuset partition configuration
  * @cs: The cpuset to validate
@@ -1818,6 +1843,39 @@ static enum prs_errcode validate_partition(struct cpuset *cs, int new_prs,
 	return PERR_NONE;
 }
 
+/**
+ * local_partition_check - Validate for local partition
+ * @cs: Target cpuset to validate
+ * @new_prs: New partition root state to validate
+ * @excpus: New exclusive effectuve CPUs mask to validate
+ * @excl_check: Flag to enable exclusive CPUs ownership validation
+ *
+ * Return: PERR_NONE if validation passes, appropriate error code otherwise
+ *
+ * Important: The caller must ensure that @cs's cpu mask is updated before
+ * invoking this function when exclusive CPU validation is required.
+ */
+static enum prs_errcode local_partition_check(struct cpuset *cs, int new_prs,
+							 struct cpumask *excpus, bool excl_check)
+{
+	struct cpuset *parent = parent_cs(cs);
+
+	/*
+	 * The parent must be a partition root.
+	 * The new cpumask, if present, or the current cpus_allowed must
+	 * not be empty.
+	 */
+	if (!is_partition_valid(parent)) {
+		return is_partition_invalid(parent)
+			? PERR_INVPARENT : PERR_NOTPART;
+	}
+
+	if (excl_check && !cpuset_user_cpus_exclusive(cs))
+		return PERR_NOTEXCL;
+
+	return validate_partition(cs, new_prs, excpus);
+}
+
 /**
  * local_partition_enable - Enable local partition for a cpuset
  * @cs: Target cpuset to become a local partition root
@@ -1945,280 +2003,85 @@ static void local_partition_invalidate(struct cpuset *cs, struct tmpmasks *tmp)
 }
 
 /**
- * update_parent_effective_cpumask - update effective_cpus mask of parent cpuset
- * @cs:      The cpuset that requests change in partition root state
- * @cmd:     Partition root state change command
- * @newmask: Optional new cpumask for partcmd_update
- * @tmp:     Temporary addmask and delmask
- * Return:   0 or a partition root state error code
- *
- * For partcmd_enable*, the cpuset is being transformed from a non-partition
- * root to a partition root. The effective_xcpus (cpus_allowed if
- * effective_xcpus not set) mask of the given cpuset will be taken away from
- * parent's effective_cpus. The function will return 0 if all the CPUs listed
- * in effective_xcpus can be granted or an error code will be returned.
- *
- * For partcmd_disable, the cpuset is being transformed from a partition
- * root back to a non-partition root. Any CPUs in effective_xcpus will be
- * given back to parent's effective_cpus. 0 will always be returned.
+ * __local_partition_update - Update local CPU partition configuration
+ * @cs: Target cpuset to update
+ * @xcpus: New exclusive CPU mask
+ * @excpus: New effective exclusive CPU mask
+ * @tmp: Temporary mask storage for intermediate calculations
+ * @excl_check: Flag to enable exclusivity validation
  *
- * For partcmd_update, if the optional newmask is specified, the cpu list is
- * to be changed from effective_xcpus to newmask. Otherwise, effective_xcpus is
- * assumed to remain the same. The cpuset should either be a valid or invalid
- * partition root. The partition root state may change from valid to invalid
- * or vice versa. An error code will be returned if transitioning from
- * invalid to valid violates the exclusivity rule.
+ * Handles updates to local CPU partition configurations by validating
+ * changes, managing state transitions, and propagating updates through
+ * the cpuset hierarchy.
  *
- * For partcmd_invalidate, the current partition will be made invalid.
+ * Note on exclusivity checking: Exclusivity validation is required when
+ * transitioning from an invalid to valid partition state. However, when
+ * updating cpus_allowed or exclusive_cpus, exclusivity should have already
+ * been verified by validate_change(). In such cases, excl_check must be
+ * false since the cs cpumasks are not yet updated.
  *
- * The partcmd_enable* and partcmd_disable commands are used by
- * update_prstate(). An error code may be returned and the caller will check
- * for error.
- *
- * The partcmd_update command is used by update_cpumasks_hier() with newmask
- * NULL and update_cpumask() with newmask set. The partcmd_invalidate is used
- * by update_cpumask() with NULL newmask. In both cases, the callers won't
- * check for error and so partition_root_state and prs_err will be updated
- * directly.
+ * Return: Partition error code (PERR_NONE indicates success)
  */
-static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
-					   struct cpumask *newmask,
-					   struct tmpmasks *tmp)
+static int __local_partition_update(struct cpuset *cs, struct cpumask *xcpus,
+				  struct cpumask *excpus, struct tmpmasks *tmp,
+				  bool excl_check)
 {
 	struct cpuset *parent = parent_cs(cs);
-	int adding;	/* Adding cpus to parent's effective_cpus	*/
-	int deleting;	/* Deleting cpus from parent's effective_cpus	*/
-	int old_prs, new_prs;
 	int part_error = PERR_NONE;	/* Partition error? */
-	int subparts_delta = 0;
-	int isolcpus_updated = 0;
-	struct cpumask *xcpus = user_xcpus(cs);
-	bool nocpu;
+	int old_prs, new_prs;
+	bool cpumask_updated = false;
 
 	lockdep_assert_held(&cpuset_mutex);
-	WARN_ON_ONCE(is_remote_partition(cs));	/* For local partition only */
+	/* For local partition only */
+	if (WARN_ON_ONCE(is_remote_partition(cs) || cs_is_member(cs)))
+		return PERR_NONE;
 
+	old_prs = cs->partition_root_state;
 	/*
-	 * new_prs will only be changed for the partcmd_update and
-	 * partcmd_invalidate commands.
+	 * If new_prs < 0, it might transition to valid partition state.
+	 * Use absolute value for validation checks.
 	 */
-	adding = deleting = false;
-	old_prs = new_prs = cs->partition_root_state;
-
-	/*
-	 * The parent must be a partition root.
-	 * The new cpumask, if present, or the current cpus_allowed must
-	 * not be empty.
-	 */
-	if (!is_partition_valid(parent)) {
-		return is_partition_invalid(parent)
-		       ? PERR_INVPARENT : PERR_NOTPART;
-	}
-	if (!newmask && xcpus_empty(cs))
-		return PERR_CPUSEMPTY;
-
-	nocpu = tasks_nocpu_error(parent, cs, xcpus);
-
-	if (newmask) {
-		/*
-		 * Empty cpumask is not allowed
-		 */
-		if (cpumask_empty(newmask)) {
-			part_error = PERR_CPUSEMPTY;
-			goto write_error;
-		}
-
-		/* Check newmask again, whether cpus are available for parent/cs */
-		nocpu |= tasks_nocpu_error(parent, cs, newmask);
-
-		/*
-		 * partcmd_update with newmask:
-		 *
-		 * Compute add/delete mask to/from effective_cpus
-		 *
-		 * For valid partition:
-		 *   addmask = exclusive_cpus & ~newmask
-		 *			      & parent->effective_xcpus
-		 *   delmask = newmask & ~exclusive_cpus
-		 *		       & parent->effective_xcpus
-		 *
-		 * For invalid partition:
-		 *   delmask = newmask & parent->effective_xcpus
-		 */
-		if (is_partition_invalid(cs)) {
-			adding = false;
-			deleting = cpumask_and(tmp->delmask,
-					newmask, parent->effective_xcpus);
-		} else {
-			cpumask_andnot(tmp->addmask, xcpus, newmask);
-			adding = cpumask_and(tmp->addmask, tmp->addmask,
-					     parent->effective_xcpus);
-
-			cpumask_andnot(tmp->delmask, newmask, xcpus);
-			deleting = cpumask_and(tmp->delmask, tmp->delmask,
-					       parent->effective_xcpus);
-		}
-		/*
-		 * The new CPUs to be removed from parent's effective CPUs
-		 * must be present.
-		 */
-		if (deleting) {
-			cpumask_and(tmp->new_cpus, tmp->delmask, cpu_active_mask);
-			WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, parent->effective_cpus));
-		}
-
-		/*
-		 * Make partition invalid if parent's effective_cpus could
-		 * become empty and there are tasks in the parent.
-		 */
-		if (nocpu && (!adding ||
-		    !cpumask_intersects(tmp->addmask, cpu_active_mask))) {
-			part_error = PERR_NOCPUS;
-			deleting = false;
-			adding = cpumask_and(tmp->addmask,
-					     xcpus, parent->effective_xcpus);
-		}
-	} else {
-		/*
-		 * partcmd_update w/o newmask
-		 *
-		 * delmask = effective_xcpus & parent->effective_cpus
-		 *
-		 * This can be called from:
-		 * 1) update_cpumasks_hier()
-		 * 2) cpuset_hotplug_update_tasks()
-		 *
-		 * Check to see if it can be transitioned from valid to
-		 * invalid partition or vice versa.
-		 *
-		 * A partition error happens when parent has tasks and all
-		 * its effective CPUs will have to be distributed out.
-		 */
-		if (nocpu) {
-			part_error = PERR_NOCPUS;
-			if (is_partition_valid(cs))
-				adding = cpumask_and(tmp->addmask,
-						xcpus, parent->effective_xcpus);
-		} else if (is_partition_invalid(cs) && !cpumask_empty(xcpus) &&
-			   cpumask_subset(xcpus, parent->effective_xcpus)) {
-			struct cgroup_subsys_state *css;
-			struct cpuset *child;
-			bool exclusive = true;
-
-			/*
-			 * Convert invalid partition to valid has to
-			 * pass the cpu exclusivity test.
-			 */
-			rcu_read_lock();
-			cpuset_for_each_child(child, css, parent) {
-				if (child == cs)
-					continue;
-				if (!cpusets_are_exclusive(cs, child)) {
-					exclusive = false;
-					break;
-				}
-			}
-			rcu_read_unlock();
-			if (exclusive)
-				deleting = cpumask_and(tmp->delmask,
-						xcpus, parent->effective_cpus);
-			else
-				part_error = PERR_NOTEXCL;
-		}
-	}
-
-write_error:
-	if (part_error)
-		WRITE_ONCE(cs->prs_err, part_error);
-
-	if (cmd == partcmd_update) {
-		/*
-		 * Check for possible transition between valid and invalid
-		 * partition root.
-		 */
-		switch (cs->partition_root_state) {
-		case PRS_ROOT:
-		case PRS_ISOLATED:
-			if (part_error) {
-				new_prs = -old_prs;
-				subparts_delta--;
-			}
-			break;
-		case PRS_INVALID_ROOT:
-		case PRS_INVALID_ISOLATED:
-			if (!part_error) {
-				new_prs = -old_prs;
-				subparts_delta++;
-			}
-			break;
-		}
+	new_prs = old_prs < 0 ? -old_prs : old_prs;
+	part_error = local_partition_check(cs, new_prs, excpus, excl_check);
+	if (part_error) {
+		local_partition_invalidate(cs, tmp);
+		return part_error;
 	}
 
-	if (!adding && !deleting && (new_prs == old_prs))
-		return 0;
+	/* Nothing changes, return PERR_NONE */
+	if (new_prs == old_prs && cpumask_equal(excpus, cs->effective_xcpus))
+		return PERR_NONE;
 
 	/*
-	 * Transitioning between invalid to valid or vice versa may require
-	 * changing CS_CPU_EXCLUSIVE. In the case of partcmd_update,
-	 * validate_change() has already been successfully called and
-	 * CPU lists in cs haven't been updated yet. So defer it to later.
+	 * If partition was previously invalid but now passes checks,
+	 * enable it and update related flags
 	 */
-	if ((old_prs != new_prs) && (cmd != partcmd_update))  {
-		int err = update_partition_exclusive_flag(cs, new_prs);
-
-		if (err)
-			return err;
+	if (is_partition_invalid(cs) && !part_error) {
+		partition_enable(cs, parent, new_prs, excpus);
+		update_partition_exclusive_flag(cs, new_prs);
+		update_partition_sd_lb(cs, old_prs);
+		return part_error;
 	}
 
+	cpumask_updated = cpumask_andnot(tmp->addmask, excpus, cs->effective_xcpus);
+	cpumask_updated |= cpumask_andnot(tmp->delmask, cs->effective_xcpus, excpus);
+	partition_update(cs, new_prs, xcpus, excpus, tmp);
 	/*
-	 * Change the parent's effective_cpus & effective_xcpus (top cpuset
-	 * only).
-	 *
-	 * Newly added CPUs will be removed from effective_cpus and
-	 * newly deleted ones will be added back to effective_cpus.
-	 */
-	spin_lock_irq(&callback_lock);
-	if (old_prs != new_prs) {
-		cs->partition_root_state = new_prs;
-		if (new_prs <= 0)
-			cs->nr_subparts = 0;
-	}
-	/*
-	 * Adding to parent's effective_cpus means deletion CPUs from cs
-	 * and vice versa.
+	 * Propagate changes in parent's effective_cpus down the hierarchy.
 	 */
-	if (adding)
-		isolcpus_updated += partition_xcpus_del(old_prs, parent,
-							tmp->addmask);
-	if (deleting)
-		isolcpus_updated += partition_xcpus_add(new_prs, parent,
-							tmp->delmask);
-
-	if (is_partition_valid(parent)) {
-		parent->nr_subparts += subparts_delta;
-		WARN_ON_ONCE(parent->nr_subparts < 0);
-	}
-	spin_unlock_irq(&callback_lock);
-	update_unbound_workqueue_cpumask(isolcpus_updated);
-
-	if ((old_prs != new_prs) && (cmd == partcmd_update))
-		update_partition_exclusive_flag(cs, new_prs);
-
-	if (adding || deleting) {
+	if (cpumask_updated) {
 		cpuset_update_tasks_cpumask(parent, tmp->addmask);
 		update_sibling_cpumasks(parent, cs, tmp);
 	}
+	return part_error;
+}
 
-	/*
-	 * For partcmd_update without newmask, it is being called from
-	 * cpuset_handle_hotplug(). Update the load balance flag and
-	 * scheduling domain accordingly.
-	 */
-	if ((cmd == partcmd_update) && !newmask)
-		update_partition_sd_lb(cs, old_prs);
+static int local_partition_update(struct cpuset *cs, struct tmpmasks *tmp)
+{
+	struct cpuset *parent = parent_cs(cs);
 
-	notify_partition_change(cs, old_prs);
-	return 0;
+	cpumask_and(tmp->new_cpus, user_xcpus(cs), parent->effective_xcpus);
+	return __local_partition_update(cs, NULL, tmp->new_cpus, tmp, true);
 }
 
 /**
@@ -2419,9 +2282,16 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
 		if (!css_tryget_online(&cp->css))
 			continue;
 		rcu_read_unlock();
+		/*
+		 * The tmp->new_cpus may by modified.
+		 * Update effective_cpus before passing tmp to other functions.
+		 */
+		spin_lock_irq(&callback_lock);
+		cpumask_copy(cp->effective_cpus, tmp->new_cpus);
+		spin_unlock_irq(&callback_lock);
 
 		if (update_parent) {
-			update_parent_effective_cpumask(cp, partcmd_update, NULL, tmp);
+			local_partition_update(cp, tmp);
 			/*
 			 * The cpuset partition_root_state may become
 			 * invalid. Capture it.
@@ -2430,7 +2300,6 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
 		}
 
 		spin_lock_irq(&callback_lock);
-		cpumask_copy(cp->effective_cpus, tmp->new_cpus);
 		cp->partition_root_state = new_prs;
 		if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs))
 			compute_excpus(cp, cp->effective_xcpus);
@@ -2610,8 +2479,8 @@ static void partition_cpus_change(struct cpuset *cs, struct cpuset *trialcs,
 		if (trialcs->prs_err)
 			local_partition_invalidate(cs, tmp);
 		else
-			update_parent_effective_cpumask(cs, partcmd_update,
-							trialcs->effective_xcpus, tmp);
+			__local_partition_update(cs, trialcs->exclusive_cpus,
+						trialcs->effective_xcpus, tmp, false);
 	}
 }
 
@@ -4063,9 +3932,8 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
 	else if (is_partition_valid(parent) && is_partition_invalid(cs) &&
 		 !cpumask_empty(user_xcpus(cs))) {
 		partcmd = partcmd_update;
-		update_parent_effective_cpumask(cs, partcmd, NULL, tmp);
+		local_partition_update(cs, tmp);
 	}
-
 	if (partcmd >= 0) {
 		if ((partcmd == partcmd_invalidate) || is_partition_valid(cs)) {
 			compute_partition_effective_cpumask(cs, &new_cpus);
-- 
2.34.1
Re: [PATCH -next RFC 09/16] cpuset: introduce local_partition_update()
Posted by Waiman Long 3 months, 3 weeks ago
On 9/28/25 3:12 AM, Chen Ridong wrote:
> From: Chen Ridong <chenridong@huawei.com>
>
> Extend the partition_update() infrastructure to handle local partition
> updates.
>
> The local_partition_update() function replaces the command partcmd_update
> previously handled within update_parent_effective_cpumask(). The update
> logic follows a state-based approach:
>
> 1. Validation check: First verify if the local partition is currently valid
> 2. Invalidation handling: If the partition is invalid, trigger invalidation
> 3. State transition: If an invalid partition has no errors, transition to
>     valid
> 4. cpus updates: For local partition that only cpu maks changes, use
"cpu mask"?
>     partition_update() to handle partition change.
>
> With the introduction of this function, update_parent_effective_cpumask()
> function is removed, simplifying the partition update code path and
> creating a cleaner separation between local and remote partition
> operations.
>
> Signed-off-by: Chen Ridong <chenridong@huawei.com>
> ---
>   kernel/cgroup/cpuset.c | 376 +++++++++++++----------------------------
>   1 file changed, 122 insertions(+), 254 deletions(-)
>
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index e460d03286ba..d0217db04b69 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -1622,12 +1622,14 @@ static void partition_update(struct cpuset *cs, int prs, struct cpumask *xcpus,
>   	bool isolcpus_updated;
>   	bool excl_updated;
>   	struct cpuset *parent;
> +	int old_prs;
>   
>   	lockdep_assert_held(&cpuset_mutex);
>   	WARN_ON_ONCE(!cpuset_v2());
>   	WARN_ON_ONCE(prs <= 0);
>   
>   	parent = is_remote_partition(cs) ? NULL : parent_cs(cs);
> +	old_prs = cs->partition_root_state;
>   	excl_updated = !cpumask_empty(tmp->addmask) ||
>   			!cpumask_empty(tmp->delmask);
>   
> @@ -1645,6 +1647,8 @@ static void partition_update(struct cpuset *cs, int prs, struct cpumask *xcpus,
>   	update_unbound_workqueue_cpumask(isolcpus_updated);
>   	if (excl_updated)
>   		cpuset_force_rebuild();
> +	update_partition_exclusive_flag(cs, prs);
> +	notify_partition_change(cs, old_prs);
>   }
>   

Again, change to partition_update() should be done in the patch that 
introduces it.

>   /*
> @@ -1790,6 +1794,27 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
>   	return false;
>   }
>   
> +static bool cpuset_user_cpus_exclusive(struct cpuset *cs)

The cpuset prefix is only needed if it is an externally visible 
function. For this one, I think a better name should be 
"is_user_xcpus_exclusive".

> +{
> +	struct cpuset *parent = parent_cs(cs);
> +
> +	struct cgroup_subsys_state *css;
> +	struct cpuset *child;
> +	bool exclusive = true;
> +
> +	rcu_read_lock();
> +	cpuset_for_each_child(child, css, parent) {
> +		if (child == cs)
> +			continue;
> +		if (!cpusets_are_exclusive(cs, child)) {
> +			exclusive = false;
> +			break;
> +		}
> +	}
> +	rcu_read_unlock();
> +	return exclusive;
> +}
> +
>   /**
>    * validate_partition - Validate a cpuset partition configuration
>    * @cs: The cpuset to validate
> @@ -1818,6 +1843,39 @@ static enum prs_errcode validate_partition(struct cpuset *cs, int new_prs,
>   	return PERR_NONE;
>   }
>   
> +/**
> + * local_partition_check - Validate for local partition
> + * @cs: Target cpuset to validate
> + * @new_prs: New partition root state to validate
> + * @excpus: New exclusive effectuve CPUs mask to validate
> + * @excl_check: Flag to enable exclusive CPUs ownership validation
> + *
> + * Return: PERR_NONE if validation passes, appropriate error code otherwise
> + *
> + * Important: The caller must ensure that @cs's cpu mask is updated before
> + * invoking this function when exclusive CPU validation is required.
> + */
> +static enum prs_errcode local_partition_check(struct cpuset *cs, int new_prs,
> +							 struct cpumask *excpus, bool excl_check)

I would suggest naming it to "validate_local_partition()" as the local 
counterpart of validate_partition().

> +{
> +	struct cpuset *parent = parent_cs(cs);
> +
> +	/*
> +	 * The parent must be a partition root.
> +	 * The new cpumask, if present, or the current cpus_allowed must
> +	 * not be empty.
> +	 */
> +	if (!is_partition_valid(parent)) {
> +		return is_partition_invalid(parent)
> +			? PERR_INVPARENT : PERR_NOTPART;
> +	}
> +
> +	if (excl_check && !cpuset_user_cpus_exclusive(cs))
> +		return PERR_NOTEXCL;
> +
> +	return validate_partition(cs, new_prs, excpus);
> +}
> +
>   /**
>    * local_partition_enable - Enable local partition for a cpuset
>    * @cs: Target cpuset to become a local partition root
> @@ -1945,280 +2003,85 @@ static void local_partition_invalidate(struct cpuset *cs, struct tmpmasks *tmp)
>   }
>   
>   /**
> - * update_parent_effective_cpumask - update effective_cpus mask of parent cpuset
> - * @cs:      The cpuset that requests change in partition root state
> - * @cmd:     Partition root state change command
> - * @newmask: Optional new cpumask for partcmd_update
> - * @tmp:     Temporary addmask and delmask
> - * Return:   0 or a partition root state error code
> - *
> - * For partcmd_enable*, the cpuset is being transformed from a non-partition
> - * root to a partition root. The effective_xcpus (cpus_allowed if
> - * effective_xcpus not set) mask of the given cpuset will be taken away from
> - * parent's effective_cpus. The function will return 0 if all the CPUs listed
> - * in effective_xcpus can be granted or an error code will be returned.
> - *
> - * For partcmd_disable, the cpuset is being transformed from a partition
> - * root back to a non-partition root. Any CPUs in effective_xcpus will be
> - * given back to parent's effective_cpus. 0 will always be returned.
> + * __local_partition_update - Update local CPU partition configuration
> + * @cs: Target cpuset to update
> + * @xcpus: New exclusive CPU mask
> + * @excpus: New effective exclusive CPU mask
> + * @tmp: Temporary mask storage for intermediate calculations
> + * @excl_check: Flag to enable exclusivity validation
>    *
> - * For partcmd_update, if the optional newmask is specified, the cpu list is
> - * to be changed from effective_xcpus to newmask. Otherwise, effective_xcpus is
> - * assumed to remain the same. The cpuset should either be a valid or invalid
> - * partition root. The partition root state may change from valid to invalid
> - * or vice versa. An error code will be returned if transitioning from
> - * invalid to valid violates the exclusivity rule.
> + * Handles updates to local CPU partition configurations by validating
> + * changes, managing state transitions, and propagating updates through
> + * the cpuset hierarchy.
>    *
> - * For partcmd_invalidate, the current partition will be made invalid.
> + * Note on exclusivity checking: Exclusivity validation is required when
> + * transitioning from an invalid to valid partition state. However, when
> + * updating cpus_allowed or exclusive_cpus, exclusivity should have already
> + * been verified by validate_change(). In such cases, excl_check must be
> + * false since the cs cpumasks are not yet updated.
>    *
> - * The partcmd_enable* and partcmd_disable commands are used by
> - * update_prstate(). An error code may be returned and the caller will check
> - * for error.
> - *
> - * The partcmd_update command is used by update_cpumasks_hier() with newmask
> - * NULL and update_cpumask() with newmask set. The partcmd_invalidate is used
> - * by update_cpumask() with NULL newmask. In both cases, the callers won't
> - * check for error and so partition_root_state and prs_err will be updated
> - * directly.
> + * Return: Partition error code (PERR_NONE indicates success)
>    */
> -static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
> -					   struct cpumask *newmask,
> -					   struct tmpmasks *tmp)
Separate out the removal of update_parent_effective_cpumask() into its 
own patch as intermixing the removal of this code and new code make it 
harder to review.

> +static int __local_partition_update(struct cpuset *cs, struct cpumask *xcpus,
> +				  struct cpumask *excpus, struct tmpmasks *tmp,
> +				  bool excl_check)
>   {
>   	struct cpuset *parent = parent_cs(cs);
> -	int adding;	/* Adding cpus to parent's effective_cpus	*/
> -	int deleting;	/* Deleting cpus from parent's effective_cpus	*/
> -	int old_prs, new_prs;
>   	int part_error = PERR_NONE;	/* Partition error? */
> -	int subparts_delta = 0;
> -	int isolcpus_updated = 0;
> -	struct cpumask *xcpus = user_xcpus(cs);
> -	bool nocpu;
> +	int old_prs, new_prs;
> +	bool cpumask_updated = false;
>   
>   	lockdep_assert_held(&cpuset_mutex);
> -	WARN_ON_ONCE(is_remote_partition(cs));	/* For local partition only */
> +	/* For local partition only */
> +	if (WARN_ON_ONCE(is_remote_partition(cs) || cs_is_member(cs)))
> +		return PERR_NONE;
>   
> +	old_prs = cs->partition_root_state;
>   	/*
> -	 * new_prs will only be changed for the partcmd_update and
> -	 * partcmd_invalidate commands.
> +	 * If new_prs < 0, it might transition to valid partition state.
> +	 * Use absolute value for validation checks.
>   	 */
> -	adding = deleting = false;
> -	old_prs = new_prs = cs->partition_root_state;
> -
> -	/*
> -	 * The parent must be a partition root.
> -	 * The new cpumask, if present, or the current cpus_allowed must
> -	 * not be empty.
> -	 */
> -	if (!is_partition_valid(parent)) {
> -		return is_partition_invalid(parent)
> -		       ? PERR_INVPARENT : PERR_NOTPART;
> -	}
> -	if (!newmask && xcpus_empty(cs))
> -		return PERR_CPUSEMPTY;
> -
> -	nocpu = tasks_nocpu_error(parent, cs, xcpus);
> -
> -	if (newmask) {
> -		/*
> -		 * Empty cpumask is not allowed
> -		 */
> -		if (cpumask_empty(newmask)) {
> -			part_error = PERR_CPUSEMPTY;
> -			goto write_error;
> -		}
> -
> -		/* Check newmask again, whether cpus are available for parent/cs */
> -		nocpu |= tasks_nocpu_error(parent, cs, newmask);
> -
> -		/*
> -		 * partcmd_update with newmask:
> -		 *
> -		 * Compute add/delete mask to/from effective_cpus
> -		 *
> -		 * For valid partition:
> -		 *   addmask = exclusive_cpus & ~newmask
> -		 *			      & parent->effective_xcpus
> -		 *   delmask = newmask & ~exclusive_cpus
> -		 *		       & parent->effective_xcpus
> -		 *
> -		 * For invalid partition:
> -		 *   delmask = newmask & parent->effective_xcpus
> -		 */
> -		if (is_partition_invalid(cs)) {
> -			adding = false;
> -			deleting = cpumask_and(tmp->delmask,
> -					newmask, parent->effective_xcpus);
> -		} else {
> -			cpumask_andnot(tmp->addmask, xcpus, newmask);
> -			adding = cpumask_and(tmp->addmask, tmp->addmask,
> -					     parent->effective_xcpus);
> -
> -			cpumask_andnot(tmp->delmask, newmask, xcpus);
> -			deleting = cpumask_and(tmp->delmask, tmp->delmask,
> -					       parent->effective_xcpus);
> -		}
> -		/*
> -		 * The new CPUs to be removed from parent's effective CPUs
> -		 * must be present.
> -		 */
> -		if (deleting) {
> -			cpumask_and(tmp->new_cpus, tmp->delmask, cpu_active_mask);
> -			WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, parent->effective_cpus));
> -		}
> -
> -		/*
> -		 * Make partition invalid if parent's effective_cpus could
> -		 * become empty and there are tasks in the parent.
> -		 */
> -		if (nocpu && (!adding ||
> -		    !cpumask_intersects(tmp->addmask, cpu_active_mask))) {
> -			part_error = PERR_NOCPUS;
> -			deleting = false;
> -			adding = cpumask_and(tmp->addmask,
> -					     xcpus, parent->effective_xcpus);
> -		}
> -	} else {
> -		/*
> -		 * partcmd_update w/o newmask
> -		 *
> -		 * delmask = effective_xcpus & parent->effective_cpus
> -		 *
> -		 * This can be called from:
> -		 * 1) update_cpumasks_hier()
> -		 * 2) cpuset_hotplug_update_tasks()
> -		 *
> -		 * Check to see if it can be transitioned from valid to
> -		 * invalid partition or vice versa.
> -		 *
> -		 * A partition error happens when parent has tasks and all
> -		 * its effective CPUs will have to be distributed out.
> -		 */
> -		if (nocpu) {
> -			part_error = PERR_NOCPUS;
> -			if (is_partition_valid(cs))
> -				adding = cpumask_and(tmp->addmask,
> -						xcpus, parent->effective_xcpus);
> -		} else if (is_partition_invalid(cs) && !cpumask_empty(xcpus) &&
> -			   cpumask_subset(xcpus, parent->effective_xcpus)) {
> -			struct cgroup_subsys_state *css;
> -			struct cpuset *child;
> -			bool exclusive = true;
> -
> -			/*
> -			 * Convert invalid partition to valid has to
> -			 * pass the cpu exclusivity test.
> -			 */
> -			rcu_read_lock();
> -			cpuset_for_each_child(child, css, parent) {
> -				if (child == cs)
> -					continue;
> -				if (!cpusets_are_exclusive(cs, child)) {
> -					exclusive = false;
> -					break;
> -				}
> -			}
> -			rcu_read_unlock();
> -			if (exclusive)
> -				deleting = cpumask_and(tmp->delmask,
> -						xcpus, parent->effective_cpus);
> -			else
> -				part_error = PERR_NOTEXCL;
> -		}
> -	}
> -
> -write_error:
> -	if (part_error)
> -		WRITE_ONCE(cs->prs_err, part_error);
> -
> -	if (cmd == partcmd_update) {
> -		/*
> -		 * Check for possible transition between valid and invalid
> -		 * partition root.
> -		 */
> -		switch (cs->partition_root_state) {
> -		case PRS_ROOT:
> -		case PRS_ISOLATED:
> -			if (part_error) {
> -				new_prs = -old_prs;
> -				subparts_delta--;
> -			}
> -			break;
> -		case PRS_INVALID_ROOT:
> -		case PRS_INVALID_ISOLATED:
> -			if (!part_error) {
> -				new_prs = -old_prs;
> -				subparts_delta++;
> -			}
> -			break;
> -		}
> +	new_prs = old_prs < 0 ? -old_prs : old_prs;
> +	part_error = local_partition_check(cs, new_prs, excpus, excl_check);
> +	if (part_error) {
> +		local_partition_invalidate(cs, tmp);

local_partition_invalidate() should only called if old_prs > 0.

> +		return part_error;
>   	}
>   
> -	if (!adding && !deleting && (new_prs == old_prs))
> -		return 0;
> +	/* Nothing changes, return PERR_NONE */
> +	if (new_prs == old_prs && cpumask_equal(excpus, cs->effective_xcpus))
> +		return PERR_NONE;
>   
>   	/*
> -	 * Transitioning between invalid to valid or vice versa may require
> -	 * changing CS_CPU_EXCLUSIVE. In the case of partcmd_update,
> -	 * validate_change() has already been successfully called and
> -	 * CPU lists in cs haven't been updated yet. So defer it to later.
> +	 * If partition was previously invalid but now passes checks,
> +	 * enable it and update related flags
>   	 */
> -	if ((old_prs != new_prs) && (cmd != partcmd_update))  {
> -		int err = update_partition_exclusive_flag(cs, new_prs);
> -
> -		if (err)
> -			return err;
> +	if (is_partition_invalid(cs) && !part_error) {
The !part_error check should be unnecessary as this path will not be 
reached if part_error is non-zero.

> +		partition_enable(cs, parent, new_prs, excpus);
> +		update_partition_exclusive_flag(cs, new_prs);
> +		update_partition_sd_lb(cs, old_prs);
> +		return part_error;
Just return PERR_NONE if it is not expected to be set.

>   	}
>   
> +	cpumask_updated = cpumask_andnot(tmp->addmask, excpus, cs->effective_xcpus);
> +	cpumask_updated |= cpumask_andnot(tmp->delmask, cs->effective_xcpus, excpus);
> +	partition_update(cs, new_prs, xcpus, excpus, tmp);
>   	/*
> -	 * Change the parent's effective_cpus & effective_xcpus (top cpuset
> -	 * only).
> -	 *
> -	 * Newly added CPUs will be removed from effective_cpus and
> -	 * newly deleted ones will be added back to effective_cpus.
> -	 */
> -	spin_lock_irq(&callback_lock);
> -	if (old_prs != new_prs) {
> -		cs->partition_root_state = new_prs;
> -		if (new_prs <= 0)
> -			cs->nr_subparts = 0;
> -	}
> -	/*
> -	 * Adding to parent's effective_cpus means deletion CPUs from cs
> -	 * and vice versa.
> +	 * Propagate changes in parent's effective_cpus down the hierarchy.
>   	 */
> -	if (adding)
> -		isolcpus_updated += partition_xcpus_del(old_prs, parent,
> -							tmp->addmask);
> -	if (deleting)
> -		isolcpus_updated += partition_xcpus_add(new_prs, parent,
> -							tmp->delmask);
> -
> -	if (is_partition_valid(parent)) {
> -		parent->nr_subparts += subparts_delta;
> -		WARN_ON_ONCE(parent->nr_subparts < 0);
> -	}
> -	spin_unlock_irq(&callback_lock);
> -	update_unbound_workqueue_cpumask(isolcpus_updated);
> -
> -	if ((old_prs != new_prs) && (cmd == partcmd_update))
> -		update_partition_exclusive_flag(cs, new_prs);
> -
> -	if (adding || deleting) {
> +	if (cpumask_updated) {
>   		cpuset_update_tasks_cpumask(parent, tmp->addmask);
>   		update_sibling_cpumasks(parent, cs, tmp);
>   	}
> +	return part_error;

Ditto.

Cheers,
Longman
Re: [PATCH -next RFC 09/16] cpuset: introduce local_partition_update()
Posted by Chen Ridong 3 months, 3 weeks ago

On 2025/10/20 10:57, Waiman Long wrote:
> 
> On 9/28/25 3:12 AM, Chen Ridong wrote:
>> From: Chen Ridong <chenridong@huawei.com>
>>
>> Extend the partition_update() infrastructure to handle local partition
>> updates.
>>
>> The local_partition_update() function replaces the command partcmd_update
>> previously handled within update_parent_effective_cpumask(). The update
>> logic follows a state-based approach:
>>
>> 1. Validation check: First verify if the local partition is currently valid
>> 2. Invalidation handling: If the partition is invalid, trigger invalidation
>> 3. State transition: If an invalid partition has no errors, transition to
>>     valid
>> 4. cpus updates: For local partition that only cpu maks changes, use
> "cpu mask"?
>>     partition_update() to handle partition change.
>>
>> With the introduction of this function, update_parent_effective_cpumask()
>> function is removed, simplifying the partition update code path and
>> creating a cleaner separation between local and remote partition
>> operations.
>>
>> Signed-off-by: Chen Ridong <chenridong@huawei.com>
>> ---
>>   kernel/cgroup/cpuset.c | 376 +++++++++++++----------------------------
>>   1 file changed, 122 insertions(+), 254 deletions(-)
>>
>> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
>> index e460d03286ba..d0217db04b69 100644
>> --- a/kernel/cgroup/cpuset.c
>> +++ b/kernel/cgroup/cpuset.c
>> @@ -1622,12 +1622,14 @@ static void partition_update(struct cpuset *cs, int prs, struct cpumask
>> *xcpus,
>>       bool isolcpus_updated;
>>       bool excl_updated;
>>       struct cpuset *parent;
>> +    int old_prs;
>>         lockdep_assert_held(&cpuset_mutex);
>>       WARN_ON_ONCE(!cpuset_v2());
>>       WARN_ON_ONCE(prs <= 0);
>>         parent = is_remote_partition(cs) ? NULL : parent_cs(cs);
>> +    old_prs = cs->partition_root_state;
>>       excl_updated = !cpumask_empty(tmp->addmask) ||
>>               !cpumask_empty(tmp->delmask);
>>   @@ -1645,6 +1647,8 @@ static void partition_update(struct cpuset *cs, int prs, struct cpumask
>> *xcpus,
>>       update_unbound_workqueue_cpumask(isolcpus_updated);
>>       if (excl_updated)
>>           cpuset_force_rebuild();
>> +    update_partition_exclusive_flag(cs, prs);
>> +    notify_partition_change(cs, old_prs);
>>   }
>>   
> 
> Again, change to partition_update() should be done in the patch that introduces it.
> 

Thank you,

Will update.

>>   /*
>> @@ -1790,6 +1794,27 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask
>> *new_cpus)
>>       return false;
>>   }
>>   +static bool cpuset_user_cpus_exclusive(struct cpuset *cs)
> 
> The cpuset prefix is only needed if it is an externally visible function. For this one, I think a
> better name should be "is_user_xcpus_exclusive".
> 

Will update.

>> +{
>> +    struct cpuset *parent = parent_cs(cs);
>> +
>> +    struct cgroup_subsys_state *css;
>> +    struct cpuset *child;
>> +    bool exclusive = true;
>> +
>> +    rcu_read_lock();
>> +    cpuset_for_each_child(child, css, parent) {
>> +        if (child == cs)
>> +            continue;
>> +        if (!cpusets_are_exclusive(cs, child)) {
>> +            exclusive = false;
>> +            break;
>> +        }
>> +    }
>> +    rcu_read_unlock();
>> +    return exclusive;
>> +}
>> +
>>   /**
>>    * validate_partition - Validate a cpuset partition configuration
>>    * @cs: The cpuset to validate
>> @@ -1818,6 +1843,39 @@ static enum prs_errcode validate_partition(struct cpuset *cs, int new_prs,
>>       return PERR_NONE;
>>   }
>>   +/**
>> + * local_partition_check - Validate for local partition
>> + * @cs: Target cpuset to validate
>> + * @new_prs: New partition root state to validate
>> + * @excpus: New exclusive effectuve CPUs mask to validate
>> + * @excl_check: Flag to enable exclusive CPUs ownership validation
>> + *
>> + * Return: PERR_NONE if validation passes, appropriate error code otherwise
>> + *
>> + * Important: The caller must ensure that @cs's cpu mask is updated before
>> + * invoking this function when exclusive CPU validation is required.
>> + */
>> +static enum prs_errcode local_partition_check(struct cpuset *cs, int new_prs,
>> +                             struct cpumask *excpus, bool excl_check)
> 
> I would suggest naming it to "validate_local_partition()" as the local counterpart of
> validate_partition().
> 

Will upate

>> +{
>> +    struct cpuset *parent = parent_cs(cs);
>> +
>> +    /*
>> +     * The parent must be a partition root.
>> +     * The new cpumask, if present, or the current cpus_allowed must
>> +     * not be empty.
>> +     */
>> +    if (!is_partition_valid(parent)) {
>> +        return is_partition_invalid(parent)
>> +            ? PERR_INVPARENT : PERR_NOTPART;
>> +    }
>> +
>> +    if (excl_check && !cpuset_user_cpus_exclusive(cs))
>> +        return PERR_NOTEXCL;
>> +
>> +    return validate_partition(cs, new_prs, excpus);
>> +}
>> +
>>   /**
>>    * local_partition_enable - Enable local partition for a cpuset
>>    * @cs: Target cpuset to become a local partition root
>> @@ -1945,280 +2003,85 @@ static void local_partition_invalidate(struct cpuset *cs, struct tmpmasks
>> *tmp)
>>   }
>>     /**
>> - * update_parent_effective_cpumask - update effective_cpus mask of parent cpuset
>> - * @cs:      The cpuset that requests change in partition root state
>> - * @cmd:     Partition root state change command
>> - * @newmask: Optional new cpumask for partcmd_update
>> - * @tmp:     Temporary addmask and delmask
>> - * Return:   0 or a partition root state error code
>> - *
>> - * For partcmd_enable*, the cpuset is being transformed from a non-partition
>> - * root to a partition root. The effective_xcpus (cpus_allowed if
>> - * effective_xcpus not set) mask of the given cpuset will be taken away from
>> - * parent's effective_cpus. The function will return 0 if all the CPUs listed
>> - * in effective_xcpus can be granted or an error code will be returned.
>> - *
>> - * For partcmd_disable, the cpuset is being transformed from a partition
>> - * root back to a non-partition root. Any CPUs in effective_xcpus will be
>> - * given back to parent's effective_cpus. 0 will always be returned.
>> + * __local_partition_update - Update local CPU partition configuration
>> + * @cs: Target cpuset to update
>> + * @xcpus: New exclusive CPU mask
>> + * @excpus: New effective exclusive CPU mask
>> + * @tmp: Temporary mask storage for intermediate calculations
>> + * @excl_check: Flag to enable exclusivity validation
>>    *
>> - * For partcmd_update, if the optional newmask is specified, the cpu list is
>> - * to be changed from effective_xcpus to newmask. Otherwise, effective_xcpus is
>> - * assumed to remain the same. The cpuset should either be a valid or invalid
>> - * partition root. The partition root state may change from valid to invalid
>> - * or vice versa. An error code will be returned if transitioning from
>> - * invalid to valid violates the exclusivity rule.
>> + * Handles updates to local CPU partition configurations by validating
>> + * changes, managing state transitions, and propagating updates through
>> + * the cpuset hierarchy.
>>    *
>> - * For partcmd_invalidate, the current partition will be made invalid.
>> + * Note on exclusivity checking: Exclusivity validation is required when
>> + * transitioning from an invalid to valid partition state. However, when
>> + * updating cpus_allowed or exclusive_cpus, exclusivity should have already
>> + * been verified by validate_change(). In such cases, excl_check must be
>> + * false since the cs cpumasks are not yet updated.
>>    *
>> - * The partcmd_enable* and partcmd_disable commands are used by
>> - * update_prstate(). An error code may be returned and the caller will check
>> - * for error.
>> - *
>> - * The partcmd_update command is used by update_cpumasks_hier() with newmask
>> - * NULL and update_cpumask() with newmask set. The partcmd_invalidate is used
>> - * by update_cpumask() with NULL newmask. In both cases, the callers won't
>> - * check for error and so partition_root_state and prs_err will be updated
>> - * directly.
>> + * Return: Partition error code (PERR_NONE indicates success)
>>    */
>> -static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
>> -                       struct cpumask *newmask,
>> -                       struct tmpmasks *tmp)
> Separate out the removal of update_parent_effective_cpumask() into its own patch as intermixing the
> removal of this code and new code make it harder to review.
> 

Will do.

>> +static int __local_partition_update(struct cpuset *cs, struct cpumask *xcpus,
>> +                  struct cpumask *excpus, struct tmpmasks *tmp,
>> +                  bool excl_check)
>>   {
>>       struct cpuset *parent = parent_cs(cs);
>> -    int adding;    /* Adding cpus to parent's effective_cpus    */
>> -    int deleting;    /* Deleting cpus from parent's effective_cpus    */
>> -    int old_prs, new_prs;
>>       int part_error = PERR_NONE;    /* Partition error? */
>> -    int subparts_delta = 0;
>> -    int isolcpus_updated = 0;
>> -    struct cpumask *xcpus = user_xcpus(cs);
>> -    bool nocpu;
>> +    int old_prs, new_prs;
>> +    bool cpumask_updated = false;
>>         lockdep_assert_held(&cpuset_mutex);
>> -    WARN_ON_ONCE(is_remote_partition(cs));    /* For local partition only */
>> +    /* For local partition only */
>> +    if (WARN_ON_ONCE(is_remote_partition(cs) || cs_is_member(cs)))
>> +        return PERR_NONE;
>>   +    old_prs = cs->partition_root_state;
>>       /*
>> -     * new_prs will only be changed for the partcmd_update and
>> -     * partcmd_invalidate commands.
>> +     * If new_prs < 0, it might transition to valid partition state.
>> +     * Use absolute value for validation checks.
>>        */
>> -    adding = deleting = false;
>> -    old_prs = new_prs = cs->partition_root_state;
>> -
>> -    /*
>> -     * The parent must be a partition root.
>> -     * The new cpumask, if present, or the current cpus_allowed must
>> -     * not be empty.
>> -     */
>> -    if (!is_partition_valid(parent)) {
>> -        return is_partition_invalid(parent)
>> -               ? PERR_INVPARENT : PERR_NOTPART;
>> -    }
>> -    if (!newmask && xcpus_empty(cs))
>> -        return PERR_CPUSEMPTY;
>> -
>> -    nocpu = tasks_nocpu_error(parent, cs, xcpus);
>> -
>> -    if (newmask) {
>> -        /*
>> -         * Empty cpumask is not allowed
>> -         */
>> -        if (cpumask_empty(newmask)) {
>> -            part_error = PERR_CPUSEMPTY;
>> -            goto write_error;
>> -        }
>> -
>> -        /* Check newmask again, whether cpus are available for parent/cs */
>> -        nocpu |= tasks_nocpu_error(parent, cs, newmask);
>> -
>> -        /*
>> -         * partcmd_update with newmask:
>> -         *
>> -         * Compute add/delete mask to/from effective_cpus
>> -         *
>> -         * For valid partition:
>> -         *   addmask = exclusive_cpus & ~newmask
>> -         *                  & parent->effective_xcpus
>> -         *   delmask = newmask & ~exclusive_cpus
>> -         *               & parent->effective_xcpus
>> -         *
>> -         * For invalid partition:
>> -         *   delmask = newmask & parent->effective_xcpus
>> -         */
>> -        if (is_partition_invalid(cs)) {
>> -            adding = false;
>> -            deleting = cpumask_and(tmp->delmask,
>> -                    newmask, parent->effective_xcpus);
>> -        } else {
>> -            cpumask_andnot(tmp->addmask, xcpus, newmask);
>> -            adding = cpumask_and(tmp->addmask, tmp->addmask,
>> -                         parent->effective_xcpus);
>> -
>> -            cpumask_andnot(tmp->delmask, newmask, xcpus);
>> -            deleting = cpumask_and(tmp->delmask, tmp->delmask,
>> -                           parent->effective_xcpus);
>> -        }
>> -        /*
>> -         * The new CPUs to be removed from parent's effective CPUs
>> -         * must be present.
>> -         */
>> -        if (deleting) {
>> -            cpumask_and(tmp->new_cpus, tmp->delmask, cpu_active_mask);
>> -            WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, parent->effective_cpus));
>> -        }
>> -
>> -        /*
>> -         * Make partition invalid if parent's effective_cpus could
>> -         * become empty and there are tasks in the parent.
>> -         */
>> -        if (nocpu && (!adding ||
>> -            !cpumask_intersects(tmp->addmask, cpu_active_mask))) {
>> -            part_error = PERR_NOCPUS;
>> -            deleting = false;
>> -            adding = cpumask_and(tmp->addmask,
>> -                         xcpus, parent->effective_xcpus);
>> -        }
>> -    } else {
>> -        /*
>> -         * partcmd_update w/o newmask
>> -         *
>> -         * delmask = effective_xcpus & parent->effective_cpus
>> -         *
>> -         * This can be called from:
>> -         * 1) update_cpumasks_hier()
>> -         * 2) cpuset_hotplug_update_tasks()
>> -         *
>> -         * Check to see if it can be transitioned from valid to
>> -         * invalid partition or vice versa.
>> -         *
>> -         * A partition error happens when parent has tasks and all
>> -         * its effective CPUs will have to be distributed out.
>> -         */
>> -        if (nocpu) {
>> -            part_error = PERR_NOCPUS;
>> -            if (is_partition_valid(cs))
>> -                adding = cpumask_and(tmp->addmask,
>> -                        xcpus, parent->effective_xcpus);
>> -        } else if (is_partition_invalid(cs) && !cpumask_empty(xcpus) &&
>> -               cpumask_subset(xcpus, parent->effective_xcpus)) {
>> -            struct cgroup_subsys_state *css;
>> -            struct cpuset *child;
>> -            bool exclusive = true;
>> -
>> -            /*
>> -             * Convert invalid partition to valid has to
>> -             * pass the cpu exclusivity test.
>> -             */
>> -            rcu_read_lock();
>> -            cpuset_for_each_child(child, css, parent) {
>> -                if (child == cs)
>> -                    continue;
>> -                if (!cpusets_are_exclusive(cs, child)) {
>> -                    exclusive = false;
>> -                    break;
>> -                }
>> -            }
>> -            rcu_read_unlock();
>> -            if (exclusive)
>> -                deleting = cpumask_and(tmp->delmask,
>> -                        xcpus, parent->effective_cpus);
>> -            else
>> -                part_error = PERR_NOTEXCL;
>> -        }
>> -    }
>> -
>> -write_error:
>> -    if (part_error)
>> -        WRITE_ONCE(cs->prs_err, part_error);
>> -
>> -    if (cmd == partcmd_update) {
>> -        /*
>> -         * Check for possible transition between valid and invalid
>> -         * partition root.
>> -         */
>> -        switch (cs->partition_root_state) {
>> -        case PRS_ROOT:
>> -        case PRS_ISOLATED:
>> -            if (part_error) {
>> -                new_prs = -old_prs;
>> -                subparts_delta--;
>> -            }
>> -            break;
>> -        case PRS_INVALID_ROOT:
>> -        case PRS_INVALID_ISOLATED:
>> -            if (!part_error) {
>> -                new_prs = -old_prs;
>> -                subparts_delta++;
>> -            }
>> -            break;
>> -        }
>> +    new_prs = old_prs < 0 ? -old_prs : old_prs;
>> +    part_error = local_partition_check(cs, new_prs, excpus, excl_check);
>> +    if (part_error) {
>> +        local_partition_invalidate(cs, tmp);
> 
> local_partition_invalidate() should only called if old_prs > 0.
> 
>> +        return part_error;
>>       }
>>   -    if (!adding && !deleting && (new_prs == old_prs))
>> -        return 0;
>> +    /* Nothing changes, return PERR_NONE */
>> +    if (new_prs == old_prs && cpumask_equal(excpus, cs->effective_xcpus))
>> +        return PERR_NONE;
>>         /*
>> -     * Transitioning between invalid to valid or vice versa may require
>> -     * changing CS_CPU_EXCLUSIVE. In the case of partcmd_update,
>> -     * validate_change() has already been successfully called and
>> -     * CPU lists in cs haven't been updated yet. So defer it to later.
>> +     * If partition was previously invalid but now passes checks,
>> +     * enable it and update related flags
>>        */
>> -    if ((old_prs != new_prs) && (cmd != partcmd_update))  {
>> -        int err = update_partition_exclusive_flag(cs, new_prs);
>> -
>> -        if (err)
>> -            return err;
>> +    if (is_partition_invalid(cs) && !part_error) {
> The !part_error check should be unnecessary as this path will not be reached if part_error is non-zero.
> 

Thank you, Longman, you are right.

Checking !part_error here is redundant because part_error check is already performed at the
beginning of the function.

>> +        partition_enable(cs, parent, new_prs, excpus);
>> +        update_partition_exclusive_flag(cs, new_prs);
>> +        update_partition_sd_lb(cs, old_prs);
>> +        return part_error;
> Just return PERR_NONE if it is not expected to be set.
> 

Will update.

>>       }
>>   +    cpumask_updated = cpumask_andnot(tmp->addmask, excpus, cs->effective_xcpus);
>> +    cpumask_updated |= cpumask_andnot(tmp->delmask, cs->effective_xcpus, excpus);
>> +    partition_update(cs, new_prs, xcpus, excpus, tmp);
>>       /*
>> -     * Change the parent's effective_cpus & effective_xcpus (top cpuset
>> -     * only).
>> -     *
>> -     * Newly added CPUs will be removed from effective_cpus and
>> -     * newly deleted ones will be added back to effective_cpus.
>> -     */
>> -    spin_lock_irq(&callback_lock);
>> -    if (old_prs != new_prs) {
>> -        cs->partition_root_state = new_prs;
>> -        if (new_prs <= 0)
>> -            cs->nr_subparts = 0;
>> -    }
>> -    /*
>> -     * Adding to parent's effective_cpus means deletion CPUs from cs
>> -     * and vice versa.
>> +     * Propagate changes in parent's effective_cpus down the hierarchy.
>>        */
>> -    if (adding)
>> -        isolcpus_updated += partition_xcpus_del(old_prs, parent,
>> -                            tmp->addmask);
>> -    if (deleting)
>> -        isolcpus_updated += partition_xcpus_add(new_prs, parent,
>> -                            tmp->delmask);
>> -
>> -    if (is_partition_valid(parent)) {
>> -        parent->nr_subparts += subparts_delta;
>> -        WARN_ON_ONCE(parent->nr_subparts < 0);
>> -    }
>> -    spin_unlock_irq(&callback_lock);
>> -    update_unbound_workqueue_cpumask(isolcpus_updated);
>> -
>> -    if ((old_prs != new_prs) && (cmd == partcmd_update))
>> -        update_partition_exclusive_flag(cs, new_prs);
>> -
>> -    if (adding || deleting) {
>> +    if (cpumask_updated) {
>>           cpuset_update_tasks_cpumask(parent, tmp->addmask);
>>           update_sibling_cpumasks(parent, cs, tmp);
>>       }
>> +    return part_error;
> 
> Ditto.
> 
> Cheers,
> Longman
> 

-- 
Best regards,
Ridong