For each runqueue, track the number of tasks with an LLC preference
and how many of them are running on their preferred LLC. This mirrors
nr_numa_running and nr_preferred_running for NUMA balancing, and will
be used by cache-aware load balancing in later patches.
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
Notes:
v1->v2: Invoke task_of() once and reuse its result afterwards.
(Peter Zijlstra)
Remove the hacky reset_llc_stats() and introduce a sched_llc_active flag
to properly pair the enqueue/dequeue statistics updates (Peter Zijlstra, K Prateek Nayak)
include/linux/sched.h | 2 ++
init/init_task.c | 1 +
kernel/sched/core.c | 5 ++++
kernel/sched/fair.c | 60 ++++++++++++++++++++++++++++++++++++++++---
kernel/sched/sched.h | 6 +++++
5 files changed, 71 insertions(+), 3 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1ad46220cd04..466ba8b7398c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1408,6 +1408,8 @@ struct task_struct {
#ifdef CONFIG_SCHED_CACHE
struct callback_head cache_work;
+ /*the p is currently refcounted in a rq's preferred llc stats*/
+ bool sched_llc_active;
int preferred_llc;
#endif
diff --git a/init/init_task.c b/init/init_task.c
index 44bae72b5b7d..ee78837b0aa2 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -192,6 +192,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
.numa_faults = NULL,
#endif
#ifdef CONFIG_SCHED_CACHE
+ .sched_llc_active = false,
.preferred_llc = -1,
#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e8bdf03a4b7f..48626c81ba8e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -531,6 +531,11 @@ void __trace_set_current_state(int state_value)
}
EXPORT_SYMBOL(__trace_set_current_state);
+int task_llc(const struct task_struct *p)
+{
+ return per_cpu(sd_llc_id, task_cpu(p));
+}
+
/*
* Serialization rules:
*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 10cec83f65d5..d46a70a9d9fb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1223,6 +1223,43 @@ static int llc_id(int cpu)
return llc;
}
+static void account_llc_enqueue(struct rq *rq, struct task_struct *p)
+{
+ int pref_llc;
+
+ if (!sched_cache_enabled())
+ return;
+
+ pref_llc = p->preferred_llc;
+ if (pref_llc < 0)
+ return;
+
+ rq->nr_llc_running++;
+ rq->nr_pref_llc_running += (pref_llc == task_llc(p));
+ p->sched_llc_active = true;
+}
+
+static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
+{
+ int pref_llc;
+
+ /*
+ * Borrow the uc_se->active from uclamp_rq_inc_id(),
+ * uclamp_rq_dec_id() to avoid the unbalanced calculation
+ * of rq statistics.
+ */
+ if (unlikely(!p->sched_llc_active))
+ return;
+
+ pref_llc = p->preferred_llc;
+ if (pref_llc < 0)
+ return;
+
+ rq->nr_llc_running--;
+ rq->nr_pref_llc_running -= (pref_llc == task_llc(p));
+ p->sched_llc_active = false;
+}
+
void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *_pcpu_sched)
{
unsigned long epoch;
@@ -1294,6 +1331,8 @@ static unsigned long __no_profile fraction_mm_sched(struct rq *rq, struct mm_sch
return div64_u64(NICE_0_LOAD * pcpu_sched->runtime, rq->cpu_runtime + 1);
}
+static unsigned int task_running_on_cpu(int cpu, struct task_struct *p);
+
static inline
void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
{
@@ -1346,8 +1385,13 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
#endif
}
- if (p->preferred_llc != mm_sched_llc)
+ /* task not on rq accounted later in account_entity_enqueue() */
+ if (task_running_on_cpu(rq->cpu, p) &&
+ p->preferred_llc != mm_sched_llc) {
+ account_llc_dequeue(rq, p);
p->preferred_llc = mm_sched_llc;
+ account_llc_enqueue(rq, p);
+ }
}
static void task_tick_cache(struct rq *rq, struct task_struct *p)
@@ -1475,6 +1519,10 @@ void init_sched_mm(struct task_struct *p) { }
static void task_tick_cache(struct rq *rq, struct task_struct *p) { }
+static void account_llc_enqueue(struct rq *rq, struct task_struct *p) {}
+
+static void account_llc_dequeue(struct rq *rq, struct task_struct *p) {}
+
#endif
/*
@@ -3965,9 +4013,11 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
update_load_add(&cfs_rq->load, se->load.weight);
if (entity_is_task(se)) {
+ struct task_struct *p = task_of(se);
struct rq *rq = rq_of(cfs_rq);
- account_numa_enqueue(rq, task_of(se));
+ account_numa_enqueue(rq, p);
+ account_llc_enqueue(rq, p);
list_add(&se->group_node, &rq->cfs_tasks);
}
cfs_rq->nr_queued++;
@@ -3978,7 +4028,11 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
update_load_sub(&cfs_rq->load, se->load.weight);
if (entity_is_task(se)) {
- account_numa_dequeue(rq_of(cfs_rq), task_of(se));
+ struct task_struct *p = task_of(se);
+ struct rq *rq = rq_of(cfs_rq);
+
+ account_numa_dequeue(rq, p);
+ account_llc_dequeue(rq, p);
list_del_init(&se->group_node);
}
cfs_rq->nr_queued--;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 728737641847..ee8b70647835 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1126,6 +1126,10 @@ struct rq {
unsigned int nr_preferred_running;
unsigned int numa_migrate_on;
#endif
+#ifdef CONFIG_SCHED_CACHE
+ unsigned int nr_pref_llc_running;
+ unsigned int nr_llc_running;
+#endif
#ifdef CONFIG_NO_HZ_COMMON
unsigned long last_blocked_load_update_tick;
unsigned int has_blocked_load;
@@ -1980,6 +1984,8 @@ init_numa_balancing(u64 clone_flags, struct task_struct *p)
#endif /* !CONFIG_NUMA_BALANCING */
+int task_llc(const struct task_struct *p);
+
static inline void
queue_balance_callback(struct rq *rq,
struct balance_callback *head,
--
2.32.0
On 2025/12/4 07:07, Tim Chen wrote:
> @@ -1346,8 +1385,13 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
> #endif
> }
>
> - if (p->preferred_llc != mm_sched_llc)
> + /* task not on rq accounted later in account_entity_enqueue() */
> + if (task_running_on_cpu(rq->cpu, p) &&
> + p->preferred_llc != mm_sched_llc) {
#ifdef CONFIG_NUMA_BALANCING
                /*
                 * Don't assign preferred LLC if it
                 * conflicts with NUMA balancing.
                 */
                if (p->numa_preferred_nid >= 0 &&
                    cpu_to_node(mm->mm_sched_cpu) != p->numa_preferred_nid)
                        mm_sched_llc = -1;
#endif
        }

        /* task not on rq accounted later in account_entity_enqueue() */
        if (task_running_on_cpu(rq->cpu, p) &&
            p->preferred_llc != mm_sched_llc) {
                account_llc_dequeue(rq, p);
                p->preferred_llc = mm_sched_llc;
                account_llc_enqueue(rq, p);
        }
I am a little concerned that there might be cases where both
p->preferred_llc and mm_sched_llc are equal to -1 at this point.
Is it necessary to add a check here?
> + account_llc_dequeue(rq, p);
> p->preferred_llc = mm_sched_llc;
> + account_llc_enqueue(rq, p);
> + }
On 12/17/2025 6:04 PM, Vern Hao wrote:
>
> On 2025/12/4 07:07, Tim Chen wrote:
>> @@ -1346,8 +1385,13 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
>> #endif
>> }
>> - if (p->preferred_llc != mm_sched_llc)
>> + /* task not on rq accounted later in account_entity_enqueue() */
>> + if (task_running_on_cpu(rq->cpu, p) &&
>> + p->preferred_llc != mm_sched_llc) {
>> #ifdef CONFIG_NUMA_BALANCING
>> /*
>> * Don't assign preferred LLC if it
>> * conflicts with NUMA balancing.
>> */
>> if (p->numa_preferred_nid >= 0 &&
>> cpu_to_node(mm->mm_sched_cpu) != p->numa_preferred_nid)
>> mm_sched_llc = -1;
>> #endif
>> }
>>
>> /* task not on rq accounted later in account_entity_enqueue() */
>> if (task_running_on_cpu(rq->cpu, p) &&
>> p->preferred_llc != mm_sched_llc) {
>> account_llc_dequeue(rq, p);
>> p->preferred_llc = mm_sched_llc;
>> account_llc_enqueue(rq, p);
>>
>> }
>>
> I am a little concerned that there might be cases where both
> p->preferred_llc and mm_sched_llc are equal to -1 at this point.
> Is it necessary to add a check here?
>
Are you concerned about a mismatch in the per-runqueue statistics
nr_pref_llc_running, nr_pref_llc, and nr_llc_running? That should not
be an issue, because account_llc_dequeue() and account_llc_enqueue() are
always invoked together in account_mm_sched(). If p->preferred_llc ==
mm_sched_llc == -1, neither of them is invoked, so the accounting
stays paired.
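To make the pairing explicit, here is a stripped-down view of that path
(simplified from the hunk quoted above; the sched_cache_enabled() and
sched_llc_active handling is omitted):

        /* account_mm_sched(), simplified: */
        if (task_running_on_cpu(rq->cpu, p) &&
            p->preferred_llc != mm_sched_llc) {     /* both -1: nothing to do */
                /* drops the old preference; returns early if it was < 0 */
                account_llc_dequeue(rq, p);
                p->preferred_llc = mm_sched_llc;
                /* accounts the new preference; returns early if it is < 0 */
                account_llc_enqueue(rq, p);
        }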
Please let me know if I understand your comments correctly.
thanks,
Chenyu
On Wed, Dec 03, 2025 at 03:07:25PM -0800, Tim Chen wrote:
> #ifdef CONFIG_SCHED_CACHE
> struct callback_head cache_work;
> + /*the p is currently refcounted in a rq's preferred llc stats*/
Shall we have spaces after and before the comment marks?
Also, comment confuses me, I don't see get_task_struct() /
put_task_struct() usage. Did you mean something else with refcount?
> + bool sched_llc_active;
> int preferred_llc;
> #endif
> +static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
> +{
> + int pref_llc;
> +
> + /*
> + * Borrow the uc_se->active from uclamp_rq_inc_id(),
> + * uclamp_rq_dec_id() to avoid the unbalanced calculation
> + * of rq statistics.
> + */
> + if (unlikely(!p->sched_llc_active))
> + return;
Another very confusing comment; what? Also, can you please explain (in
the new comment) how we get here without having llc_active set?
> +
> + pref_llc = p->preferred_llc;
> + if (pref_llc < 0)
> + return;
> +
> + rq->nr_llc_running--;
> + rq->nr_pref_llc_running -= (pref_llc == task_llc(p));
> + p->sched_llc_active = false;
> +}
On Tue, 2025-12-09 at 13:16 +0100, Peter Zijlstra wrote:
> On Wed, Dec 03, 2025 at 03:07:25PM -0800, Tim Chen wrote:
>
>
> > #ifdef CONFIG_SCHED_CACHE
> > struct callback_head cache_work;
> > + /*the p is currently refcounted in a rq's preferred llc stats*/
>
> Shall we have spaces after and before the comment marks?
>
> Also, comment confuses me, I don't see get_task_struct() /
> put_task_struct() usage. Did you mean something else with refcount?
It is the accounting of the number of tasks preferring a certain LLC
on a runqueue, updated at enqueue/dequeue or when a task's LLC
preference changes, via account_llc_enqueue() and account_llc_dequeue().
How about changing the comment to
/* LLC preference accounting should be done in dequeue */
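That is, the struct task_struct hunk would then read roughly:

#ifdef CONFIG_SCHED_CACHE
        struct callback_head            cache_work;
        /* LLC preference accounting should be done in dequeue */
        bool                            sched_llc_active;
        int                             preferred_llc;
#endif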
>
> > + bool sched_llc_active;
> > int preferred_llc;
> > #endif
>
> > +static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
> > +{
> > + int pref_llc;
> > +
> > + /*
> > + * Borrow the uc_se->active from uclamp_rq_inc_id(),
> > + * uclamp_rq_dec_id() to avoid the unbalanced calculation
> > + * of rq statistics.
> > + */
> > + if (unlikely(!p->sched_llc_active))
> > + return;
>
> Another very confusing comment; what? Also, can you please explain (in
> the new comment) how we get here without having llc_active set?
The comment meant to say that we are using a mechanism similar to the
uc_se->active accounting in uclamp_rq_inc_id()/uclamp_rq_dec_id(). I
agree that it confuses more than it clarifies.
How about the following comment to make things clearer:
/*
* Cache aware scheduling was active when the task was enqueued.
* Admin has disabled cache aware scheduling before task was dequeued
* but the accounting has to be kept straight in case cache aware scheduling
* is re-enabled.
*/
>
> > +
> > + pref_llc = p->preferred_llc;
> > + if (pref_llc < 0)
> > + return;
> > +
> > + rq->nr_llc_running--;
> > + rq->nr_pref_llc_running -= (pref_llc == task_llc(p));
> > + p->sched_llc_active = false;
> > +}
On Tue, Dec 09, 2025 at 02:55:21PM -0800, Tim Chen wrote:
> > > +static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
> > > +{
> > > + int pref_llc;
> > > +
> > > + /*
> > > + * Borrow the uc_se->active from uclamp_rq_inc_id(),
> > > + * uclamp_rq_dec_id() to avoid the unbalanced calculation
> > > + * of rq statistics.
> > > + */
> > > + if (unlikely(!p->sched_llc_active))
> > > + return;
> >
> > Another very confusing comment; what? Also, can you please explain (in
> > the new comment) how we get here without having llc_active set?
>
> The comment meant to say that we are using a similar mechanism as
> accounting done in uc_se->active from uclamp_rq_inc_id(). I agree that
> it confuses more than making things clearer.
>
> How about the following comment to make things clearer:
>
> /*
> * Cache aware scheduling was active when the task was enqueued.
> * Admin has disabled cache aware scheduling before task was dequeued
> * but the accounting has to be kept straight in case cache aware scheduling
> * is re-enabled.
> */
Is having that sched_cache_enabled() test worth it?
account_numa_{en,de}queue() don't seem to have any of this.
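For reference, account_numa_enqueue() is roughly:

static void account_numa_enqueue(struct rq *rq, struct task_struct *p)
{
        rq->nr_numa_running += (p->numa_preferred_nid != NUMA_NO_NODE);
        rq->nr_preferred_running += (p->numa_preferred_nid == task_node(p));
}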
> > > + pref_llc = p->preferred_llc;
> > > + if (pref_llc < 0)
> > > + return;
> > > +
> > > + rq->nr_llc_running--;
> > > + rq->nr_pref_llc_running -= (pref_llc == task_llc(p));
> > > + p->sched_llc_active = false;
> > > +}
On 12/10/2025 5:42 PM, Peter Zijlstra wrote:
> On Tue, Dec 09, 2025 at 02:55:21PM -0800, Tim Chen wrote:
>
>>>> +static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
>>>> +{
>>>> + int pref_llc;
>>>> +
>>>> + /*
>>>> + * Borrow the uc_se->active from uclamp_rq_inc_id(),
>>>> + * uclamp_rq_dec_id() to avoid the unbalanced calculation
>>>> + * of rq statistics.
>>>> + */
>>>> + if (unlikely(!p->sched_llc_active))
>>>> + return;
>>>
>>> Another very confusing comment; what? Also, can you please explain (in
>>> the new comment) how we get here without having llc_active set?
>>
>> The comment meant to say that we are using a similar mechanism as
>> accounting done in uc_se->active from uclamp_rq_inc_id(). I agree that
>> it confuses more than making things clearer.
>>
>> How about the following comment to make things clearer:
>>
>> /*
>> * Cache aware scheduling was active when the task was enqueued.
>> * Admin has disabled cache aware scheduling before task was dequeued
>> * but the accounting has to be kept straight in case cache aware scheduling
>> * is re-enabled.
>> */
>
> Is having that sched_cache_enabled() test worth it?
> account_numa_{en,de}queue() don't seem to have any of this.
>
>
OK, I think we can remove the sched_cache_enabled() check and
make account_llc_{en,de}queue() depend on CONFIG_SCHED_CACHE,
so that sched_llc_active can be removed.
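Something like the below (untested sketch, mirroring account_numa_{en,de}queue()):

#ifdef CONFIG_SCHED_CACHE
static void account_llc_enqueue(struct rq *rq, struct task_struct *p)
{
        /* No LLC preference recorded for this task, nothing to count. */
        if (p->preferred_llc < 0)
                return;

        rq->nr_llc_running++;
        rq->nr_pref_llc_running += (p->preferred_llc == task_llc(p));
}

static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
{
        if (p->preferred_llc < 0)
                return;

        rq->nr_llc_running--;
        rq->nr_pref_llc_running -= (p->preferred_llc == task_llc(p));
}
#else
static void account_llc_enqueue(struct rq *rq, struct task_struct *p) {}
static void account_llc_dequeue(struct rq *rq, struct task_struct *p) {}
#endif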
thanks,
Chenyu