[RFC patch v3 02/20] sched: Several fixes for cache aware scheduling
Posted by Tim Chen 3 months, 3 weeks ago
From: Chen Yu <yu.c.chen@intel.com>

1. Fix compile error on percpu allocation.
2. Enqueue to the target CPU rather than the current CPU.
3. Add NULL LLC sched domain check (Libo Chen).
4. Introduce sched feature SCHED_CACHE to control cache aware scheduling.
5. Fix the unsigned occupancy field being initialized to -1.
6. If there is only 1 thread in the process, no need to enable cache
   awareness.
7. Add __maybe_unused to __migrate_degrades_locality() to
   avoid compile warnings.

Signed-off-by: Chen Yu <yu.c.chen@intel.com>
---
 include/linux/mm_types.h |  4 ++--
 kernel/sched/fair.c      | 27 ++++++++++++++++-----------
 kernel/sched/features.h  |  1 +
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 013291c6aaa2..9de4a0a13c4d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1411,11 +1411,11 @@ static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumas
 #endif /* CONFIG_SCHED_MM_CID */
 
 #ifdef CONFIG_SCHED_CACHE
-extern void mm_init_sched(struct mm_struct *mm, struct mm_sched *pcpu_sched);
+extern void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *pcpu_sched);
 
 static inline int mm_alloc_sched_noprof(struct mm_struct *mm)
 {
-	struct mm_sched *pcpu_sched = alloc_percpu_noprof(struct mm_sched);
+	struct mm_sched __percpu *pcpu_sched = alloc_percpu_noprof(struct mm_sched);
 	if (!pcpu_sched)
 		return -ENOMEM;
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index df7d4a324fbe..89db97f8ef02 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1175,7 +1175,7 @@ static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
 #define EPOCH_PERIOD	(HZ/100)	/* 10 ms */
 #define EPOCH_OLD	5		/* 50 ms */
 
-void mm_init_sched(struct mm_struct *mm, struct mm_sched *_pcpu_sched)
+void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *_pcpu_sched)
 {
 	unsigned long epoch;
 	int i;
@@ -1186,7 +1186,7 @@ void mm_init_sched(struct mm_struct *mm, struct mm_sched *_pcpu_sched)
 
 		pcpu_sched->runtime = 0;
 		pcpu_sched->epoch = epoch = rq->cpu_epoch;
-		pcpu_sched->occ = -1;
+		pcpu_sched->occ = 0;
 	}
 
 	raw_spin_lock_init(&mm->mm_sched_lock);
@@ -1254,7 +1254,7 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
 	if (!mm || !mm->pcpu_sched)
 		return;
 
-	pcpu_sched = this_cpu_ptr(p->mm->pcpu_sched);
+	pcpu_sched = per_cpu_ptr(p->mm->pcpu_sched, cpu_of(rq));
 
 	scoped_guard (raw_spinlock, &rq->cpu_epoch_lock) {
 		__update_mm_sched(rq, pcpu_sched);
@@ -1264,12 +1264,14 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
 	}
 
 	/*
-	 * If this task hasn't hit task_cache_work() for a while, invalidate
+	 * If this task hasn't hit task_cache_work() for a while, or it
+	 * has only 1 thread, invalidate
 	 * it's preferred state.
 	 */
-	if (epoch - READ_ONCE(mm->mm_sched_epoch) > EPOCH_OLD) {
+	if (epoch - READ_ONCE(mm->mm_sched_epoch) > EPOCH_OLD ||
+	    get_nr_threads(p) <= 1) {
 		mm->mm_sched_cpu = -1;
-		pcpu_sched->occ = -1;
+		pcpu_sched->occ = 0;
 	}
 }
 
@@ -1286,9 +1288,6 @@ static void task_tick_cache(struct rq *rq, struct task_struct *p)
 
 	guard(raw_spinlock)(&mm->mm_sched_lock);
 
-	if (mm->mm_sched_epoch == rq->cpu_epoch)
-		return;
-
 	if (work->next == work) {
 		task_work_add(p, work, TWA_RESUME);
 		WRITE_ONCE(mm->mm_sched_epoch, rq->cpu_epoch);
@@ -1322,6 +1321,9 @@ static void task_cache_work(struct callback_head *work)
 			unsigned long occ, m_occ = 0, a_occ = 0;
 			int m_cpu = -1, nr = 0, i;
 
+			if (!sd)
+				continue;
+
 			for_each_cpu(i, sched_domain_span(sd)) {
 				occ = fraction_mm_sched(cpu_rq(i),
 							per_cpu_ptr(mm->pcpu_sched, i));
@@ -8801,6 +8803,9 @@ static int select_cache_cpu(struct task_struct *p, int prev_cpu)
 	struct mm_struct *mm = p->mm;
 	int cpu;
 
+	if (!sched_feat(SCHED_CACHE))
+		return prev_cpu;
+
 	if (!mm || p->nr_cpus_allowed == 1)
 		return prev_cpu;
 
@@ -9555,7 +9560,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 		return 0;
 
 #ifdef CONFIG_SCHED_CACHE
-	if (p->mm && p->mm->pcpu_sched) {
+	if (sched_feat(SCHED_CACHE) && p->mm && p->mm->pcpu_sched) {
 		/*
 		 * XXX things like Skylake have non-inclusive L3 and might not
 		 * like this L3 centric view. What to do about L2 stickyness ?
@@ -9633,7 +9638,7 @@ static long migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 }
 
 #else
-static long __migrate_degrades_locality(struct task_struct *p, int src_cpu, int dst_cpu, bool idle)
+static __maybe_unused long __migrate_degrades_locality(struct task_struct *p, int src_cpu, int dst_cpu, bool idle)
 {
 	return 0;
 }
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 3c12d9f93331..d2af7bfd36bf 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -87,6 +87,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
  */
 SCHED_FEAT(SIS_UTIL, true)
 
+SCHED_FEAT(SCHED_CACHE, true)
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls
  * in a single rq->lock section. Default disabled because the
-- 
2.32.0
Re: [RFC patch v3 02/20] sched: Several fixes for cache aware scheduling
Posted by Libo Chen 3 months ago
Hi Chenyu

On 6/18/25 11:27, Tim Chen wrote:
> From: Chen Yu <yu.c.chen@intel.com>
> 
> 1. Fix compile error on percpu allocation.
> 2. Enqueue to the target CPU rather than the current CPU.
> 3. Add NULL LLC sched domain check (Libo Chen).

Can I suggest we completely disable cache-aware scheduling
for systems without any LLC in the next version? No added
fields or function code for them. This info should be easy
to determine during bootup while building up the topology,
and it cannot change at runtime. Sometimes it's not
possible for distros to disable it in Kconfig just for one
particular CPU, and SCHED_CACHE_LB isn't enough to remove
the added fields; users can turn it back on anyway.

Thanks,
Libo


> 4. Introduce sched feature SCHED_CACHE to control cache aware scheduling.
> 5. Fix the unsigned occupancy field being initialized to -1.
> 6. If there is only 1 thread in the process, no need to enable cache
>    awareness.
> 7. Add __maybe_unused to __migrate_degrades_locality() to
>    avoid compile warnings.
> 
> Signed-off-by: Chen Yu <yu.c.chen@intel.com>
> ---
>  include/linux/mm_types.h |  4 ++--
>  kernel/sched/fair.c      | 27 ++++++++++++++++-----------
>  kernel/sched/features.h  |  1 +
>  3 files changed, 19 insertions(+), 13 deletions(-)
> 
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 013291c6aaa2..9de4a0a13c4d 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -1411,11 +1411,11 @@ static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumas
>  #endif /* CONFIG_SCHED_MM_CID */
>  
>  #ifdef CONFIG_SCHED_CACHE
> -extern void mm_init_sched(struct mm_struct *mm, struct mm_sched *pcpu_sched);
> +extern void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *pcpu_sched);
>  
>  static inline int mm_alloc_sched_noprof(struct mm_struct *mm)
>  {
> -	struct mm_sched *pcpu_sched = alloc_percpu_noprof(struct mm_sched);
> +	struct mm_sched __percpu *pcpu_sched = alloc_percpu_noprof(struct mm_sched);
>  	if (!pcpu_sched)
>  		return -ENOMEM;
>  
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index df7d4a324fbe..89db97f8ef02 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1175,7 +1175,7 @@ static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
>  #define EPOCH_PERIOD	(HZ/100)	/* 10 ms */
>  #define EPOCH_OLD	5		/* 50 ms */
>  
> -void mm_init_sched(struct mm_struct *mm, struct mm_sched *_pcpu_sched)
> +void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *_pcpu_sched)
>  {
>  	unsigned long epoch;
>  	int i;
> @@ -1186,7 +1186,7 @@ void mm_init_sched(struct mm_struct *mm, struct mm_sched *_pcpu_sched)
>  
>  		pcpu_sched->runtime = 0;
>  		pcpu_sched->epoch = epoch = rq->cpu_epoch;
> -		pcpu_sched->occ = -1;
> +		pcpu_sched->occ = 0;
>  	}
>  
>  	raw_spin_lock_init(&mm->mm_sched_lock);
> @@ -1254,7 +1254,7 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
>  	if (!mm || !mm->pcpu_sched)
>  		return;
>  
> -	pcpu_sched = this_cpu_ptr(p->mm->pcpu_sched);
> +	pcpu_sched = per_cpu_ptr(p->mm->pcpu_sched, cpu_of(rq));
>  
>  	scoped_guard (raw_spinlock, &rq->cpu_epoch_lock) {
>  		__update_mm_sched(rq, pcpu_sched);
> @@ -1264,12 +1264,14 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
>  	}
>  
>  	/*
> -	 * If this task hasn't hit task_cache_work() for a while, invalidate
> +	 * If this task hasn't hit task_cache_work() for a while, or it
> +	 * has only 1 thread, invalidate
>  	 * it's preferred state.
>  	 */
> -	if (epoch - READ_ONCE(mm->mm_sched_epoch) > EPOCH_OLD) {
> +	if (epoch - READ_ONCE(mm->mm_sched_epoch) > EPOCH_OLD ||
> +	    get_nr_threads(p) <= 1) {
>  		mm->mm_sched_cpu = -1;
> -		pcpu_sched->occ = -1;
> +		pcpu_sched->occ = 0;
>  	}
>  }
>  
> @@ -1286,9 +1288,6 @@ static void task_tick_cache(struct rq *rq, struct task_struct *p)
>  
>  	guard(raw_spinlock)(&mm->mm_sched_lock);
>  
> -	if (mm->mm_sched_epoch == rq->cpu_epoch)
> -		return;
> -
>  	if (work->next == work) {
>  		task_work_add(p, work, TWA_RESUME);
>  		WRITE_ONCE(mm->mm_sched_epoch, rq->cpu_epoch);
> @@ -1322,6 +1321,9 @@ static void task_cache_work(struct callback_head *work)
>  			unsigned long occ, m_occ = 0, a_occ = 0;
>  			int m_cpu = -1, nr = 0, i;
>  
> +			if (!sd)
> +				continue;
> +
>  			for_each_cpu(i, sched_domain_span(sd)) {
>  				occ = fraction_mm_sched(cpu_rq(i),
>  							per_cpu_ptr(mm->pcpu_sched, i));
> @@ -8801,6 +8803,9 @@ static int select_cache_cpu(struct task_struct *p, int prev_cpu)
>  	struct mm_struct *mm = p->mm;
>  	int cpu;
>  
> +	if (!sched_feat(SCHED_CACHE))
> +		return prev_cpu;
> +
>  	if (!mm || p->nr_cpus_allowed == 1)
>  		return prev_cpu;
>  
> @@ -9555,7 +9560,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
>  		return 0;
>  
>  #ifdef CONFIG_SCHED_CACHE
> -	if (p->mm && p->mm->pcpu_sched) {
> +	if (sched_feat(SCHED_CACHE) && p->mm && p->mm->pcpu_sched) {
>  		/*
>  		 * XXX things like Skylake have non-inclusive L3 and might not
>  		 * like this L3 centric view. What to do about L2 stickyness ?
> @@ -9633,7 +9638,7 @@ static long migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
>  }
>  
>  #else
> -static long __migrate_degrades_locality(struct task_struct *p, int src_cpu, int dst_cpu, bool idle)
> +static __maybe_unused long __migrate_degrades_locality(struct task_struct *p, int src_cpu, int dst_cpu, bool idle)
>  {
>  	return 0;
>  }
> diff --git a/kernel/sched/features.h b/kernel/sched/features.h
> index 3c12d9f93331..d2af7bfd36bf 100644
> --- a/kernel/sched/features.h
> +++ b/kernel/sched/features.h
> @@ -87,6 +87,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
>   */
>  SCHED_FEAT(SIS_UTIL, true)
>  
> +SCHED_FEAT(SCHED_CACHE, true)
>  /*
>   * Issue a WARN when we do multiple update_rq_clock() calls
>   * in a single rq->lock section. Default disabled because the
Re: [RFC patch v3 02/20] sched: Several fixes for cache aware scheduling
Posted by Chen, Yu C 3 months ago
On 7/8/2025 9:15 AM, Libo Chen wrote:
> Hi Chenyu
> 
> On 6/18/25 11:27, Tim Chen wrote:
>> From: Chen Yu <yu.c.chen@intel.com>
>>
>> 1. Fix compile error on percpu allocation.
>> 2. Enqueue to the target CPU rather than the current CPU.
>> 3. Add NULL LLC sched domain check (Libo Chen).
> 
> Can I suggest we completely disable cache-aware scheduling
> for systems without any LLC in the next version? No added
> fields or function code for them. This info should be easy
> to determine during bootup while building up the topology,
> and it cannot change at runtime. Sometimes it's not
> possible for distros to disable it in Kconfig just for one
> particular CPU, and SCHED_CACHE_LB isn't enough to remove
> the added fields; users can turn it back on anyway.
> 

Good point. My understanding is that we should introduce
a static key similar to sched_smt_present to get rid of the
cache-aware scheduling code path if either LLC is not present
or there is only 1 LLC within the Node.
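
Something like the rough sketch below; the key name and the LLC-counting
helper are made up here for illustration, not existing code:

/* Modeled after sched_smt_present; set once while building the topology. */
DEFINE_STATIC_KEY_FALSE(sched_cache_present);

static void update_sched_cache_present(void)
{
	/* nr_llcs_in_node() is an assumed helper for this sketch. */
	if (nr_llcs_in_node() > 1)
		static_branch_enable(&sched_cache_present);
}

static int select_cache_cpu(struct task_struct *p, int prev_cpu)
{
	if (!static_branch_likely(&sched_cache_present))
		return prev_cpu;

	/* the existing cache-aware selection would follow here */
	return prev_cpu;
}

That way systems without an LLC, or with a single LLC per node, skip the
whole path regardless of the sched_feat() setting.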

Thanks,
Chenyu

> Thanks,
> Libo
> 
>
Re: [RFC patch v3 02/20] sched: Several fixes for cache aware scheduling
Posted by Libo Chen 3 months ago

On 7/8/25 00:54, Chen, Yu C wrote:
> On 7/8/2025 9:15 AM, Libo Chen wrote:
>> Hi Chenyu
>>
>> On 6/18/25 11:27, Tim Chen wrote:
>>> From: Chen Yu <yu.c.chen@intel.com>
>>>
>>> 1. Fix compile error on percpu allocation.
>>> 2. Enqueue to the target CPU rather than the current CPU.
>>> 3. Add NULL LLC sched domain check (Libo Chen).
>>
>> Can I suggest we completely disable cache-aware scheduling
>> for systems without any LLC in the next version? No added
>> fields or function code for them. This info should be easy
>> to determine during bootup while building up the topology,
>> and it cannot change at runtime. Sometimes it's not
>> possible for distros to disable it in Kconfig just for one
>> particular CPU, and SCHED_CACHE_LB isn't enough to remove
>> the added fields; users can turn it back on anyway.
>>
> 
> Good point. My understanding is that we should introduce
> a static key similar to sched_smt_present to get rid of the
Exactly!
> cache-aware scheduling code path if either LLC is not present
> or there is only 1 LLC within the Node.
> 
> Thanks,
> Chenyu
> 
>> Thanks,
>> Libo
>>
>>
>
Re: [RFC patch v3 02/20] sched: Several fixes for cache aware scheduling
Posted by Shrikanth Hegde 3 months, 1 week ago

On 6/18/25 23:57, Tim Chen wrote:
> From: Chen Yu <yu.c.chen@intel.com>
> 
> 1. Fix compile error on percpu allocation.
> 2. Enqueue to the target CPU rather than the current CPU.
> 3. Add NULL LLC sched domain check (Libo Chen).
> 4. Introduce sched feature SCHED_CACHE to control cache aware scheduling.
> 5. Fix the unsigned occupancy field being initialized to -1.
> 6. If there is only 1 thread in the process, no need to enable cache
>     awareness.
> 7. Add __maybe_unused to __migrate_degrades_locality() to
>     avoid compile warnings.
> 
> Signed-off-by: Chen Yu <yu.c.chen@intel.com>
> ---
>   include/linux/mm_types.h |  4 ++--
>   kernel/sched/fair.c      | 27 ++++++++++++++++-----------
>   kernel/sched/features.h  |  1 +
>   3 files changed, 19 insertions(+), 13 deletions(-)
> 
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 013291c6aaa2..9de4a0a13c4d 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -1411,11 +1411,11 @@ static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumas
>   #endif /* CONFIG_SCHED_MM_CID */
>   
>   #ifdef CONFIG_SCHED_CACHE
> -extern void mm_init_sched(struct mm_struct *mm, struct mm_sched *pcpu_sched);
> +extern void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *pcpu_sched);
>   
>   static inline int mm_alloc_sched_noprof(struct mm_struct *mm)
>   {
> -	struct mm_sched *pcpu_sched = alloc_percpu_noprof(struct mm_sched);
> +	struct mm_sched __percpu *pcpu_sched = alloc_percpu_noprof(struct mm_sched);
>   	if (!pcpu_sched)
>   		return -ENOMEM;
>   
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index df7d4a324fbe..89db97f8ef02 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1175,7 +1175,7 @@ static s64 update_curr_se(struct rq *rq, struct sched_entity *curr)
>   #define EPOCH_PERIOD	(HZ/100)	/* 10 ms */
>   #define EPOCH_OLD	5		/* 50 ms */
>   
> -void mm_init_sched(struct mm_struct *mm, struct mm_sched *_pcpu_sched)
> +void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *_pcpu_sched)
>   {
>   	unsigned long epoch;
>   	int i;
> @@ -1186,7 +1186,7 @@ void mm_init_sched(struct mm_struct *mm, struct mm_sched *_pcpu_sched)
>   
>   		pcpu_sched->runtime = 0;
>   		pcpu_sched->epoch = epoch = rq->cpu_epoch;
> -		pcpu_sched->occ = -1;
> +		pcpu_sched->occ = 0;
>   	}
>   
>   	raw_spin_lock_init(&mm->mm_sched_lock);
> @@ -1254,7 +1254,7 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
>   	if (!mm || !mm->pcpu_sched)
>   		return;
>   
> -	pcpu_sched = this_cpu_ptr(p->mm->pcpu_sched);
> +	pcpu_sched = per_cpu_ptr(p->mm->pcpu_sched, cpu_of(rq));
>   
>   	scoped_guard (raw_spinlock, &rq->cpu_epoch_lock) {
>   		__update_mm_sched(rq, pcpu_sched);
> @@ -1264,12 +1264,14 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
>   	}
>   
>   	/*
> -	 * If this task hasn't hit task_cache_work() for a while, invalidate
> +	 * If this task hasn't hit task_cache_work() for a while, or it
> +	 * has only 1 thread, invalidate
>   	 * it's preferred state.
>   	 */
> -	if (epoch - READ_ONCE(mm->mm_sched_epoch) > EPOCH_OLD) {
> +	if (epoch - READ_ONCE(mm->mm_sched_epoch) > EPOCH_OLD ||
> +	    get_nr_threads(p) <= 1) {
>   		mm->mm_sched_cpu = -1;
> -		pcpu_sched->occ = -1;
> +		pcpu_sched->occ = 0;
>   	}
>   }
>   
> @@ -1286,9 +1288,6 @@ static void task_tick_cache(struct rq *rq, struct task_struct *p)
>   
>   	guard(raw_spinlock)(&mm->mm_sched_lock);
>   
> -	if (mm->mm_sched_epoch == rq->cpu_epoch)
> -		return;
> -
>   	if (work->next == work) {
>   		task_work_add(p, work, TWA_RESUME);
>   		WRITE_ONCE(mm->mm_sched_epoch, rq->cpu_epoch);
> @@ -1322,6 +1321,9 @@ static void task_cache_work(struct callback_head *work)
>   			unsigned long occ, m_occ = 0, a_occ = 0;
>   			int m_cpu = -1, nr = 0, i;
>   
> +			if (!sd)
> +				continue;
> +
>   			for_each_cpu(i, sched_domain_span(sd)) {
>   				occ = fraction_mm_sched(cpu_rq(i),
>   							per_cpu_ptr(mm->pcpu_sched, i));
> @@ -8801,6 +8803,9 @@ static int select_cache_cpu(struct task_struct *p, int prev_cpu)
>   	struct mm_struct *mm = p->mm;
>   	int cpu;
>   
> +	if (!sched_feat(SCHED_CACHE))
> +		return prev_cpu;
> +
>   	if (!mm || p->nr_cpus_allowed == 1)
>   		return prev_cpu;
>   
> @@ -9555,7 +9560,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
>   		return 0;
>   
>   #ifdef CONFIG_SCHED_CACHE
> -	if (p->mm && p->mm->pcpu_sched) {
> +	if (sched_feat(SCHED_CACHE) && p->mm && p->mm->pcpu_sched) {
>   		/*
>   		 * XXX things like Skylake have non-inclusive L3 and might not
>   		 * like this L3 centric view. What to do about L2 stickyness ?
> @@ -9633,7 +9638,7 @@ static long migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
>   }
>   
>   #else
> -static long __migrate_degrades_locality(struct task_struct *p, int src_cpu, int dst_cpu, bool idle)
> +static __maybe_unused long __migrate_degrades_locality(struct task_struct *p, int src_cpu, int dst_cpu, bool idle)
>   {
>   	return 0;
>   }
> diff --git a/kernel/sched/features.h b/kernel/sched/features.h
> index 3c12d9f93331..d2af7bfd36bf 100644
> --- a/kernel/sched/features.h
> +++ b/kernel/sched/features.h
> @@ -87,6 +87,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
>    */
>   SCHED_FEAT(SIS_UTIL, true)
>   
> +SCHED_FEAT(SCHED_CACHE, true)

Having both SCHED_FEAT and CONFIG_SCHED_CACHE seems like overkill.
Is it really necessary to have both?

Also, given the complexity it brings, and that only workloads whose threads
share data benefit from it, it could be false by default.
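
Concretely that would just be flipping the default of the new entry:

SCHED_FEAT(SCHED_CACHE, false)

so only users who opt in through the runtime knob get the new behavior.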

>   /*
>    * Issue a WARN when we do multiple update_rq_clock() calls
>    * in a single rq->lock section. Default disabled because the
Re: [RFC patch v3 02/20] sched: Several fixes for cache aware scheduling
Posted by Tim Chen 3 months ago
On Fri, 2025-07-04 at 01:03 +0530, Shrikanth Hegde wrote:
> 
> 
> > diff --git a/kernel/sched/features.h b/kernel/sched/features.h
> > index 3c12d9f93331..d2af7bfd36bf 100644
> > --- a/kernel/sched/features.h
> > +++ b/kernel/sched/features.h
> > @@ -87,6 +87,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
> >    */
> >   SCHED_FEAT(SIS_UTIL, true)
> >   
> > +SCHED_FEAT(SCHED_CACHE, true)
> 
> Having both SCHED_FEAT and CONFIG_SCHED_CACHE seems like overkill.
> Is it really necessary to have both?

As Peter pointed out previously, a runtime knob is still preferable.
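On kernels that expose the features file under debugfs, that knob would be
the usual one, so something like

	echo NO_SCHED_CACHE > /sys/kernel/debug/sched/features

should turn it off at run time (assuming the feature keeps this name).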

> 
> Also, given the complexity it brings, and that only workloads whose threads
> share data benefit from it, it could be false by default.

That's true. We'll try to address such cases in the next version with a
default behavior that's more conservative.

Tim
> 
> >   /*
> >    * Issue a WARN when we do multiple update_rq_clock() calls
> >    * in a single rq->lock section. Default disabled because the
>