kernel/sched/core.c | 15 +++++--- kernel/sched/sched.h | 86 ++++++++++++++++++++++++++------------------ 2 files changed, 62 insertions(+), 39 deletions(-)
The issue is that we check whether we are switching from {kernel,user}
to {kernel,user} multiple times unnecessarily.
To fix this, refactor switch_mm_cid() and break it into multiple helpers
that handle each case of switching from {kernel,user} to {kernel,user}.
Hence, we avoid any redundant checks.
Signed-off-by: Ahmed Ehab <bottaawesome633@gmail.com>
---
kernel/sched/core.c | 15 +++++---
kernel/sched/sched.h | 86 ++++++++++++++++++++++++++------------------
2 files changed, 62 insertions(+), 39 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f3951e4a55e5..abfa73f9c845 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5155,9 +5155,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
enter_lazy_tlb(prev->active_mm, next);
next->active_mm = prev->active_mm;
- if (prev->mm) // from user
+ if (prev->mm) { // from user
mmgrab_lazy_tlb(prev->active_mm);
+ switch_mm_cid_from_user_to_kernel(rq, prev, next);
+ }
else
+ /*
+ * kernel -> kernel transition does not change rq->curr->mm
+ * state. It stays NULL.
+ */
prev->active_mm = NULL;
} else { // to user
membarrier_switch_mm(rq, prev->active_mm, next->mm);
@@ -5176,12 +5182,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
/* will mmdrop_lazy_tlb() in finish_task_switch(). */
rq->prev_mm = prev->active_mm;
prev->active_mm = NULL;
- }
+ switch_mm_cid_from_kernel_to_user(rq, prev, next);
+ } else
+ switch_mm_cid_from_user_to_user(rq, prev, next);
}
- /* switch_mm_cid() requires the memory barriers above. */
- switch_mm_cid(rq, prev, next);
-
prepare_lock_switch(rq, next, rf);
/* Here we just switch the register state and the stack. */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4c36cc680361..27fa050b81f5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -5,6 +5,7 @@
#ifndef _KERNEL_SCHED_SCHED_H
#define _KERNEL_SCHED_SCHED_H
+#include "asm-generic/barrier.h"
#include <linux/sched/affinity.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/cpufreq.h>
@@ -3515,8 +3516,8 @@ static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
}
static inline void switch_mm_cid(struct rq *rq,
- struct task_struct *prev,
- struct task_struct *next)
+ struct task_struct *prev,
+ struct task_struct *next)
{
/*
* Provide a memory barrier between rq->curr store and load of
@@ -3524,38 +3525,6 @@ static inline void switch_mm_cid(struct rq *rq,
*
* Should be adapted if context_switch() is modified.
*/
- if (!next->mm) { // to kernel
- /*
- * user -> kernel transition does not guarantee a barrier, but
- * we can use the fact that it performs an atomic operation in
- * mmgrab().
- */
- if (prev->mm) // from user
- smp_mb__after_mmgrab();
- /*
- * kernel -> kernel transition does not change rq->curr->mm
- * state. It stays NULL.
- */
- } else { // to user
- /*
- * kernel -> user transition does not provide a barrier
- * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
- * Provide it here.
- */
- if (!prev->mm) { // from kernel
- smp_mb();
- } else { // from user
- /*
- * user->user transition relies on an implicit
- * memory barrier in switch_mm() when
- * current->mm changes. If the architecture
- * switch_mm() does not have an implicit memory
- * barrier, it is emitted here. If current->mm
- * is unchanged, no barrier is needed.
- */
- smp_mb__after_switch_mm();
- }
- }
if (prev->mm_cid_active) {
mm_cid_snapshot_time(rq, prev->mm);
mm_cid_put_lazy(prev);
@@ -3565,6 +3534,55 @@ static inline void switch_mm_cid(struct rq *rq,
next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm);
}
+static inline void switch_mm_cid_from_user_to_kernel(struct rq *rq,
+ struct task_struct *prev,
+ struct task_struct *next)
+
+{
+ /**
+ * user -> kernel transition does not guarantee a barrier, but
+ * we can use the fact that it performs an atomic operation in
+ * mmgrab().
+ */
+ smp_mb__after_mmgrab();
+ switch_mm_cid(rq, prev, next);
+
+}
+
+static inline void switch_mm_cid_from_kernel_to_user(struct rq *rq,
+ struct task_struct *prev,
+ struct task_struct *next)
+
+{
+ /*
+ * kernel -> user transition does not provide a barrier
+ * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
+ * Provide it here.
+ */
+ smp_mb();
+ switch_mm_cid(rq, prev, next);
+
+}
+
+
+static inline void switch_mm_cid_from_user_to_user(struct rq *rq,
+ struct task_struct *prev,
+ struct task_struct *next)
+
+{
+ /*
+ * user->user transition relies on an implicit
+ * memory barrier in switch_mm() when
+ * current->mm changes. If the architecture
+ * switch_mm() does not have an implicit memory
+ * barrier, it is emitted here. If current->mm
+ * is unchanged, no barrier is needed.
+ */
+ smp_mb__after_switch_mm();
+ switch_mm_cid(rq, prev, next);
+
+}
+
#else /* !CONFIG_SCHED_MM_CID: */
static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { }
static inline void sched_mm_cid_migrate_from(struct task_struct *t) { }
--
2.46.0
Hi Ahmed,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/sched/core]
[also build test ERROR on linus/master v6.11-rc5 next-20240826]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ahmed-Ehab/Refactor-switch_mm_cid-to-avoid-unnecessary-checks/20240826-153216
base: tip/sched/core
patch link: https://lore.kernel.org/r/20240824223132.11925-1-bottaawesome633%40gmail.com
patch subject: [PATCH] Refactor switch_mm_cid() to avoid unnecessary checks
config: s390-allnoconfig (https://download.01.org/0day-ci/archive/20240827/202408270315.58WsW5Fq-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 08e5a1de8227512d4774a534b91cb2353cef6284)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240827/202408270315.58WsW5Fq-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408270315.58WsW5Fq-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from kernel/sched/core.c:10:
In file included from include/linux/highmem.h:10:
In file included from include/linux/mm.h:2228:
include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
514 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
| ~~~~~~~~~~~ ^ ~~~
In file included from kernel/sched/core.c:34:
In file included from include/linux/sched/isolation.h:7:
In file included from include/linux/tick.h:8:
In file included from include/linux/clockchips.h:14:
In file included from include/linux/clocksource.h:22:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
548 | val = __raw_readb(PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
561 | val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
| ^
include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
| ^
In file included from kernel/sched/core.c:34:
In file included from include/linux/sched/isolation.h:7:
In file included from include/linux/tick.h:8:
In file included from include/linux/clockchips.h:14:
In file included from include/linux/clocksource.h:22:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
574 | val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
| ^
include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
| ^
In file included from kernel/sched/core.c:34:
In file included from include/linux/sched/isolation.h:7:
In file included from include/linux/tick.h:8:
In file included from include/linux/clockchips.h:14:
In file included from include/linux/clocksource.h:22:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
585 | __raw_writeb(value, PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
595 | __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
605 | __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
693 | readsb(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
701 | readsw(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
709 | readsl(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
718 | writesb(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
727 | writesw(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
736 | writesl(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
>> kernel/sched/core.c:5232:4: error: call to undeclared function 'switch_mm_cid_from_user_to_kernel'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
5232 | switch_mm_cid_from_user_to_kernel(rq, prev, next);
| ^
>> kernel/sched/core.c:5257:4: error: call to undeclared function 'switch_mm_cid_from_kernel_to_user'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
5257 | switch_mm_cid_from_kernel_to_user(rq, prev, next);
| ^
>> kernel/sched/core.c:5259:4: error: call to undeclared function 'switch_mm_cid_from_user_to_user'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
5259 | switch_mm_cid_from_user_to_user(rq, prev, next);
| ^
13 warnings and 3 errors generated.
vim +/switch_mm_cid_from_user_to_kernel +5232 kernel/sched/core.c
5199
5200 /*
5201 * context_switch - switch to the new MM and the new thread's register state.
5202 */
5203 static __always_inline struct rq *
5204 context_switch(struct rq *rq, struct task_struct *prev,
5205 struct task_struct *next, struct rq_flags *rf)
5206 {
5207 prepare_task_switch(rq, prev, next);
5208
5209 /*
5210 * For paravirt, this is coupled with an exit in switch_to to
5211 * combine the page table reload and the switch backend into
5212 * one hypercall.
5213 */
5214 arch_start_context_switch(prev);
5215
5216 /*
5217 * kernel -> kernel lazy + transfer active
5218 * user -> kernel lazy + mmgrab_lazy_tlb() active
5219 *
5220 * kernel -> user switch + mmdrop_lazy_tlb() active
5221 * user -> user switch
5222 *
5223 * switch_mm_cid() needs to be updated if the barriers provided
5224 * by context_switch() are modified.
5225 */
5226 if (!next->mm) { // to kernel
5227 enter_lazy_tlb(prev->active_mm, next);
5228
5229 next->active_mm = prev->active_mm;
5230 if (prev->mm) { // from user
5231 mmgrab_lazy_tlb(prev->active_mm);
> 5232 switch_mm_cid_from_user_to_kernel(rq, prev, next);
5233 }
5234 else
5235 /*
5236 * kernel -> kernel transition does not change rq->curr->mm
5237 * state. It stays NULL.
5238 */
5239 prev->active_mm = NULL;
5240 } else { // to user
5241 membarrier_switch_mm(rq, prev->active_mm, next->mm);
5242 /*
5243 * sys_membarrier() requires an smp_mb() between setting
5244 * rq->curr / membarrier_switch_mm() and returning to userspace.
5245 *
5246 * The below provides this either through switch_mm(), or in
5247 * case 'prev->active_mm == next->mm' through
5248 * finish_task_switch()'s mmdrop().
5249 */
5250 switch_mm_irqs_off(prev->active_mm, next->mm, next);
5251 lru_gen_use_mm(next->mm);
5252
5253 if (!prev->mm) { // from kernel
5254 /* will mmdrop_lazy_tlb() in finish_task_switch(). */
5255 rq->prev_mm = prev->active_mm;
5256 prev->active_mm = NULL;
> 5257 switch_mm_cid_from_kernel_to_user(rq, prev, next);
5258 } else
> 5259 switch_mm_cid_from_user_to_user(rq, prev, next);
5260 }
5261
5262 prepare_lock_switch(rq, next, rf);
5263
5264 /* Here we just switch the register state and the stack. */
5265 switch_to(prev, next, prev);
5266 barrier();
5267
5268 return finish_task_switch(prev);
5269 }
5270
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Ahmed,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/sched/core]
[also build test ERROR on linus/master v6.11-rc5 next-20240826]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ahmed-Ehab/Refactor-switch_mm_cid-to-avoid-unnecessary-checks/20240826-153216
base: tip/sched/core
patch link: https://lore.kernel.org/r/20240824223132.11925-1-bottaawesome633%40gmail.com
patch subject: [PATCH] Refactor switch_mm_cid() to avoid unnecessary checks
config: arm-ep93xx_defconfig (https://download.01.org/0day-ci/archive/20240827/202408270455.R85TrPfw-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240827/202408270455.R85TrPfw-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408270455.R85TrPfw-lkp@intel.com/
All errors (new ones prefixed by >>):
>> kernel/sched/core.c:5232:4: error: implicit declaration of function 'switch_mm_cid_from_user_to_kernel' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
switch_mm_cid_from_user_to_kernel(rq, prev, next);
^
>> kernel/sched/core.c:5257:4: error: implicit declaration of function 'switch_mm_cid_from_kernel_to_user' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
switch_mm_cid_from_kernel_to_user(rq, prev, next);
^
>> kernel/sched/core.c:5259:4: error: implicit declaration of function 'switch_mm_cid_from_user_to_user' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
switch_mm_cid_from_user_to_user(rq, prev, next);
^
3 errors generated.
vim +/switch_mm_cid_from_user_to_kernel +5232 kernel/sched/core.c
5199
5200 /*
5201 * context_switch - switch to the new MM and the new thread's register state.
5202 */
5203 static __always_inline struct rq *
5204 context_switch(struct rq *rq, struct task_struct *prev,
5205 struct task_struct *next, struct rq_flags *rf)
5206 {
5207 prepare_task_switch(rq, prev, next);
5208
5209 /*
5210 * For paravirt, this is coupled with an exit in switch_to to
5211 * combine the page table reload and the switch backend into
5212 * one hypercall.
5213 */
5214 arch_start_context_switch(prev);
5215
5216 /*
5217 * kernel -> kernel lazy + transfer active
5218 * user -> kernel lazy + mmgrab_lazy_tlb() active
5219 *
5220 * kernel -> user switch + mmdrop_lazy_tlb() active
5221 * user -> user switch
5222 *
5223 * switch_mm_cid() needs to be updated if the barriers provided
5224 * by context_switch() are modified.
5225 */
5226 if (!next->mm) { // to kernel
5227 enter_lazy_tlb(prev->active_mm, next);
5228
5229 next->active_mm = prev->active_mm;
5230 if (prev->mm) { // from user
5231 mmgrab_lazy_tlb(prev->active_mm);
> 5232 switch_mm_cid_from_user_to_kernel(rq, prev, next);
5233 }
5234 else
5235 /*
5236 * kernel -> kernel transition does not change rq->curr->mm
5237 * state. It stays NULL.
5238 */
5239 prev->active_mm = NULL;
5240 } else { // to user
5241 membarrier_switch_mm(rq, prev->active_mm, next->mm);
5242 /*
5243 * sys_membarrier() requires an smp_mb() between setting
5244 * rq->curr / membarrier_switch_mm() and returning to userspace.
5245 *
5246 * The below provides this either through switch_mm(), or in
5247 * case 'prev->active_mm == next->mm' through
5248 * finish_task_switch()'s mmdrop().
5249 */
5250 switch_mm_irqs_off(prev->active_mm, next->mm, next);
5251 lru_gen_use_mm(next->mm);
5252
5253 if (!prev->mm) { // from kernel
5254 /* will mmdrop_lazy_tlb() in finish_task_switch(). */
5255 rq->prev_mm = prev->active_mm;
5256 prev->active_mm = NULL;
> 5257 switch_mm_cid_from_kernel_to_user(rq, prev, next);
5258 } else
> 5259 switch_mm_cid_from_user_to_user(rq, prev, next);
5260 }
5261
5262 prepare_lock_switch(rq, next, rf);
5263
5264 /* Here we just switch the register state and the stack. */
5265 switch_to(prev, next, prev);
5266 barrier();
5267
5268 return finish_task_switch(prev);
5269 }
5270
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.