[PATCH] Refactor switch_mm_cid() to avoid unnecessary checks

Ahmed Ehab posted 1 patch 1 year, 3 months ago
There is a newer version of this series
kernel/sched/core.c  | 15 +++++---
kernel/sched/sched.h | 86 ++++++++++++++++++++++++++------------------
2 files changed, 62 insertions(+), 39 deletions(-)
[PATCH] Refactor switch_mm_cid() to avoid unnecessary checks
Posted by Ahmed Ehab 1 year, 3 months ago
The issue is that we check whether we are switching from {kernel,user}
to {kernel,user} multiple times unnecessarily.

To fix this, refactor switch_mm_cid() and break it into multiple functions
to handle the cases of switching from {kernel,user} to {kernel,user}.
Hence, we avoid any redundant checks.

Signed-off-by: Ahmed Ehab <bottaawesome633@gmail.com>
---
 kernel/sched/core.c  | 15 +++++---
 kernel/sched/sched.h | 86 ++++++++++++++++++++++++++------------------
 2 files changed, 62 insertions(+), 39 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f3951e4a55e5..abfa73f9c845 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5155,9 +5155,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
 		enter_lazy_tlb(prev->active_mm, next);
 
 		next->active_mm = prev->active_mm;
-		if (prev->mm)                           // from user
+		if (prev->mm) {                           // from user
 			mmgrab_lazy_tlb(prev->active_mm);
+			switch_mm_cid_from_user_to_kernel(rq, prev, next);
+		}
 		else
+			/*
+			 * kernel -> kernel transition does not change rq->curr->mm
+			 * state. It stays NULL.
+			 */
 			prev->active_mm = NULL;
 	} else {                                        // to user
 		membarrier_switch_mm(rq, prev->active_mm, next->mm);
@@ -5176,12 +5182,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
 			/* will mmdrop_lazy_tlb() in finish_task_switch(). */
 			rq->prev_mm = prev->active_mm;
 			prev->active_mm = NULL;
-		}
+			switch_mm_cid_from_kernel_to_user(rq, prev, next);
+		} else
+			switch_mm_cid_from_user_to_user(rq, prev, next);
 	}
 
-	/* switch_mm_cid() requires the memory barriers above. */
-	switch_mm_cid(rq, prev, next);
-
 	prepare_lock_switch(rq, next, rf);
 
 	/* Here we just switch the register state and the stack. */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4c36cc680361..27fa050b81f5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -5,6 +5,7 @@
 #ifndef _KERNEL_SCHED_SCHED_H
 #define _KERNEL_SCHED_SCHED_H
 
+#include "asm-generic/barrier.h"
 #include <linux/sched/affinity.h>
 #include <linux/sched/autogroup.h>
 #include <linux/sched/cpufreq.h>
@@ -3515,8 +3516,8 @@ static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
 }
 
 static inline void switch_mm_cid(struct rq *rq,
-				 struct task_struct *prev,
-				 struct task_struct *next)
+		struct task_struct *prev,
+		struct task_struct *next)
 {
 	/*
 	 * Provide a memory barrier between rq->curr store and load of
@@ -3524,38 +3525,6 @@ static inline void switch_mm_cid(struct rq *rq,
 	 *
 	 * Should be adapted if context_switch() is modified.
 	 */
-	if (!next->mm) {                                // to kernel
-		/*
-		 * user -> kernel transition does not guarantee a barrier, but
-		 * we can use the fact that it performs an atomic operation in
-		 * mmgrab().
-		 */
-		if (prev->mm)                           // from user
-			smp_mb__after_mmgrab();
-		/*
-		 * kernel -> kernel transition does not change rq->curr->mm
-		 * state. It stays NULL.
-		 */
-	} else {                                        // to user
-		/*
-		 * kernel -> user transition does not provide a barrier
-		 * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
-		 * Provide it here.
-		 */
-		if (!prev->mm) {                        // from kernel
-			smp_mb();
-		} else {				// from user
-			/*
-			 * user->user transition relies on an implicit
-			 * memory barrier in switch_mm() when
-			 * current->mm changes. If the architecture
-			 * switch_mm() does not have an implicit memory
-			 * barrier, it is emitted here.  If current->mm
-			 * is unchanged, no barrier is needed.
-			 */
-			smp_mb__after_switch_mm();
-		}
-	}
 	if (prev->mm_cid_active) {
 		mm_cid_snapshot_time(rq, prev->mm);
 		mm_cid_put_lazy(prev);
@@ -3565,6 +3534,55 @@ static inline void switch_mm_cid(struct rq *rq,
 		next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm);
 }
 
+static inline void switch_mm_cid_from_user_to_kernel(struct rq *rq,
+		struct task_struct *prev,
+		struct task_struct *next)
+
+{
+	/**
+	 * user -> kernel transition does not guarantee a barrier, but
+	 * we can use the fact that it performs an atomic operation in
+	 * mmgrab().
+	 */
+	smp_mb__after_mmgrab();
+	switch_mm_cid(rq, prev, next);
+
+}
+
+static inline void switch_mm_cid_from_kernel_to_user(struct rq *rq,
+		struct task_struct *prev,
+		struct task_struct *next)
+
+{
+	/*
+	 * kernel -> user transition does not provide a barrier
+	 * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
+	 * Provide it here.
+	 */
+	smp_mb();
+	switch_mm_cid(rq, prev, next);
+
+}
+
+
+static inline void switch_mm_cid_from_user_to_user(struct rq *rq,
+		struct task_struct *prev,
+		struct task_struct *next)
+
+{
+	/*
+	 * user->user transition relies on an implicit
+	 * memory barrier in switch_mm() when
+	 * current->mm changes. If the architecture
+	 * switch_mm() does not have an implicit memory
+	 * barrier, it is emitted here.  If current->mm
+	 * is unchanged, no barrier is needed.
+	 */
+	smp_mb__after_switch_mm();
+	switch_mm_cid(rq, prev, next);
+
+}
+
 #else /* !CONFIG_SCHED_MM_CID: */
 static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { }
 static inline void sched_mm_cid_migrate_from(struct task_struct *t) { }
-- 
2.46.0
Re: [PATCH] Refactor switch_mm_cid() to avoid unnecessary checks
Posted by kernel test robot 1 year, 3 months ago
Hi Ahmed,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on linus/master v6.11-rc5 next-20240826]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Ahmed-Ehab/Refactor-switch_mm_cid-to-avoid-unnecessary-checks/20240826-153216
base:   tip/sched/core
patch link:    https://lore.kernel.org/r/20240824223132.11925-1-bottaawesome633%40gmail.com
patch subject: [PATCH] Refactor switch_mm_cid() to avoid unnecessary checks
config: s390-allnoconfig (https://download.01.org/0day-ci/archive/20240827/202408270315.58WsW5Fq-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 08e5a1de8227512d4774a534b91cb2353cef6284)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240827/202408270315.58WsW5Fq-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408270315.58WsW5Fq-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from kernel/sched/core.c:10:
   In file included from include/linux/highmem.h:10:
   In file included from include/linux/mm.h:2228:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from kernel/sched/core.c:34:
   In file included from include/linux/sched/isolation.h:7:
   In file included from include/linux/tick.h:8:
   In file included from include/linux/clockchips.h:14:
   In file included from include/linux/clocksource.h:22:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     548 |         val = __raw_readb(PCI_IOBASE + addr);
         |                           ~~~~~~~~~~ ^
   include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     561 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
      37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
         |                                                           ^
   include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
     102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
         |                                                      ^
   In file included from kernel/sched/core.c:34:
   In file included from include/linux/sched/isolation.h:7:
   In file included from include/linux/tick.h:8:
   In file included from include/linux/clockchips.h:14:
   In file included from include/linux/clocksource.h:22:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     574 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
      35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
         |                                                           ^
   include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
     115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
         |                                                      ^
   In file included from kernel/sched/core.c:34:
   In file included from include/linux/sched/isolation.h:7:
   In file included from include/linux/tick.h:8:
   In file included from include/linux/clockchips.h:14:
   In file included from include/linux/clocksource.h:22:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     585 |         __raw_writeb(value, PCI_IOBASE + addr);
         |                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     595 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     605 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     693 |         readsb(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     701 |         readsw(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     709 |         readsl(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     718 |         writesb(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     727 |         writesw(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     736 |         writesl(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
>> kernel/sched/core.c:5232:4: error: call to undeclared function 'switch_mm_cid_from_user_to_kernel'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
    5232 |                         switch_mm_cid_from_user_to_kernel(rq, prev, next);
         |                         ^
>> kernel/sched/core.c:5257:4: error: call to undeclared function 'switch_mm_cid_from_kernel_to_user'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
    5257 |                         switch_mm_cid_from_kernel_to_user(rq, prev, next);
         |                         ^
>> kernel/sched/core.c:5259:4: error: call to undeclared function 'switch_mm_cid_from_user_to_user'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
    5259 |                         switch_mm_cid_from_user_to_user(rq, prev, next);
         |                         ^
   13 warnings and 3 errors generated.


vim +/switch_mm_cid_from_user_to_kernel +5232 kernel/sched/core.c

  5199	
  5200	/*
  5201	 * context_switch - switch to the new MM and the new thread's register state.
  5202	 */
  5203	static __always_inline struct rq *
  5204	context_switch(struct rq *rq, struct task_struct *prev,
  5205		       struct task_struct *next, struct rq_flags *rf)
  5206	{
  5207		prepare_task_switch(rq, prev, next);
  5208	
  5209		/*
  5210		 * For paravirt, this is coupled with an exit in switch_to to
  5211		 * combine the page table reload and the switch backend into
  5212		 * one hypercall.
  5213		 */
  5214		arch_start_context_switch(prev);
  5215	
  5216		/*
  5217		 * kernel -> kernel   lazy + transfer active
  5218		 *   user -> kernel   lazy + mmgrab_lazy_tlb() active
  5219		 *
  5220		 * kernel ->   user   switch + mmdrop_lazy_tlb() active
  5221		 *   user ->   user   switch
  5222		 *
  5223		 * switch_mm_cid() needs to be updated if the barriers provided
  5224		 * by context_switch() are modified.
  5225		 */
  5226		if (!next->mm) {                                // to kernel
  5227			enter_lazy_tlb(prev->active_mm, next);
  5228	
  5229			next->active_mm = prev->active_mm;
  5230			if (prev->mm) {                           // from user
  5231				mmgrab_lazy_tlb(prev->active_mm);
> 5232				switch_mm_cid_from_user_to_kernel(rq, prev, next);
  5233			}
  5234			else
  5235				/*
  5236				 * kernel -> kernel transition does not change rq->curr->mm
  5237				 * state. It stays NULL.
  5238				 */
  5239				prev->active_mm = NULL;
  5240		} else {                                        // to user
  5241			membarrier_switch_mm(rq, prev->active_mm, next->mm);
  5242			/*
  5243			 * sys_membarrier() requires an smp_mb() between setting
  5244			 * rq->curr / membarrier_switch_mm() and returning to userspace.
  5245			 *
  5246			 * The below provides this either through switch_mm(), or in
  5247			 * case 'prev->active_mm == next->mm' through
  5248			 * finish_task_switch()'s mmdrop().
  5249			 */
  5250			switch_mm_irqs_off(prev->active_mm, next->mm, next);
  5251			lru_gen_use_mm(next->mm);
  5252	
  5253			if (!prev->mm) {                        // from kernel
  5254				/* will mmdrop_lazy_tlb() in finish_task_switch(). */
  5255				rq->prev_mm = prev->active_mm;
  5256				prev->active_mm = NULL;
> 5257				switch_mm_cid_from_kernel_to_user(rq, prev, next);
  5258			} else
> 5259				switch_mm_cid_from_user_to_user(rq, prev, next);
  5260		}
  5261	
  5262		prepare_lock_switch(rq, next, rf);
  5263	
  5264		/* Here we just switch the register state and the stack. */
  5265		switch_to(prev, next, prev);
  5266		barrier();
  5267	
  5268		return finish_task_switch(prev);
  5269	}
  5270	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] Refactor switch_mm_cid() to avoid unnecessary checks
Posted by kernel test robot 1 year, 3 months ago
Hi Ahmed,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on linus/master v6.11-rc5 next-20240826]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Ahmed-Ehab/Refactor-switch_mm_cid-to-avoid-unnecessary-checks/20240826-153216
base:   tip/sched/core
patch link:    https://lore.kernel.org/r/20240824223132.11925-1-bottaawesome633%40gmail.com
patch subject: [PATCH] Refactor switch_mm_cid() to avoid unnecessary checks
config: arm-ep93xx_defconfig (https://download.01.org/0day-ci/archive/20240827/202408270455.R85TrPfw-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240827/202408270455.R85TrPfw-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408270455.R85TrPfw-lkp@intel.com/

All errors (new ones prefixed by >>):

>> kernel/sched/core.c:5232:4: error: implicit declaration of function 'switch_mm_cid_from_user_to_kernel' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
                           switch_mm_cid_from_user_to_kernel(rq, prev, next);
                           ^
>> kernel/sched/core.c:5257:4: error: implicit declaration of function 'switch_mm_cid_from_kernel_to_user' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
                           switch_mm_cid_from_kernel_to_user(rq, prev, next);
                           ^
>> kernel/sched/core.c:5259:4: error: implicit declaration of function 'switch_mm_cid_from_user_to_user' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
                           switch_mm_cid_from_user_to_user(rq, prev, next);
                           ^
   3 errors generated.


vim +/switch_mm_cid_from_user_to_kernel +5232 kernel/sched/core.c

  5199	
  5200	/*
  5201	 * context_switch - switch to the new MM and the new thread's register state.
  5202	 */
  5203	static __always_inline struct rq *
  5204	context_switch(struct rq *rq, struct task_struct *prev,
  5205		       struct task_struct *next, struct rq_flags *rf)
  5206	{
  5207		prepare_task_switch(rq, prev, next);
  5208	
  5209		/*
  5210		 * For paravirt, this is coupled with an exit in switch_to to
  5211		 * combine the page table reload and the switch backend into
  5212		 * one hypercall.
  5213		 */
  5214		arch_start_context_switch(prev);
  5215	
  5216		/*
  5217		 * kernel -> kernel   lazy + transfer active
  5218		 *   user -> kernel   lazy + mmgrab_lazy_tlb() active
  5219		 *
  5220		 * kernel ->   user   switch + mmdrop_lazy_tlb() active
  5221		 *   user ->   user   switch
  5222		 *
  5223		 * switch_mm_cid() needs to be updated if the barriers provided
  5224		 * by context_switch() are modified.
  5225		 */
  5226		if (!next->mm) {                                // to kernel
  5227			enter_lazy_tlb(prev->active_mm, next);
  5228	
  5229			next->active_mm = prev->active_mm;
  5230			if (prev->mm) {                           // from user
  5231				mmgrab_lazy_tlb(prev->active_mm);
> 5232				switch_mm_cid_from_user_to_kernel(rq, prev, next);
  5233			}
  5234			else
  5235				/*
  5236				 * kernel -> kernel transition does not change rq->curr->mm
  5237				 * state. It stays NULL.
  5238				 */
  5239				prev->active_mm = NULL;
  5240		} else {                                        // to user
  5241			membarrier_switch_mm(rq, prev->active_mm, next->mm);
  5242			/*
  5243			 * sys_membarrier() requires an smp_mb() between setting
  5244			 * rq->curr / membarrier_switch_mm() and returning to userspace.
  5245			 *
  5246			 * The below provides this either through switch_mm(), or in
  5247			 * case 'prev->active_mm == next->mm' through
  5248			 * finish_task_switch()'s mmdrop().
  5249			 */
  5250			switch_mm_irqs_off(prev->active_mm, next->mm, next);
  5251			lru_gen_use_mm(next->mm);
  5252	
  5253			if (!prev->mm) {                        // from kernel
  5254				/* will mmdrop_lazy_tlb() in finish_task_switch(). */
  5255				rq->prev_mm = prev->active_mm;
  5256				prev->active_mm = NULL;
> 5257				switch_mm_cid_from_kernel_to_user(rq, prev, next);
  5258			} else
> 5259				switch_mm_cid_from_user_to_user(rq, prev, next);
  5260		}
  5261	
  5262		prepare_lock_switch(rq, next, rf);
  5263	
  5264		/* Here we just switch the register state and the stack. */
  5265		switch_to(prev, next, prev);
  5266		barrier();
  5267	
  5268		return finish_task_switch(prev);
  5269	}
  5270	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki