big header dependency cleanup targeting sched.h

[PATCH 11/50] nodemask: Split out include/linux/nodemask_types.h

Posted by Kent Overstreet 2 years, 1 month ago

sched.h, which defines task_struct, needs nodemask_t - but sched.h is a
frequently used header and ideally shouldn't be pulling in any more code
than it needs to.

This splits out nodemask_types.h which has the definition sched.h needs,
which will avoid a circular header dependency in the alloc tagging patch
series, and as a bonus should speed up kernel build times.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/nodemask.h       |  2 +-
 include/linux/nodemask_types.h | 10 ++++++++++
 include/linux/sched.h          |  2 +-
 3 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/nodemask_types.h

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 8d07116caaf1..b61438313a73 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -93,10 +93,10 @@
 #include <linux/threads.h>
 #include <linux/bitmap.h>
 #include <linux/minmax.h>
+#include <linux/nodemask_types.h>
 #include <linux/numa.h>
 #include <linux/random.h>
 
-typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
 extern nodemask_t _unused_nodemask_arg_;
 
 /**
diff --git a/include/linux/nodemask_types.h b/include/linux/nodemask_types.h
new file mode 100644
index 000000000000..6b28d97ea6ed
--- /dev/null
+++ b/include/linux/nodemask_types.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_NODEMASK_TYPES_H
+#define __LINUX_NODEMASK_TYPES_H
+
+#include <linux/bitops.h>
+#include <linux/numa.h>
+
+typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
+
+#endif /* __LINUX_NODEMASK_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 292c31697248..5a5b7b122682 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -20,7 +20,7 @@
 #include <linux/hrtimer.h>
 #include <linux/irqflags.h>
 #include <linux/seccomp.h>
-#include <linux/nodemask.h>
+#include <linux/nodemask_types.h>
 #include <linux/rcupdate.h>
 #include <linux/refcount.h>
 #include <linux/resource.h>
-- 
2.43.0

[PATCH 12/50] prandom: Remove unused include

Posted by Kent Overstreet 2 years, 1 month ago

prandom.h doesn't use percpu.h - this fixes some circular header issues.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 include/linux/prandom.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index f2ed5b72b3d6..f7f1e5251c67 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -10,7 +10,6 @@
 
 #include <linux/types.h>
 #include <linux/once.h>
-#include <linux/percpu.h>
 #include <linux/random.h>
 
 struct rnd_state {
-- 
2.43.0

Re: [PATCH 12/50] prandom: Remove unused include

Posted by Randy Dunlap 2 years, 1 month ago


On 12/15/23 19:26, Kent Overstreet wrote:
> prandom.h doesn't use percpu.h - this fixes some circular header issues.
> 
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> ---
>  include/linux/prandom.h | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/include/linux/prandom.h b/include/linux/prandom.h
> index f2ed5b72b3d6..f7f1e5251c67 100644
> --- a/include/linux/prandom.h
> +++ b/include/linux/prandom.h
> @@ -10,7 +10,6 @@
>  
>  #include <linux/types.h>
>  #include <linux/once.h>
> -#include <linux/percpu.h>
>  #include <linux/random.h>
>  
>  struct rnd_state {

In this header file:

    22	void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);

so where does it get __percpu from?

-- 
#Randy
https://people.kernel.org/tglx/notes-about-netiquette
https://subspace.kernel.org/etiquette.html

Re: [PATCH 12/50] prandom: Remove unused include

Posted by Kent Overstreet 2 years, 1 month ago

On Sat, Dec 16, 2023 at 10:52:04AM -0800, Randy Dunlap wrote:
> 
> 
> On 12/15/23 19:26, Kent Overstreet wrote:
> > prandom.h doesn't use percpu.h - this fixes some circular header issues.
> > 
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> > ---
> >  include/linux/prandom.h | 1 -
> >  1 file changed, 1 deletion(-)
> > 
> > diff --git a/include/linux/prandom.h b/include/linux/prandom.h
> > index f2ed5b72b3d6..f7f1e5251c67 100644
> > --- a/include/linux/prandom.h
> > +++ b/include/linux/prandom.h
> > @@ -10,7 +10,6 @@
> >  
> >  #include <linux/types.h>
> >  #include <linux/once.h>
> > -#include <linux/percpu.h>
> >  #include <linux/random.h>
> >  
> >  struct rnd_state {
> 
> In this header file:
> 
>     22	void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
> 
> so where does it get __percpu from?

That comes from compiler.h -> compiler_types.h... cscope :)

[PATCH 13/50] timekeeping: Kill percpu.h dependency

Posted by Kent Overstreet 2 years, 1 month ago

Slimming down recursive header includes.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h        | 2 +-
 include/linux/time_namespace.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index f2044d5a652b..02d264ca9dce 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -16,7 +16,7 @@
 #include <linux/rbtree.h>
 #include <linux/init.h>
 #include <linux/list.h>
-#include <linux/percpu.h>
+#include <linux/percpu-defs.h>
 #include <linux/seqlock.h>
 #include <linux/timer.h>
 #include <linux/timerqueue.h>
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 5258d81cef17..876e31b4461d 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -12,6 +12,8 @@
 struct user_namespace;
 extern struct user_namespace init_user_ns;
 
+struct vm_area_struct;
+
 struct timens_offsets {
 	struct timespec64 monotonic;
 	struct timespec64 boottime;
-- 
2.43.0

[PATCH 14/50] arm64: Fix circular header dependency

Posted by Kent Overstreet 2 years, 1 month ago

Replace linux/percpu.h include with asm/percpu.h to avoid circular
dependency.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 arch/arm64/include/asm/spectre.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index 06c357d83b13..0c4d9045c31f 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -13,8 +13,8 @@
 #define __BP_HARDEN_HYP_VECS_SZ	((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K)
 
 #ifndef __ASSEMBLY__
-
-#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <asm/percpu.h>
 
 #include <asm/cpufeature.h>
 #include <asm/virt.h>
-- 
2.43.0

[PATCH 15/50] kernel/numa.c: Move logging out of numa.h

Posted by Kent Overstreet 2 years, 1 month ago

Moving these stub functions to a .c file means we can kill a sched.h
dependency on printk.h.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/numa.h | 18 +++++-------------
 kernel/Makefile      |  1 +
 kernel/numa.c        | 24 ++++++++++++++++++++++++
 3 files changed, 30 insertions(+), 13 deletions(-)
 create mode 100644 kernel/numa.c

diff --git a/include/linux/numa.h b/include/linux/numa.h
index a904861de800..aeab3d9f57ae 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -22,34 +22,26 @@
 #endif
 
 #ifdef CONFIG_NUMA
-#include <linux/printk.h>
 #include <asm/sparsemem.h>
 
 /* Generic implementation available */
 int numa_nearest_node(int node, unsigned int state);
 
 #ifndef memory_add_physaddr_to_nid
-static inline int memory_add_physaddr_to_nid(u64 start)
-{
-	pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
-			start);
-	return 0;
-}
+int memory_add_physaddr_to_nid(u64 start);
 #endif
+
 #ifndef phys_to_target_node
-static inline int phys_to_target_node(u64 start)
-{
-	pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
-			start);
-	return 0;
-}
+int phys_to_target_node(u64 start);
 #endif
+
 #ifndef numa_fill_memblks
 static inline int __init numa_fill_memblks(u64 start, u64 end)
 {
 	return NUMA_NO_MEMBLK;
 }
 #endif
+
 #else /* !CONFIG_NUMA */
 static inline int numa_nearest_node(int node, unsigned int state)
 {
diff --git a/kernel/Makefile b/kernel/Makefile
index 3947122d618b..ce105a5558fc 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -114,6 +114,7 @@ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
 obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
 obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o
 obj-$(CONFIG_CFI_CLANG) += cfi.o
+obj-$(CONFIG_NUMA) += numa.o
 
 obj-$(CONFIG_PERF_EVENTS) += events/
 
diff --git a/kernel/numa.c b/kernel/numa.c
new file mode 100644
index 000000000000..c24c72f45989
--- /dev/null
+++ b/kernel/numa.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/printk.h>
+#include <linux/numa.h>
+
+/* Stub functions: */
+
+#ifndef memory_add_physaddr_to_nid
+int memory_add_physaddr_to_nid(u64 start)
+{
+	pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
+			start);
+	return 0;
+}
+#endif
+
+#ifndef phys_to_target_node
+int phys_to_target_node(u64 start)
+{
+	pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
+			start);
+	return 0;
+}
+#endif
-- 
2.43.0

Re: [PATCH 15/50] kernel/numa.c: Move logging out of numa.h

Posted by Matthew Wilcox 2 years, 1 month ago

On Fri, Dec 15, 2023 at 10:26:14PM -0500, Kent Overstreet wrote:
> diff --git a/kernel/numa.c b/kernel/numa.c
> new file mode 100644
> index 000000000000..c24c72f45989
> --- /dev/null
> +++ b/kernel/numa.c

Should this be a new file or would these functions fit better in, eg,
mempolicy.c which is already built only if CONFIG_NUMA?

Re: [PATCH 15/50] kernel/numa.c: Move logging out of numa.h

Posted by Kent Overstreet 2 years, 1 month ago

On Tue, Dec 19, 2023 at 10:52:33PM +0000, Matthew Wilcox wrote:
> On Fri, Dec 15, 2023 at 10:26:14PM -0500, Kent Overstreet wrote:
> > diff --git a/kernel/numa.c b/kernel/numa.c
> > new file mode 100644
> > index 000000000000..c24c72f45989
> > --- /dev/null
> > +++ b/kernel/numa.c
> 
> Should this be a new file or would these functions fit better in, eg,
> mempolicy.c which is already built only if CONFIG_NUMA?

That does look like a bit of a disorganized dumping ground though; I
wonder if anyone would want to start mm/numa/ and do a bit of
organizing?

Re: [PATCH 15/50] kernel/numa.c: Move logging out of numa.h

Posted by Nathan Chancellor 2 years, 1 month ago

On Fri, Dec 15, 2023 at 10:26:14PM -0500, Kent Overstreet wrote:
> Moving these stub functions to a .c file means we can kill a sched.h
> dependency on printk.h.
> 
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
>  include/linux/numa.h | 18 +++++-------------
>  kernel/Makefile      |  1 +
>  kernel/numa.c        | 24 ++++++++++++++++++++++++
>  3 files changed, 30 insertions(+), 13 deletions(-)
>  create mode 100644 kernel/numa.c
> 
> diff --git a/include/linux/numa.h b/include/linux/numa.h
> index a904861de800..aeab3d9f57ae 100644
> --- a/include/linux/numa.h
> +++ b/include/linux/numa.h
> @@ -22,34 +22,26 @@
>  #endif
>  
>  #ifdef CONFIG_NUMA
> -#include <linux/printk.h>
>  #include <asm/sparsemem.h>
>  
>  /* Generic implementation available */
>  int numa_nearest_node(int node, unsigned int state);
>  
>  #ifndef memory_add_physaddr_to_nid
> -static inline int memory_add_physaddr_to_nid(u64 start)
> -{
> -	pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
> -			start);
> -	return 0;
> -}
> +int memory_add_physaddr_to_nid(u64 start);
>  #endif
> +
>  #ifndef phys_to_target_node
> -static inline int phys_to_target_node(u64 start)
> -{
> -	pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
> -			start);
> -	return 0;
> -}
> +int phys_to_target_node(u64 start);
>  #endif
> +
>  #ifndef numa_fill_memblks
>  static inline int __init numa_fill_memblks(u64 start, u64 end)
>  {
>  	return NUMA_NO_MEMBLK;
>  }
>  #endif
> +
>  #else /* !CONFIG_NUMA */
>  static inline int numa_nearest_node(int node, unsigned int state)
>  {
> diff --git a/kernel/Makefile b/kernel/Makefile
> index 3947122d618b..ce105a5558fc 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -114,6 +114,7 @@ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
>  obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
>  obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o
>  obj-$(CONFIG_CFI_CLANG) += cfi.o
> +obj-$(CONFIG_NUMA) += numa.o
>  
>  obj-$(CONFIG_PERF_EVENTS) += events/
>  
> diff --git a/kernel/numa.c b/kernel/numa.c
> new file mode 100644
> index 000000000000..c24c72f45989
> --- /dev/null
> +++ b/kernel/numa.c
> @@ -0,0 +1,24 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#include <linux/printk.h>
> +#include <linux/numa.h>
> +
> +/* Stub functions: */
> +
> +#ifndef memory_add_physaddr_to_nid
> +int memory_add_physaddr_to_nid(u64 start)
> +{
> +	pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
> +			start);
> +	return 0;
> +}
> +#endif
> +
> +#ifndef phys_to_target_node
> +int phys_to_target_node(u64 start)
> +{
> +	pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
> +			start);
> +	return 0;
> +}
> +#endif
> -- 
> 2.43.0
> 

These need EXPORT_SYMBOL_GPL() now like the architecture specific
implementations because they are no longer inlined. My arm64 builds fail
with:

  ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/acpi/nfit/nfit.ko] undefined!
  ERROR: modpost: "phys_to_target_node" [drivers/acpi/nfit/nfit.ko] undefined!
  ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/virtio/virtio_mem.ko] undefined!
  ERROR: modpost: "phys_to_target_node" [drivers/dax/dax_cxl.ko] undefined!
  ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/dax/dax_cxl.ko] undefined!
  ERROR: modpost: "phys_to_target_node" [drivers/cxl/cxl_acpi.ko] undefined!
  ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/cxl/cxl_pmem.ko] undefined!
  ERROR: modpost: "phys_to_target_node" [drivers/cxl/cxl_pmem.ko] undefined!
  ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/hv/hv_balloon.ko] undefined!

Cheers,
Nathan

Re: [PATCH 15/50] kernel/numa.c: Move logging out of numa.h

Posted by Kent Overstreet 2 years, 1 month ago

On Tue, Dec 19, 2023 at 09:36:44AM -0700, Nathan Chancellor wrote:
> On Fri, Dec 15, 2023 at 10:26:14PM -0500, Kent Overstreet wrote:
> These need EXPORT_SYMBOL_GPL() now like the architecture specific
> implementations because they are no longer inlined. My arm64 builds fail
> with:
> 
>   ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/acpi/nfit/nfit.ko] undefined!
>   ERROR: modpost: "phys_to_target_node" [drivers/acpi/nfit/nfit.ko] undefined!
>   ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/virtio/virtio_mem.ko] undefined!
>   ERROR: modpost: "phys_to_target_node" [drivers/dax/dax_cxl.ko] undefined!
>   ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/dax/dax_cxl.ko] undefined!
>   ERROR: modpost: "phys_to_target_node" [drivers/cxl/cxl_acpi.ko] undefined!
>   ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/cxl/cxl_pmem.ko] undefined!
>   ERROR: modpost: "phys_to_target_node" [drivers/cxl/cxl_pmem.ko] undefined!
>   ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/hv/hv_balloon.ko] undefined!

Applied the following:


commit 7ae175e405b44b9897c04bbf177e3e08ab25710a
Author: Kent Overstreet <kent.overstreet@linux.dev>
Date:   Tue Dec 19 16:02:26 2023 -0500

    fixup! kernel/numa.c: Move logging out of numa.h

diff --git a/kernel/numa.c b/kernel/numa.c
index c24c72f45989..67ca6b8585c0 100644
--- a/kernel/numa.c
+++ b/kernel/numa.c
@@ -12,6 +12,7 @@ int memory_add_physaddr_to_nid(u64 start)
 			start);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
 #ifndef phys_to_target_node
@@ -21,4 +22,5 @@ int phys_to_target_node(u64 start)
 			start);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(phys_to_target_node);
 #endif

[PATCH 16/50] sched.h: Move (spin|rwlock)_needbreak() to spinlock.h

Posted by Kent Overstreet 2 years, 1 month ago

This lets us kill sched.h's dependency on spinlock.h.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/sched.h    | 31 -------------------------------
 include/linux/spinlock.h | 31 +++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a5b7b122682..7501a3451a20 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2227,37 +2227,6 @@ static inline bool preempt_model_preemptible(void)
 	return preempt_model_full() || preempt_model_rt();
 }
 
-/*
- * Does a critical section need to be broken due to another
- * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
- * but a general need for low latency)
- */
-static inline int spin_needbreak(spinlock_t *lock)
-{
-#ifdef CONFIG_PREEMPTION
-	return spin_is_contended(lock);
-#else
-	return 0;
-#endif
-}
-
-/*
- * Check if a rwlock is contended.
- * Returns non-zero if there is another task waiting on the rwlock.
- * Returns zero if the lock is not contended or the system / underlying
- * rwlock implementation does not support contention detection.
- * Technically does not depend on CONFIG_PREEMPTION, but a general need
- * for low latency.
- */
-static inline int rwlock_needbreak(rwlock_t *lock)
-{
-#ifdef CONFIG_PREEMPTION
-	return rwlock_is_contended(lock);
-#else
-	return 0;
-#endif
-}
-
 static __always_inline bool need_resched(void)
 {
 	return unlikely(tif_need_resched());
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 31d3d747a9db..0c71f06454d9 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -449,6 +449,37 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
 	return raw_spin_is_contended(&lock->rlock);
 }
 
+/*
+ * Does a critical section need to be broken due to another
+ * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
+ * but a general need for low latency)
+ */
+static inline int spin_needbreak(spinlock_t *lock)
+{
+#ifdef CONFIG_PREEMPTION
+	return spin_is_contended(lock);
+#else
+	return 0;
+#endif
+}
+
+/*
+ * Check if a rwlock is contended.
+ * Returns non-zero if there is another task waiting on the rwlock.
+ * Returns zero if the lock is not contended or the system / underlying
+ * rwlock implementation does not support contention detection.
+ * Technically does not depend on CONFIG_PREEMPTION, but a general need
+ * for low latency.
+ */
+static inline int rwlock_needbreak(rwlock_t *lock)
+{
+#ifdef CONFIG_PREEMPTION
+	return rwlock_is_contended(lock);
+#else
+	return 0;
+#endif
+}
+
 #define assert_spin_locked(lock)	assert_raw_spin_locked(&(lock)->rlock)
 
 #else  /* !CONFIG_PREEMPT_RT */
-- 
2.43.0

Re: [PATCH 16/50] sched.h: Move (spin|rwlock)_needbreak() to spinlock.h

Posted by Leonardo Bras 2 years ago

On Fri, Dec 15, 2023 at 10:26:15PM -0500, Kent Overstreet wrote:
> This lets us kill the dependency on spinlock.h.
> 
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
>  include/linux/sched.h    | 31 -------------------------------
>  include/linux/spinlock.h | 31 +++++++++++++++++++++++++++++++
>  2 files changed, 31 insertions(+), 31 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 5a5b7b122682..7501a3451a20 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -2227,37 +2227,6 @@ static inline bool preempt_model_preemptible(void)
>  	return preempt_model_full() || preempt_model_rt();
>  }
>  
> -/*
> - * Does a critical section need to be broken due to another
> - * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
> - * but a general need for low latency)
> - */
> -static inline int spin_needbreak(spinlock_t *lock)
> -{
> -#ifdef CONFIG_PREEMPTION
> -	return spin_is_contended(lock);
> -#else
> -	return 0;
> -#endif
> -}
> -
> -/*
> - * Check if a rwlock is contended.
> - * Returns non-zero if there is another task waiting on the rwlock.
> - * Returns zero if the lock is not contended or the system / underlying
> - * rwlock implementation does not support contention detection.
> - * Technically does not depend on CONFIG_PREEMPTION, but a general need
> - * for low latency.
> - */
> -static inline int rwlock_needbreak(rwlock_t *lock)
> -{
> -#ifdef CONFIG_PREEMPTION
> -	return rwlock_is_contended(lock);
> -#else
> -	return 0;
> -#endif
> -}
> -
>  static __always_inline bool need_resched(void)
>  {
>  	return unlikely(tif_need_resched());
> diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
> index 31d3d747a9db..0c71f06454d9 100644
> --- a/include/linux/spinlock.h
> +++ b/include/linux/spinlock.h
> @@ -449,6 +449,37 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
>  	return raw_spin_is_contended(&lock->rlock);
>  }
>  
> +/*
> + * Does a critical section need to be broken due to another
> + * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
> + * but a general need for low latency)
> + */
> +static inline int spin_needbreak(spinlock_t *lock)
> +{
> +#ifdef CONFIG_PREEMPTION
> +	return spin_is_contended(lock);
> +#else
> +	return 0;
> +#endif
> +}
> +
> +/*
> + * Check if a rwlock is contended.
> + * Returns non-zero if there is another task waiting on the rwlock.
> + * Returns zero if the lock is not contended or the system / underlying
> + * rwlock implementation does not support contention detection.
> + * Technically does not depend on CONFIG_PREEMPTION, but a general need
> + * for low latency.
> + */
> +static inline int rwlock_needbreak(rwlock_t *lock)
> +{
> +#ifdef CONFIG_PREEMPTION
> +	return rwlock_is_contended(lock);
> +#else
> +	return 0;
> +#endif
> +}
> +
>  #define assert_spin_locked(lock)	assert_raw_spin_locked(&(lock)->rlock)
>  
>  #else  /* !CONFIG_PREEMPT_RT */
> -- 
> 2.43.0



Hello Kent,

This patch is breaking PREEMPT_RT builds, but it can be easily fixed.

I sent a patch with the fix, please take a look:
https://lore.kernel.org/all/20240115201935.2326400-1-leobras@redhat.com/

Thanks!
Leo

[PATCH 17/50] ktime.h: move ktime_t to types.h

Posted by Kent Overstreet 2 years, 1 month ago

ktime.h pulls in quite a few headers recursively (including printk.h) -
this is going to help with trimming sched.h dependencies.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/ktime.h | 8 +++-----
 include/linux/types.h | 3 +++
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 73f20deb497d..3a4e723eae0f 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -21,12 +21,10 @@
 #ifndef _LINUX_KTIME_H
 #define _LINUX_KTIME_H
 
-#include <linux/time.h>
-#include <linux/jiffies.h>
 #include <asm/bug.h>
-
-/* Nanosecond scalar representation for kernel time values */
-typedef s64	ktime_t;
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/types.h>
 
 /**
  * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value
diff --git a/include/linux/types.h b/include/linux/types.h
index 253168bb3fe1..2bc8766ba20c 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -120,6 +120,9 @@ typedef s64			int64_t;
 #define aligned_be64		__aligned_be64
 #define aligned_le64		__aligned_le64
 
+/* Nanosecond scalar representation for kernel time values */
+typedef s64	ktime_t;
+
 /**
  * The type used for indexing onto a disc or disc partition.
  *
-- 
2.43.0

[PATCH 18/50] hrtimers: Split out hrtimer_types.h

Posted by Kent Overstreet 2 years, 1 month ago

We need to reduce the scope of what's included in sched.h: task_struct
includes a hrtimer, so split out the core types into their own header.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h       | 44 ++----------------------------
 include/linux/hrtimer_types.h | 50 +++++++++++++++++++++++++++++++++++
 include/linux/sched.h         |  2 +-
 3 files changed, 53 insertions(+), 43 deletions(-)
 create mode 100644 include/linux/hrtimer_types.h

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 02d264ca9dce..87e3bedf8eb0 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -13,13 +13,13 @@
 #define _LINUX_HRTIMER_H
 
 #include <linux/hrtimer_defs.h>
-#include <linux/rbtree.h>
+#include <linux/hrtimer_types.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/percpu-defs.h>
+#include <linux/rbtree.h>
 #include <linux/seqlock.h>
 #include <linux/timer.h>
-#include <linux/timerqueue.h>
 
 struct hrtimer_clock_base;
 struct hrtimer_cpu_base;
@@ -59,14 +59,6 @@ enum hrtimer_mode {
 	HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
 };
 
-/*
- * Return values for the callback function
- */
-enum hrtimer_restart {
-	HRTIMER_NORESTART,	/* Timer is not restarted */
-	HRTIMER_RESTART,	/* Timer must be restarted */
-};
-
 /*
  * Values to track state of the timer
  *
@@ -94,38 +86,6 @@ enum hrtimer_restart {
 #define HRTIMER_STATE_INACTIVE	0x00
 #define HRTIMER_STATE_ENQUEUED	0x01
 
-/**
- * struct hrtimer - the basic hrtimer structure
- * @node:	timerqueue node, which also manages node.expires,
- *		the absolute expiry time in the hrtimers internal
- *		representation. The time is related to the clock on
- *		which the timer is based. Is setup by adding
- *		slack to the _softexpires value. For non range timers
- *		identical to _softexpires.
- * @_softexpires: the absolute earliest expiry time of the hrtimer.
- *		The time which was given as expiry time when the timer
- *		was armed.
- * @function:	timer expiry callback function
- * @base:	pointer to the timer base (per cpu and per clock)
- * @state:	state information (See bit values above)
- * @is_rel:	Set if the timer was armed relative
- * @is_soft:	Set if hrtimer will be expired in soft interrupt context.
- * @is_hard:	Set if hrtimer will be expired in hard interrupt context
- *		even on RT.
- *
- * The hrtimer structure must be initialized by hrtimer_init()
- */
-struct hrtimer {
-	struct timerqueue_node		node;
-	ktime_t				_softexpires;
-	enum hrtimer_restart		(*function)(struct hrtimer *);
-	struct hrtimer_clock_base	*base;
-	u8				state;
-	u8				is_rel;
-	u8				is_soft;
-	u8				is_hard;
-};
-
 /**
  * struct hrtimer_sleeper - simple sleeper structure
  * @timer:	embedded timer structure
diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h
new file mode 100644
index 000000000000..f4ef391b96a7
--- /dev/null
+++ b/include/linux/hrtimer_types.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_HRTIMER_TYPES_H
+#define _LINUX_HRTIMER_TYPES_H
+
+#include <linux/types.h>
+#include <linux/timerqueue.h>
+
+struct hrtimer_clock_base;
+
+/*
+ * Return values for the callback function
+ */
+enum hrtimer_restart {
+	HRTIMER_NORESTART,	/* Timer is not restarted */
+	HRTIMER_RESTART,	/* Timer must be restarted */
+};
+
+/**
+ * struct hrtimer - the basic hrtimer structure
+ * @node:	timerqueue node, which also manages node.expires,
+ *		the absolute expiry time in the hrtimers internal
+ *		representation. The time is related to the clock on
+ *		which the timer is based. Is setup by adding
+ *		slack to the _softexpires value. For non range timers
+ *		identical to _softexpires.
+ * @_softexpires: the absolute earliest expiry time of the hrtimer.
+ *		The time which was given as expiry time when the timer
+ *		was armed.
+ * @function:	timer expiry callback function
+ * @base:	pointer to the timer base (per cpu and per clock)
+ * @state:	state information (See bit values above)
+ * @is_rel:	Set if the timer was armed relative
+ * @is_soft:	Set if hrtimer will be expired in soft interrupt context.
+ * @is_hard:	Set if hrtimer will be expired in hard interrupt context
+ *		even on RT.
+ *
+ * The hrtimer structure must be initialized by hrtimer_init()
+ */
+struct hrtimer {
+	struct timerqueue_node		node;
+	ktime_t				_softexpires;
+	enum hrtimer_restart		(*function)(struct hrtimer *);
+	struct hrtimer_clock_base	*base;
+	u8				state;
+	u8				is_rel;
+	u8				is_soft;
+	u8				is_hard;
+};
+
+#endif /* _LINUX_HRTIMER_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7501a3451a20..3762809652da 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -17,7 +17,7 @@
 #include <linux/kmsan_types.h>
 #include <linux/mutex.h>
 #include <linux/plist.h>
-#include <linux/hrtimer.h>
+#include <linux/hrtimer_types.h>
 #include <linux/irqflags.h>
 #include <linux/seccomp.h>
 #include <linux/nodemask_types.h>
-- 
2.43.0

[PATCH 19/50] locking/mutex: split out mutex_types.h

Posted by Kent Overstreet 2 years, 1 month ago

Trimming down sched.h dependencies: we don't want to include more than
the base types.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/mutex.h       | 52 +--------------------------
 include/linux/mutex_types.h | 71 +++++++++++++++++++++++++++++++++++++
 include/linux/sched.h       |  2 +-
 3 files changed, 73 insertions(+), 52 deletions(-)
 create mode 100644 include/linux/mutex_types.h

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index a33aa9eb9fc3..0dfba5df6524 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -20,6 +20,7 @@
 #include <linux/osq_lock.h>
 #include <linux/debug_locks.h>
 #include <linux/cleanup.h>
+#include <linux/mutex_types.h>
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define __DEP_MAP_MUTEX_INITIALIZER(lockname)			\
@@ -33,49 +34,6 @@
 
 #ifndef CONFIG_PREEMPT_RT
 
-/*
- * Simple, straightforward mutexes with strict semantics:
- *
- * - only one task can hold the mutex at a time
- * - only the owner can unlock the mutex
- * - multiple unlocks are not permitted
- * - recursive locking is not permitted
- * - a mutex object must be initialized via the API
- * - a mutex object must not be initialized via memset or copying
- * - task may not exit with mutex held
- * - memory areas where held locks reside must not be freed
- * - held mutexes must not be reinitialized
- * - mutexes may not be used in hardware or software interrupt
- *   contexts such as tasklets and timers
- *
- * These semantics are fully enforced when DEBUG_MUTEXES is
- * enabled. Furthermore, besides enforcing the above rules, the mutex
- * debugging code also implements a number of additional features
- * that make lock debugging easier and faster:
- *
- * - uses symbolic names of mutexes, whenever they are printed in debug output
- * - point-of-acquire tracking, symbolic lookup of function names
- * - list of all locks held in the system, printout of them
- * - owner tracking
- * - detects self-recursing locks and prints out all relevant info
- * - detects multi-task circular deadlocks and prints out all affected
- *   locks and tasks (and only those tasks)
- */
-struct mutex {
-	atomic_long_t		owner;
-	raw_spinlock_t		wait_lock;
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-	struct optimistic_spin_queue osq; /* Spinner MCS lock */
-#endif
-	struct list_head	wait_list;
-#ifdef CONFIG_DEBUG_MUTEXES
-	void			*magic;
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map	dep_map;
-#endif
-};
-
 #ifdef CONFIG_DEBUG_MUTEXES
 
 #define __DEBUG_MUTEX_INITIALIZER(lockname)				\
@@ -131,14 +89,6 @@ extern bool mutex_is_locked(struct mutex *lock);
 /*
  * Preempt-RT variant based on rtmutexes.
  */
-#include <linux/rtmutex.h>
-
-struct mutex {
-	struct rt_mutex_base	rtmutex;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map	dep_map;
-#endif
-};
 
 #define __MUTEX_INITIALIZER(mutexname)					\
 {									\
diff --git a/include/linux/mutex_types.h b/include/linux/mutex_types.h
new file mode 100644
index 000000000000..fdf7f515fde8
--- /dev/null
+++ b/include/linux/mutex_types.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_MUTEX_TYPES_H
+#define __LINUX_MUTEX_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/lockdep_types.h>
+#include <linux/osq_lock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+
+#ifndef CONFIG_PREEMPT_RT
+
+/*
+ * Simple, straightforward mutexes with strict semantics:
+ *
+ * - only one task can hold the mutex at a time
+ * - only the owner can unlock the mutex
+ * - multiple unlocks are not permitted
+ * - recursive locking is not permitted
+ * - a mutex object must be initialized via the API
+ * - a mutex object must not be initialized via memset or copying
+ * - task may not exit with mutex held
+ * - memory areas where held locks reside must not be freed
+ * - held mutexes must not be reinitialized
+ * - mutexes may not be used in hardware or software interrupt
+ *   contexts such as tasklets and timers
+ *
+ * These semantics are fully enforced when DEBUG_MUTEXES is
+ * enabled. Furthermore, besides enforcing the above rules, the mutex
+ * debugging code also implements a number of additional features
+ * that make lock debugging easier and faster:
+ *
+ * - uses symbolic names of mutexes, whenever they are printed in debug output
+ * - point-of-acquire tracking, symbolic lookup of function names
+ * - list of all locks held in the system, printout of them
+ * - owner tracking
+ * - detects self-recursing locks and prints out all relevant info
+ * - detects multi-task circular deadlocks and prints out all affected
+ *   locks and tasks (and only those tasks)
+ */
+struct mutex {
+	atomic_long_t		owner;
+	raw_spinlock_t		wait_lock;
+#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+	struct optimistic_spin_queue osq; /* Spinner MCS lock */
+#endif
+	struct list_head	wait_list;
+#ifdef CONFIG_DEBUG_MUTEXES
+	void			*magic;
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
+};
+
+#else /* !CONFIG_PREEMPT_RT */
+/*
+ * Preempt-RT variant based on rtmutexes.
+ */
+#include <linux/rtmutex.h>
+
+struct mutex {
+	struct rt_mutex_base	rtmutex;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
+};
+
+#endif /* CONFIG_PREEMPT_RT */
+
+#endif /* __LINUX_MUTEX_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3762809652da..e8892789969b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -15,7 +15,7 @@
 #include <linux/sem.h>
 #include <linux/shm.h>
 #include <linux/kmsan_types.h>
-#include <linux/mutex.h>
+#include <linux/mutex_types.h>
 #include <linux/plist.h>
 #include <linux/hrtimer_types.h>
 #include <linux/irqflags.h>
-- 
2.43.0

[PATCH 20/50] posix-cpu-timers: Split out posix-timers_types.h

Posted by Kent Overstreet 2 years, 1 month ago

Trimming down sched.h dependencies: we don't want to include more than
the base types.

Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/posix-timers.h       | 68 ++--------------------------
 include/linux/posix-timers_types.h | 72 ++++++++++++++++++++++++++++++
 include/linux/sched.h              |  2 +-
 3 files changed, 76 insertions(+), 66 deletions(-)
 create mode 100644 include/linux/posix-timers_types.h

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index d607f51404fc..750b0647258d 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -2,40 +2,16 @@
 #ifndef _linux_POSIX_TIMERS_H
 #define _linux_POSIX_TIMERS_H
 
-#include <linux/spinlock.h>
+#include <linux/alarmtimer.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
-#include <linux/alarmtimer.h>
+#include <linux/posix-timers_types.h>
+#include <linux/spinlock.h>
 #include <linux/timerqueue.h>
 
 struct kernel_siginfo;
 struct task_struct;
 
-/*
- * Bit fields within a clockid:
- *
- * The most significant 29 bits hold either a pid or a file descriptor.
- *
- * Bit 2 indicates whether a cpu clock refers to a thread or a process.
- *
- * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
- *
- * A clockid is invalid if bits 2, 1, and 0 are all set.
- */
-#define CPUCLOCK_PID(clock)		((pid_t) ~((clock) >> 3))
-#define CPUCLOCK_PERTHREAD(clock) \
-	(((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
-
-#define CPUCLOCK_PERTHREAD_MASK	4
-#define CPUCLOCK_WHICH(clock)	((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
-#define CPUCLOCK_CLOCK_MASK	3
-#define CPUCLOCK_PROF		0
-#define CPUCLOCK_VIRT		1
-#define CPUCLOCK_SCHED		2
-#define CPUCLOCK_MAX		3
-#define CLOCKFD			CPUCLOCK_MAX
-#define CLOCKFD_MASK		(CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
-
 static inline clockid_t make_process_cpuclock(const unsigned int pid,
 		const clockid_t clock)
 {
@@ -109,44 +85,6 @@ static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp)
 	ctmr->node.expires = exp;
 }
 
-/**
- * posix_cputimer_base - Container per posix CPU clock
- * @nextevt:		Earliest-expiration cache
- * @tqhead:		timerqueue head for cpu_timers
- */
-struct posix_cputimer_base {
-	u64			nextevt;
-	struct timerqueue_head	tqhead;
-};
-
-/**
- * posix_cputimers - Container for posix CPU timer related data
- * @bases:		Base container for posix CPU clocks
- * @timers_active:	Timers are queued.
- * @expiry_active:	Timer expiry is active. Used for
- *			process wide timers to avoid multiple
- *			task trying to handle expiry concurrently
- *
- * Used in task_struct and signal_struct
- */
-struct posix_cputimers {
-	struct posix_cputimer_base	bases[CPUCLOCK_MAX];
-	unsigned int			timers_active;
-	unsigned int			expiry_active;
-};
-
-/**
- * posix_cputimers_work - Container for task work based posix CPU timer expiry
- * @work:	The task work to be scheduled
- * @mutex:	Mutex held around expiry in context of this task work
- * @scheduled:  @work has been scheduled already, no further processing
- */
-struct posix_cputimers_work {
-	struct callback_head	work;
-	struct mutex		mutex;
-	unsigned int		scheduled;
-};
-
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
 {
 	memset(pct, 0, sizeof(*pct));
diff --git a/include/linux/posix-timers_types.h b/include/linux/posix-timers_types.h
new file mode 100644
index 000000000000..57fec639a9bb
--- /dev/null
+++ b/include/linux/posix-timers_types.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _linux_POSIX_TIMERS_TYPES_H
+#define _linux_POSIX_TIMERS_TYPES_H
+
+#include <linux/mutex_types.h>
+#include <linux/timerqueue.h>
+#include <linux/types.h>
+
+/*
+ * Bit fields within a clockid:
+ *
+ * The most significant 29 bits hold either a pid or a file descriptor.
+ *
+ * Bit 2 indicates whether a cpu clock refers to a thread or a process.
+ *
+ * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
+ *
+ * A clockid is invalid if bits 2, 1, and 0 are all set.
+ */
+#define CPUCLOCK_PID(clock)		((pid_t) ~((clock) >> 3))
+#define CPUCLOCK_PERTHREAD(clock) \
+	(((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
+
+#define CPUCLOCK_PERTHREAD_MASK	4
+#define CPUCLOCK_WHICH(clock)	((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
+#define CPUCLOCK_CLOCK_MASK	3
+#define CPUCLOCK_PROF		0
+#define CPUCLOCK_VIRT		1
+#define CPUCLOCK_SCHED		2
+#define CPUCLOCK_MAX		3
+#define CLOCKFD			CPUCLOCK_MAX
+#define CLOCKFD_MASK		(CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
+
+/**
+ * struct posix_cputimer_base - Container per posix CPU clock
+ * @nextevt:		Earliest-expiration cache
+ * @tqhead:		timerqueue head for cpu_timers
+ */
+struct posix_cputimer_base {
+	u64			nextevt;
+	struct timerqueue_head	tqhead;
+};
+
+/**
+ * struct posix_cputimers - Container for posix CPU timer related data
+ * @bases:		Base container for posix CPU clocks
+ * @timers_active:	Timers are queued.
+ * @expiry_active:	Timer expiry is active. Used for
+ *			process-wide timers to avoid multiple
+ *			tasks trying to handle expiry concurrently
+ *
+ * Used in task_struct and signal_struct
+ */
+struct posix_cputimers {
+	struct posix_cputimer_base	bases[CPUCLOCK_MAX];
+	unsigned int			timers_active;
+	unsigned int			expiry_active;
+};
+
+/**
+ * struct posix_cputimers_work - Container for task work based posix CPU timer expiry
+ * @work:	The task work to be scheduled
+ * @mutex:	Mutex held around expiry in context of this task work
+ * @scheduled:  @work has been scheduled already, no further processing
+ */
+struct posix_cputimers_work {
+	struct callback_head	work;
+	struct mutex		mutex;
+	unsigned int		scheduled;
+};
+
+#endif /* _linux_POSIX_TIMERS_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e8892789969b..6d803d0904d9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -31,7 +31,7 @@
 #include <linux/syscall_user_dispatch.h>
 #include <linux/mm_types_task.h>
 #include <linux/task_io_accounting.h>
-#include <linux/posix-timers.h>
+#include <linux/posix-timers_types.h>
 #include <linux/rseq.h>
 #include <linux/seqlock.h>
 #include <linux/kcsan.h>
-- 
2.43.0

[PATCH 21/50] locking/seqlock: Split out seqlock_types.h

Posted by Kent Overstreet 2 years, 1 month ago

Trimming down sched.h dependencies: we don't want to include more than
the base types.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/linux/sched.h         |  2 +-
 include/linux/seqlock.h       | 79 +----------------------------
 include/linux/seqlock_types.h | 93 +++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+), 78 deletions(-)
 create mode 100644 include/linux/seqlock_types.h

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d803d0904d9..436f7ce1450a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -33,7 +33,7 @@
 #include <linux/task_io_accounting.h>
 #include <linux/posix-timers_types.h>
 #include <linux/rseq.h>
-#include <linux/seqlock.h>
+#include <linux/seqlock_types.h>
 #include <linux/kcsan.h>
 #include <linux/rv.h>
 #include <linux/livepatch_sched.h>
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index e92f9d5577ba..d90d8ee29d81 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -18,6 +18,7 @@
 #include <linux/lockdep.h>
 #include <linux/mutex.h>
 #include <linux/preempt.h>
+#include <linux/seqlock_types.h>
 #include <linux/spinlock.h>
 
 #include <asm/processor.h>
@@ -37,37 +38,6 @@
  */
 #define KCSAN_SEQLOCK_REGION_MAX 1000
 
-/*
- * Sequence counters (seqcount_t)
- *
- * This is the raw counting mechanism, without any writer protection.
- *
- * Write side critical sections must be serialized and non-preemptible.
- *
- * If readers can be invoked from hardirq or softirq contexts,
- * interrupts or bottom halves must also be respectively disabled before
- * entering the write section.
- *
- * This mechanism can't be used if the protected data contains pointers,
- * as the writer can invalidate a pointer that a reader is following.
- *
- * If the write serialization mechanism is one of the common kernel
- * locking primitives, use a sequence counter with associated lock
- * (seqcount_LOCKNAME_t) instead.
- *
- * If it's desired to automatically handle the sequence counter writer
- * serialization and non-preemptibility requirements, use a sequential
- * lock (seqlock_t) instead.
- *
- * See Documentation/locking/seqlock.rst
- */
-typedef struct seqcount {
-	unsigned sequence;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map dep_map;
-#endif
-} seqcount_t;
-
 static inline void __seqcount_init(seqcount_t *s, const char *name,
 					  struct lock_class_key *key)
 {
@@ -131,28 +101,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  * See Documentation/locking/seqlock.rst
  */
 
-/*
- * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
- * disable preemption. It can lead to higher latencies, and the write side
- * sections will not be able to acquire locks which become sleeping locks
- * (e.g. spinlock_t).
- *
- * To remain preemptible while avoiding a possible livelock caused by the
- * reader preempting the writer, use a different technique: let the reader
- * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
- * case, acquire then release the associated LOCKNAME writer serialization
- * lock. This will allow any possibly-preempted writer to make progress
- * until the end of its writer serialization lock critical section.
- *
- * This lock-unlock technique must be implemented for all of PREEMPT_RT
- * sleeping locks.  See Documentation/locking/locktypes.rst
- */
-#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
-#define __SEQ_LOCK(expr)	expr
-#else
-#define __SEQ_LOCK(expr)
-#endif
-
 /*
  * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated
  * @seqcount:	The real sequence counter
@@ -194,11 +142,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  * @lockbase:		prefix for associated lock/unlock
  */
 #define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase)	\
-typedef struct seqcount_##lockname {					\
-	seqcount_t		seqcount;				\
-	__SEQ_LOCK(locktype	*lock);					\
-} seqcount_##lockname##_t;						\
-									\
 static __always_inline seqcount_t *					\
 __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s)			\
 {									\
@@ -284,6 +227,7 @@ SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t,  false,    raw_spin)
 SEQCOUNT_LOCKNAME(spinlock,     spinlock_t,      __SEQ_RT, spin)
 SEQCOUNT_LOCKNAME(rwlock,       rwlock_t,        __SEQ_RT, read)
 SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     mutex)
+#undef SEQCOUNT_LOCKNAME
 
 /*
  * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
@@ -794,25 +738,6 @@ static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
 	smp_wmb();      /* increment "sequence" before following stores */
 }
 
-/*
- * Sequential locks (seqlock_t)
- *
- * Sequence counters with an embedded spinlock for writer serialization
- * and non-preemptibility.
- *
- * For more info, see:
- *    - Comments on top of seqcount_t
- *    - Documentation/locking/seqlock.rst
- */
-typedef struct {
-	/*
-	 * Make sure that readers don't starve writers on PREEMPT_RT: use
-	 * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
-	 */
-	seqcount_spinlock_t seqcount;
-	spinlock_t lock;
-} seqlock_t;
-
 #define __SEQLOCK_UNLOCKED(lockname)					\
 	{								\
 		.seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \
diff --git a/include/linux/seqlock_types.h b/include/linux/seqlock_types.h
new file mode 100644
index 000000000000..dfdf43e3fa3d
--- /dev/null
+++ b/include/linux/seqlock_types.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_SEQLOCK_TYPES_H
+#define __LINUX_SEQLOCK_TYPES_H
+
+#include <linux/lockdep_types.h>
+#include <linux/mutex_types.h>
+#include <linux/spinlock_types.h>
+
+/*
+ * Sequence counters (seqcount_t)
+ *
+ * This is the raw counting mechanism, without any writer protection.
+ *
+ * Write side critical sections must be serialized and non-preemptible.
+ *
+ * If readers can be invoked from hardirq or softirq contexts,
+ * interrupts or bottom halves must also be respectively disabled before
+ * entering the write section.
+ *
+ * This mechanism can't be used if the protected data contains pointers,
+ * as the writer can invalidate a pointer that a reader is following.
+ *
+ * If the write serialization mechanism is one of the common kernel
+ * locking primitives, use a sequence counter with associated lock
+ * (seqcount_LOCKNAME_t) instead.
+ *
+ * If it's desired to automatically handle the sequence counter writer
+ * serialization and non-preemptibility requirements, use a sequential
+ * lock (seqlock_t) instead.
+ *
+ * See Documentation/locking/seqlock.rst
+ */
+typedef struct seqcount {
+	unsigned sequence;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
+} seqcount_t;
+
+/*
+ * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
+ * disable preemption. It can lead to higher latencies, and the write side
+ * sections will not be able to acquire locks which become sleeping locks
+ * (e.g. spinlock_t).
+ *
+ * To remain preemptible while avoiding a possible livelock caused by the
+ * reader preempting the writer, use a different technique: let the reader
+ * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
+ * case, acquire then release the associated LOCKNAME writer serialization
+ * lock. This will allow any possibly-preempted writer to make progress
+ * until the end of its writer serialization lock critical section.
+ *
+ * This lock-unlock technique must be implemented for all of PREEMPT_RT
+ * sleeping locks.  See Documentation/locking/locktypes.rst
+ */
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
+#define __SEQ_LOCK(expr)	expr
+#else
+#define __SEQ_LOCK(expr)
+#endif
+
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase)	\
+typedef struct seqcount_##lockname {					\
+	seqcount_t		seqcount;				\
+	__SEQ_LOCK(locktype	*lock);					\
+} seqcount_##lockname##_t;
+
+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t,  false,    raw_spin)
+SEQCOUNT_LOCKNAME(spinlock,     spinlock_t,      __SEQ_RT, spin)
+SEQCOUNT_LOCKNAME(rwlock,       rwlock_t,        __SEQ_RT, read)
+SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     mutex)
+#undef SEQCOUNT_LOCKNAME
+
+/*
+ * Sequential locks (seqlock_t)
+ *
+ * Sequence counters with an embedded spinlock for writer serialization
+ * and non-preemptibility.
+ *
+ * For more info, see:
+ *    - Comments on top of seqcount_t
+ *    - Documentation/locking/seqlock.rst
+ */
+typedef struct {
+	/*
+	 * Make sure that readers don't starve writers on PREEMPT_RT: use
+	 * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
+	 */
+	seqcount_spinlock_t seqcount;
+	spinlock_t lock;
+} seqlock_t;
+
+#endif /* __LINUX_SEQLOCK_TYPES_H */
-- 
2.43.0

Re: [PATCH 21/50] locking/seqlock: Split out seqlock_types.h

Posted by Waiman Long 2 years, 1 month ago

On 12/15/23 22:26, Kent Overstreet wrote:
> Trimming down sched.h dependencies: we don't want to include more than
> the base types.
>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
>   include/linux/sched.h         |  2 +-
>   include/linux/seqlock.h       | 79 +----------------------------
>   include/linux/seqlock_types.h | 93 +++++++++++++++++++++++++++++++++++
>   3 files changed, 96 insertions(+), 78 deletions(-)
>   create mode 100644 include/linux/seqlock_types.h
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 6d803d0904d9..436f7ce1450a 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -33,7 +33,7 @@
>   #include <linux/task_io_accounting.h>
>   #include <linux/posix-timers_types.h>
>   #include <linux/rseq.h>
> -#include <linux/seqlock.h>
> +#include <linux/seqlock_types.h>
>   #include <linux/kcsan.h>
>   #include <linux/rv.h>
>   #include <linux/livepatch_sched.h>
> diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
> index e92f9d5577ba..d90d8ee29d81 100644
> --- a/include/linux/seqlock.h
> +++ b/include/linux/seqlock.h
> @@ -18,6 +18,7 @@
>   #include <linux/lockdep.h>
>   #include <linux/mutex.h>
>   #include <linux/preempt.h>
> +#include <linux/seqlock_types.h>
>   #include <linux/spinlock.h>
>   
>   #include <asm/processor.h>
> @@ -37,37 +38,6 @@
>    */
>   #define KCSAN_SEQLOCK_REGION_MAX 1000
>   
> -/*
> - * Sequence counters (seqcount_t)
> - *
> - * This is the raw counting mechanism, without any writer protection.
> - *
> - * Write side critical sections must be serialized and non-preemptible.
> - *
> - * If readers can be invoked from hardirq or softirq contexts,
> - * interrupts or bottom halves must also be respectively disabled before
> - * entering the write section.
> - *
> - * This mechanism can't be used if the protected data contains pointers,
> - * as the writer can invalidate a pointer that a reader is following.
> - *
> - * If the write serialization mechanism is one of the common kernel
> - * locking primitives, use a sequence counter with associated lock
> - * (seqcount_LOCKNAME_t) instead.
> - *
> - * If it's desired to automatically handle the sequence counter writer
> - * serialization and non-preemptibility requirements, use a sequential
> - * lock (seqlock_t) instead.
> - *
> - * See Documentation/locking/seqlock.rst
> - */
> -typedef struct seqcount {
> -	unsigned sequence;
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> -	struct lockdep_map dep_map;
> -#endif
> -} seqcount_t;
> -
>   static inline void __seqcount_init(seqcount_t *s, const char *name,
>   					  struct lock_class_key *key)
>   {
> @@ -131,28 +101,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
>    * See Documentation/locking/seqlock.rst
>    */
>   
> -/*
> - * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
> - * disable preemption. It can lead to higher latencies, and the write side
> - * sections will not be able to acquire locks which become sleeping locks
> - * (e.g. spinlock_t).
> - *
> - * To remain preemptible while avoiding a possible livelock caused by the
> - * reader preempting the writer, use a different technique: let the reader
> - * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
> - * case, acquire then release the associated LOCKNAME writer serialization
> - * lock. This will allow any possibly-preempted writer to make progress
> - * until the end of its writer serialization lock critical section.
> - *
> - * This lock-unlock technique must be implemented for all of PREEMPT_RT
> - * sleeping locks.  See Documentation/locking/locktypes.rst
> - */
> -#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
> -#define __SEQ_LOCK(expr)	expr
> -#else
> -#define __SEQ_LOCK(expr)
> -#endif
> -
>   /*
>    * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated
>    * @seqcount:	The real sequence counter
> @@ -194,11 +142,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
>    * @lockbase:		prefix for associated lock/unlock
>    */
>   #define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase)	\
> -typedef struct seqcount_##lockname {					\
> -	seqcount_t		seqcount;				\
> -	__SEQ_LOCK(locktype	*lock);					\
> -} seqcount_##lockname##_t;						\
> -									\
>   static __always_inline seqcount_t *					\
>   __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s)			\
>   {									\
> @@ -284,6 +227,7 @@ SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t,  false,    raw_spin)
>   SEQCOUNT_LOCKNAME(spinlock,     spinlock_t,      __SEQ_RT, spin)
>   SEQCOUNT_LOCKNAME(rwlock,       rwlock_t,        __SEQ_RT, read)
>   SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     mutex)
> +#undef SEQCOUNT_LOCKNAME
>   
>   /*
>    * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
> @@ -794,25 +738,6 @@ static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
>   	smp_wmb();      /* increment "sequence" before following stores */
>   }
>   
> -/*
> - * Sequential locks (seqlock_t)
> - *
> - * Sequence counters with an embedded spinlock for writer serialization
> - * and non-preemptibility.
> - *
> - * For more info, see:
> - *    - Comments on top of seqcount_t
> - *    - Documentation/locking/seqlock.rst
> - */
> -typedef struct {
> -	/*
> -	 * Make sure that readers don't starve writers on PREEMPT_RT: use
> -	 * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
> -	 */
> -	seqcount_spinlock_t seqcount;
> -	spinlock_t lock;
> -} seqlock_t;
> -
>   #define __SEQLOCK_UNLOCKED(lockname)					\
>   	{								\
>   		.seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \
> diff --git a/include/linux/seqlock_types.h b/include/linux/seqlock_types.h
> new file mode 100644
> index 000000000000..dfdf43e3fa3d
> --- /dev/null
> +++ b/include/linux/seqlock_types.h
> @@ -0,0 +1,93 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __LINUX_SEQLOCK_TYPES_H
> +#define __LINUX_SEQLOCK_TYPES_H
> +
> +#include <linux/lockdep_types.h>
> +#include <linux/mutex_types.h>
> +#include <linux/spinlock_types.h>
> +
> +/*
> + * Sequence counters (seqcount_t)
> + *
> + * This is the raw counting mechanism, without any writer protection.
> + *
> + * Write side critical sections must be serialized and non-preemptible.
> + *
> + * If readers can be invoked from hardirq or softirq contexts,
> + * interrupts or bottom halves must also be respectively disabled before
> + * entering the write section.
> + *
> + * This mechanism can't be used if the protected data contains pointers,
> + * as the writer can invalidate a pointer that a reader is following.
> + *
> + * If the write serialization mechanism is one of the common kernel
> + * locking primitives, use a sequence counter with associated lock
> + * (seqcount_LOCKNAME_t) instead.
> + *
> + * If it's desired to automatically handle the sequence counter writer
> + * serialization and non-preemptibility requirements, use a sequential
> + * lock (seqlock_t) instead.
> + *
> + * See Documentation/locking/seqlock.rst
> + */
> +typedef struct seqcount {
> +	unsigned sequence;
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	struct lockdep_map dep_map;
> +#endif
> +} seqcount_t;
> +
> +/*
> + * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
> + * disable preemption. It can lead to higher latencies, and the write side
> + * sections will not be able to acquire locks which become sleeping locks
> + * (e.g. spinlock_t).
> + *
> + * To remain preemptible while avoiding a possible livelock caused by the
> + * reader preempting the writer, use a different technique: let the reader
> + * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
> + * case, acquire then release the associated LOCKNAME writer serialization
> + * lock. This will allow any possibly-preempted writer to make progress
> + * until the end of its writer serialization lock critical section.
> + *
> + * This lock-unlock technique must be implemented for all of PREEMPT_RT
> + * sleeping locks.  See Documentation/locking/locktypes.rst
> + */
> +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
> +#define __SEQ_LOCK(expr)	expr
> +#else
> +#define __SEQ_LOCK(expr)
> +#endif
> +
> +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase)	\
> +typedef struct seqcount_##lockname {					\
> +	seqcount_t		seqcount;				\
> +	__SEQ_LOCK(locktype	*lock);					\
> +} seqcount_##lockname##_t;
> +
> +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t,  false,    raw_spin)
> +SEQCOUNT_LOCKNAME(spinlock,     spinlock_t,      __SEQ_RT, spin)
> +SEQCOUNT_LOCKNAME(rwlock,       rwlock_t,        __SEQ_RT, read)
> +SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     mutex)
> +#undef SEQCOUNT_LOCKNAME
> +
> +/*
> + * Sequential locks (seqlock_t)
> + *
> + * Sequence counters with an embedded spinlock for writer serialization
> + * and non-preemptibility.
> + *
> + * For more info, see:
> + *    - Comments on top of seqcount_t
> + *    - Documentation/locking/seqlock.rst
> + */
> +typedef struct {
> +	/*
> +	 * Make sure that readers don't starve writers on PREEMPT_RT: use
> +	 * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
> +	 */
> +	seqcount_spinlock_t seqcount;
> +	spinlock_t lock;
> +} seqlock_t;
> +
> +#endif /* __LINUX_SEQLOCK_TYPES_H */

seqlock.h is directly included in kernel/sched/sched.h, so breaking out 
seqlock_types.h and including only that in include/linux/sched.h should 
be OK.

Acked-by: Waiman Long <longman@redhat.com>