sched.h, which defines task_struct, needs nodemask_t - but sched.h is a
frequently used header and ideally shouldn't be pulling in any more code
than it needs to.
This splits out nodemask_types.h which has the definition sched.h needs,
which will avoid a circular header dependency in the alloc tagging patch
series, and as a bonus should speed up kernel build times.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
include/linux/nodemask.h | 2 +-
include/linux/nodemask_types.h | 10 ++++++++++
include/linux/sched.h | 2 +-
3 files changed, 12 insertions(+), 2 deletions(-)
create mode 100644 include/linux/nodemask_types.h
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 8d07116caaf1..b61438313a73 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -93,10 +93,10 @@
#include <linux/threads.h>
#include <linux/bitmap.h>
#include <linux/minmax.h>
+#include <linux/nodemask_types.h>
#include <linux/numa.h>
#include <linux/random.h>
-typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
extern nodemask_t _unused_nodemask_arg_;
/**
diff --git a/include/linux/nodemask_types.h b/include/linux/nodemask_types.h
new file mode 100644
index 000000000000..6b28d97ea6ed
--- /dev/null
+++ b/include/linux/nodemask_types.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_NODEMASK_TYPES_H
+#define __LINUX_NODEMASK_TYPES_H
+
+#include <linux/bitops.h>
+#include <linux/numa.h>
+
+typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
+
+#endif /* __LINUX_NODEMASK_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 292c31697248..5a5b7b122682 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -20,7 +20,7 @@
#include <linux/hrtimer.h>
#include <linux/irqflags.h>
#include <linux/seccomp.h>
-#include <linux/nodemask.h>
+#include <linux/nodemask_types.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/resource.h>
--
2.43.0
prandom.h doesn't use percpu.h - this fixes some circular header issues.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
include/linux/prandom.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index f2ed5b72b3d6..f7f1e5251c67 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -10,7 +10,6 @@
#include <linux/types.h>
#include <linux/once.h>
-#include <linux/percpu.h>
#include <linux/random.h>
struct rnd_state {
--
2.43.0
On 12/15/23 19:26, Kent Overstreet wrote:
> prandom.h doesn't use percpu.h - this fixes some circular header issues.
>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> ---
> include/linux/prandom.h | 1 -
> 1 file changed, 1 deletion(-)
>
> diff --git a/include/linux/prandom.h b/include/linux/prandom.h
> index f2ed5b72b3d6..f7f1e5251c67 100644
> --- a/include/linux/prandom.h
> +++ b/include/linux/prandom.h
> @@ -10,7 +10,6 @@
>
> #include <linux/types.h>
> #include <linux/once.h>
> -#include <linux/percpu.h>
> #include <linux/random.h>
>
> struct rnd_state {
In this header file:
22 void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
so where does it get __percpu from?
--
#Randy
https://people.kernel.org/tglx/notes-about-netiquette
https://subspace.kernel.org/etiquette.html
On Sat, Dec 16, 2023 at 10:52:04AM -0800, Randy Dunlap wrote:
>
>
> On 12/15/23 19:26, Kent Overstreet wrote:
> > prandom.h doesn't use percpu.h - this fixes some circular header issues.
> >
> > Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> > ---
> > include/linux/prandom.h | 1 -
> > 1 file changed, 1 deletion(-)
> >
> > diff --git a/include/linux/prandom.h b/include/linux/prandom.h
> > index f2ed5b72b3d6..f7f1e5251c67 100644
> > --- a/include/linux/prandom.h
> > +++ b/include/linux/prandom.h
> > @@ -10,7 +10,6 @@
> >
> > #include <linux/types.h>
> > #include <linux/once.h>
> > -#include <linux/percpu.h>
> > #include <linux/random.h>
> >
> > struct rnd_state {
>
> In this header file:
>
> 22 void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
>
> so where does it get __percpu from?
That comes from compiler.h -> compiler_types.h... cscope :)
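Slim down recursive header includes: hrtimer.h now includes only
percpu-defs.h instead of all of percpu.h, and time_namespace.h gains a
forward declaration of struct vm_area_struct.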
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/hrtimer.h | 2 +-
include/linux/time_namespace.h | 2 ++
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index f2044d5a652b..02d264ca9dce 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -16,7 +16,7 @@
#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/list.h>
-#include <linux/percpu.h>
+#include <linux/percpu-defs.h>
#include <linux/seqlock.h>
#include <linux/timer.h>
#include <linux/timerqueue.h>
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 5258d81cef17..876e31b4461d 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -12,6 +12,8 @@
struct user_namespace;
extern struct user_namespace init_user_ns;
+struct vm_area_struct;
+
struct timens_offsets {
struct timespec64 monotonic;
struct timespec64 boottime;
--
2.43.0
Replace linux/percpu.h include with asm/percpu.h to avoid circular
dependency.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
arch/arm64/include/asm/spectre.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index 06c357d83b13..0c4d9045c31f 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -13,8 +13,8 @@
#define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K)
#ifndef __ASSEMBLY__
-
-#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <asm/percpu.h>
#include <asm/cpufeature.h>
#include <asm/virt.h>
--
2.43.0
Moving these stub functions to a .c file means we can kill a sched.h
dependency on printk.h.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
include/linux/numa.h | 18 +++++-------------
kernel/Makefile | 1 +
kernel/numa.c | 24 ++++++++++++++++++++++++
3 files changed, 30 insertions(+), 13 deletions(-)
create mode 100644 kernel/numa.c
diff --git a/include/linux/numa.h b/include/linux/numa.h
index a904861de800..aeab3d9f57ae 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -22,34 +22,26 @@
#endif
#ifdef CONFIG_NUMA
-#include <linux/printk.h>
#include <asm/sparsemem.h>
/* Generic implementation available */
int numa_nearest_node(int node, unsigned int state);
#ifndef memory_add_physaddr_to_nid
-static inline int memory_add_physaddr_to_nid(u64 start)
-{
- pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
- start);
- return 0;
-}
+int memory_add_physaddr_to_nid(u64 start);
#endif
+
#ifndef phys_to_target_node
-static inline int phys_to_target_node(u64 start)
-{
- pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
- start);
- return 0;
-}
+int phys_to_target_node(u64 start);
#endif
+
#ifndef numa_fill_memblks
static inline int __init numa_fill_memblks(u64 start, u64 end)
{
return NUMA_NO_MEMBLK;
}
#endif
+
#else /* !CONFIG_NUMA */
static inline int numa_nearest_node(int node, unsigned int state)
{
diff --git a/kernel/Makefile b/kernel/Makefile
index 3947122d618b..ce105a5558fc 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -114,6 +114,7 @@ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o
obj-$(CONFIG_CFI_CLANG) += cfi.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/kernel/numa.c b/kernel/numa.c
new file mode 100644
index 000000000000..c24c72f45989
--- /dev/null
+++ b/kernel/numa.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/printk.h>
+#include <linux/numa.h>
+
+/* Stub functions: */
+
+#ifndef memory_add_physaddr_to_nid
+int memory_add_physaddr_to_nid(u64 start)
+{
+ pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
+ start);
+ return 0;
+}
+#endif
+
+#ifndef phys_to_target_node
+int phys_to_target_node(u64 start)
+{
+ pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
+ start);
+ return 0;
+}
+#endif
--
2.43.0
On Fri, Dec 15, 2023 at 10:26:14PM -0500, Kent Overstreet wrote:
> diff --git a/kernel/numa.c b/kernel/numa.c
> new file mode 100644
> index 000000000000..c24c72f45989
> --- /dev/null
> +++ b/kernel/numa.c

Should this be a new file or would these functions fit better in, eg,
mempolicy.c which is already built only if CONFIG_NUMA?
On Tue, Dec 19, 2023 at 10:52:33PM +0000, Matthew Wilcox wrote:
> On Fri, Dec 15, 2023 at 10:26:14PM -0500, Kent Overstreet wrote:
> > diff --git a/kernel/numa.c b/kernel/numa.c
> > new file mode 100644
> > index 000000000000..c24c72f45989
> > --- /dev/null
> > +++ b/kernel/numa.c
>
> Should this be a new file or would these functions fit better in, eg,
> mempolicy.c which is already built only if CONFIG_NUMA?

That does look like a bit of a disorganized dumping ground though, I
wonder if anyone would want to start mm/numa/ and do a bit of
organizing?
On Fri, Dec 15, 2023 at 10:26:14PM -0500, Kent Overstreet wrote:
> Moving these stub functions to a .c file means we can kill a sched.h
> dependency on printk.h.
>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
> include/linux/numa.h | 18 +++++-------------
> kernel/Makefile | 1 +
> kernel/numa.c | 24 ++++++++++++++++++++++++
> 3 files changed, 30 insertions(+), 13 deletions(-)
> create mode 100644 kernel/numa.c
>
> diff --git a/include/linux/numa.h b/include/linux/numa.h
> index a904861de800..aeab3d9f57ae 100644
> --- a/include/linux/numa.h
> +++ b/include/linux/numa.h
> @@ -22,34 +22,26 @@
> #endif
>
> #ifdef CONFIG_NUMA
> -#include <linux/printk.h>
> #include <asm/sparsemem.h>
>
> /* Generic implementation available */
> int numa_nearest_node(int node, unsigned int state);
>
> #ifndef memory_add_physaddr_to_nid
> -static inline int memory_add_physaddr_to_nid(u64 start)
> -{
> - pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
> - start);
> - return 0;
> -}
> +int memory_add_physaddr_to_nid(u64 start);
> #endif
> +
> #ifndef phys_to_target_node
> -static inline int phys_to_target_node(u64 start)
> -{
> - pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
> - start);
> - return 0;
> -}
> +int phys_to_target_node(u64 start);
> #endif
> +
> #ifndef numa_fill_memblks
> static inline int __init numa_fill_memblks(u64 start, u64 end)
> {
> return NUMA_NO_MEMBLK;
> }
> #endif
> +
> #else /* !CONFIG_NUMA */
> static inline int numa_nearest_node(int node, unsigned int state)
> {
> diff --git a/kernel/Makefile b/kernel/Makefile
> index 3947122d618b..ce105a5558fc 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -114,6 +114,7 @@ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
> obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
> obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o
> obj-$(CONFIG_CFI_CLANG) += cfi.o
> +obj-$(CONFIG_NUMA) += numa.o
>
> obj-$(CONFIG_PERF_EVENTS) += events/
>
> diff --git a/kernel/numa.c b/kernel/numa.c
> new file mode 100644
> index 000000000000..c24c72f45989
> --- /dev/null
> +++ b/kernel/numa.c
> @@ -0,0 +1,24 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#include <linux/printk.h>
> +#include <linux/numa.h>
> +
> +/* Stub functions: */
> +
> +#ifndef memory_add_physaddr_to_nid
> +int memory_add_physaddr_to_nid(u64 start)
> +{
> + pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
> + start);
> + return 0;
> +}
> +#endif
> +
> +#ifndef phys_to_target_node
> +int phys_to_target_node(u64 start)
> +{
> + pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
> + start);
> + return 0;
> +}
> +#endif
> --
> 2.43.0
>
These need EXPORT_SYMBOL_GPL() now like the architecture specific
implementations because they are no longer inlined. My arm64 builds fail
with:
ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/acpi/nfit/nfit.ko] undefined!
ERROR: modpost: "phys_to_target_node" [drivers/acpi/nfit/nfit.ko] undefined!
ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/virtio/virtio_mem.ko] undefined!
ERROR: modpost: "phys_to_target_node" [drivers/dax/dax_cxl.ko] undefined!
ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/dax/dax_cxl.ko] undefined!
ERROR: modpost: "phys_to_target_node" [drivers/cxl/cxl_acpi.ko] undefined!
ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/cxl/cxl_pmem.ko] undefined!
ERROR: modpost: "phys_to_target_node" [drivers/cxl/cxl_pmem.ko] undefined!
ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/hv/hv_balloon.ko] undefined!
Cheers,
Nathan
On Tue, Dec 19, 2023 at 09:36:44AM -0700, Nathan Chancellor wrote:
> On Fri, Dec 15, 2023 at 10:26:14PM -0500, Kent Overstreet wrote:
> These need EXPORT_SYMBOL_GPL() now like the architecture specific
> implementations because they are no longer inlined. My arm64 builds fail
> with:
>
> ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/acpi/nfit/nfit.ko] undefined!
> ERROR: modpost: "phys_to_target_node" [drivers/acpi/nfit/nfit.ko] undefined!
> ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/virtio/virtio_mem.ko] undefined!
> ERROR: modpost: "phys_to_target_node" [drivers/dax/dax_cxl.ko] undefined!
> ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/dax/dax_cxl.ko] undefined!
> ERROR: modpost: "phys_to_target_node" [drivers/cxl/cxl_acpi.ko] undefined!
> ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/cxl/cxl_pmem.ko] undefined!
> ERROR: modpost: "phys_to_target_node" [drivers/cxl/cxl_pmem.ko] undefined!
> ERROR: modpost: "memory_add_physaddr_to_nid" [drivers/hv/hv_balloon.ko] undefined!
Applied the following:
commit 7ae175e405b44b9897c04bbf177e3e08ab25710a
Author: Kent Overstreet <kent.overstreet@linux.dev>
Date: Tue Dec 19 16:02:26 2023 -0500
fixup! kernel/numa.c: Move logging out of numa.h
diff --git a/kernel/numa.c b/kernel/numa.c
index c24c72f45989..67ca6b8585c0 100644
--- a/kernel/numa.c
+++ b/kernel/numa.c
@@ -12,6 +12,7 @@ int memory_add_physaddr_to_nid(u64 start)
start);
return 0;
}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
#ifndef phys_to_target_node
@@ -21,4 +22,5 @@ int phys_to_target_node(u64 start)
start);
return 0;
}
+EXPORT_SYMBOL_GPL(phys_to_target_node);
#endif
This lets us kill the dependency on spinlock.h.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
include/linux/sched.h | 31 -------------------------------
include/linux/spinlock.h | 31 +++++++++++++++++++++++++++++++
2 files changed, 31 insertions(+), 31 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a5b7b122682..7501a3451a20 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2227,37 +2227,6 @@ static inline bool preempt_model_preemptible(void)
return preempt_model_full() || preempt_model_rt();
}
-/*
- * Does a critical section need to be broken due to another
- * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
- * but a general need for low latency)
- */
-static inline int spin_needbreak(spinlock_t *lock)
-{
-#ifdef CONFIG_PREEMPTION
- return spin_is_contended(lock);
-#else
- return 0;
-#endif
-}
-
-/*
- * Check if a rwlock is contended.
- * Returns non-zero if there is another task waiting on the rwlock.
- * Returns zero if the lock is not contended or the system / underlying
- * rwlock implementation does not support contention detection.
- * Technically does not depend on CONFIG_PREEMPTION, but a general need
- * for low latency.
- */
-static inline int rwlock_needbreak(rwlock_t *lock)
-{
-#ifdef CONFIG_PREEMPTION
- return rwlock_is_contended(lock);
-#else
- return 0;
-#endif
-}
-
static __always_inline bool need_resched(void)
{
return unlikely(tif_need_resched());
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 31d3d747a9db..0c71f06454d9 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -449,6 +449,37 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
return raw_spin_is_contended(&lock->rlock);
}
+/*
+ * Does a critical section need to be broken due to another
+ * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
+ * but a general need for low latency)
+ */
+static inline int spin_needbreak(spinlock_t *lock)
+{
+#ifdef CONFIG_PREEMPTION
+ return spin_is_contended(lock);
+#else
+ return 0;
+#endif
+}
+
+/*
+ * Check if a rwlock is contended.
+ * Returns non-zero if there is another task waiting on the rwlock.
+ * Returns zero if the lock is not contended or the system / underlying
+ * rwlock implementation does not support contention detection.
+ * Technically does not depend on CONFIG_PREEMPTION, but a general need
+ * for low latency.
+ */
+static inline int rwlock_needbreak(rwlock_t *lock)
+{
+#ifdef CONFIG_PREEMPTION
+ return rwlock_is_contended(lock);
+#else
+ return 0;
+#endif
+}
+
#define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock)
#else /* !CONFIG_PREEMPT_RT */
--
2.43.0
On Fri, Dec 15, 2023 at 10:26:15PM -0500, Kent Overstreet wrote:
> This lets us kill the dependency on spinlock.h.
>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
> include/linux/sched.h | 31 -------------------------------
> include/linux/spinlock.h | 31 +++++++++++++++++++++++++++++++
> 2 files changed, 31 insertions(+), 31 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 5a5b7b122682..7501a3451a20 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -2227,37 +2227,6 @@ static inline bool preempt_model_preemptible(void)
> return preempt_model_full() || preempt_model_rt();
> }
>
> -/*
> - * Does a critical section need to be broken due to another
> - * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
> - * but a general need for low latency)
> - */
> -static inline int spin_needbreak(spinlock_t *lock)
> -{
> -#ifdef CONFIG_PREEMPTION
> - return spin_is_contended(lock);
> -#else
> - return 0;
> -#endif
> -}
> -
> -/*
> - * Check if a rwlock is contended.
> - * Returns non-zero if there is another task waiting on the rwlock.
> - * Returns zero if the lock is not contended or the system / underlying
> - * rwlock implementation does not support contention detection.
> - * Technically does not depend on CONFIG_PREEMPTION, but a general need
> - * for low latency.
> - */
> -static inline int rwlock_needbreak(rwlock_t *lock)
> -{
> -#ifdef CONFIG_PREEMPTION
> - return rwlock_is_contended(lock);
> -#else
> - return 0;
> -#endif
> -}
> -
> static __always_inline bool need_resched(void)
> {
> return unlikely(tif_need_resched());
> diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
> index 31d3d747a9db..0c71f06454d9 100644
> --- a/include/linux/spinlock.h
> +++ b/include/linux/spinlock.h
> @@ -449,6 +449,37 @@ static __always_inline int spin_is_contended(spinlock_t *lock)
> return raw_spin_is_contended(&lock->rlock);
> }
>
> +/*
> + * Does a critical section need to be broken due to another
> + * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
> + * but a general need for low latency)
> + */
> +static inline int spin_needbreak(spinlock_t *lock)
> +{
> +#ifdef CONFIG_PREEMPTION
> + return spin_is_contended(lock);
> +#else
> + return 0;
> +#endif
> +}
> +
> +/*
> + * Check if a rwlock is contended.
> + * Returns non-zero if there is another task waiting on the rwlock.
> + * Returns zero if the lock is not contended or the system / underlying
> + * rwlock implementation does not support contention detection.
> + * Technically does not depend on CONFIG_PREEMPTION, but a general need
> + * for low latency.
> + */
> +static inline int rwlock_needbreak(rwlock_t *lock)
> +{
> +#ifdef CONFIG_PREEMPTION
> + return rwlock_is_contended(lock);
> +#else
> + return 0;
> +#endif
> +}
> +
> #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock)
>
> #else /* !CONFIG_PREEMPT_RT */
> --
> 2.43.0
Hello Kent,
This patch is breaking PREEMPT_RT builds, but it can be easily fixed.
I sent a patch with the fix, please take a look:
https://lore.kernel.org/all/20240115201935.2326400-1-leobras@redhat.com/
Thanks!
Leo
ktime.h pulls in quite a few headers recursively (including printk.h) -
this is going to help with trimming sched.h dependencies.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
include/linux/ktime.h | 8 +++-----
include/linux/types.h | 3 +++
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 73f20deb497d..3a4e723eae0f 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -21,12 +21,10 @@
#ifndef _LINUX_KTIME_H
#define _LINUX_KTIME_H
-#include <linux/time.h>
-#include <linux/jiffies.h>
#include <asm/bug.h>
-
-/* Nanosecond scalar representation for kernel time values */
-typedef s64 ktime_t;
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/types.h>
/**
* ktime_set - Set a ktime_t variable from a seconds/nanoseconds value
diff --git a/include/linux/types.h b/include/linux/types.h
index 253168bb3fe1..2bc8766ba20c 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -120,6 +120,9 @@ typedef s64 int64_t;
#define aligned_be64 __aligned_be64
#define aligned_le64 __aligned_le64
+/* Nanosecond scalar representation for kernel time values */
+typedef s64 ktime_t;
+
/**
* The type used for indexing onto a disc or disc partition.
*
--
2.43.0
We need to reduce the scope of what's included in sched.h: task_struct
includes a hrtimer, so split out the core types into their own header.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/hrtimer.h | 44 ++----------------------------
include/linux/hrtimer_types.h | 50 +++++++++++++++++++++++++++++++++++
include/linux/sched.h | 2 +-
3 files changed, 53 insertions(+), 43 deletions(-)
create mode 100644 include/linux/hrtimer_types.h
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 02d264ca9dce..87e3bedf8eb0 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -13,13 +13,13 @@
#define _LINUX_HRTIMER_H
#include <linux/hrtimer_defs.h>
-#include <linux/rbtree.h>
+#include <linux/hrtimer_types.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/percpu-defs.h>
+#include <linux/rbtree.h>
#include <linux/seqlock.h>
#include <linux/timer.h>
-#include <linux/timerqueue.h>
struct hrtimer_clock_base;
struct hrtimer_cpu_base;
@@ -59,14 +59,6 @@ enum hrtimer_mode {
HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
};
-/*
- * Return values for the callback function
- */
-enum hrtimer_restart {
- HRTIMER_NORESTART, /* Timer is not restarted */
- HRTIMER_RESTART, /* Timer must be restarted */
-};
-
/*
* Values to track state of the timer
*
@@ -94,38 +86,6 @@ enum hrtimer_restart {
#define HRTIMER_STATE_INACTIVE 0x00
#define HRTIMER_STATE_ENQUEUED 0x01
-/**
- * struct hrtimer - the basic hrtimer structure
- * @node: timerqueue node, which also manages node.expires,
- * the absolute expiry time in the hrtimers internal
- * representation. The time is related to the clock on
- * which the timer is based. Is setup by adding
- * slack to the _softexpires value. For non range timers
- * identical to _softexpires.
- * @_softexpires: the absolute earliest expiry time of the hrtimer.
- * The time which was given as expiry time when the timer
- * was armed.
- * @function: timer expiry callback function
- * @base: pointer to the timer base (per cpu and per clock)
- * @state: state information (See bit values above)
- * @is_rel: Set if the timer was armed relative
- * @is_soft: Set if hrtimer will be expired in soft interrupt context.
- * @is_hard: Set if hrtimer will be expired in hard interrupt context
- * even on RT.
- *
- * The hrtimer structure must be initialized by hrtimer_init()
- */
-struct hrtimer {
- struct timerqueue_node node;
- ktime_t _softexpires;
- enum hrtimer_restart (*function)(struct hrtimer *);
- struct hrtimer_clock_base *base;
- u8 state;
- u8 is_rel;
- u8 is_soft;
- u8 is_hard;
-};
-
/**
* struct hrtimer_sleeper - simple sleeper structure
* @timer: embedded timer structure
diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h
new file mode 100644
index 000000000000..f4ef391b96a7
--- /dev/null
+++ b/include/linux/hrtimer_types.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_HRTIMER_TYPES_H
+#define _LINUX_HRTIMER_TYPES_H
+
+#include <linux/types.h>
+#include <linux/timerqueue.h>
+
+struct hrtimer_clock_base;
+
+/*
+ * Return values for the callback function
+ */
+enum hrtimer_restart {
+ HRTIMER_NORESTART, /* Timer is not restarted */
+ HRTIMER_RESTART, /* Timer must be restarted */
+};
+
+/**
+ * struct hrtimer - the basic hrtimer structure
+ * @node: timerqueue node, which also manages node.expires,
+ * the absolute expiry time in the hrtimers internal
+ * representation. The time is related to the clock on
+ * which the timer is based. Is setup by adding
+ * slack to the _softexpires value. For non range timers
+ * identical to _softexpires.
+ * @_softexpires: the absolute earliest expiry time of the hrtimer.
+ * The time which was given as expiry time when the timer
+ * was armed.
+ * @function: timer expiry callback function
+ * @base: pointer to the timer base (per cpu and per clock)
+ * @state: state information (See bit values above)
+ * @is_rel: Set if the timer was armed relative
+ * @is_soft: Set if hrtimer will be expired in soft interrupt context.
+ * @is_hard: Set if hrtimer will be expired in hard interrupt context
+ * even on RT.
+ *
+ * The hrtimer structure must be initialized by hrtimer_init()
+ */
+struct hrtimer {
+ struct timerqueue_node node;
+ ktime_t _softexpires;
+ enum hrtimer_restart (*function)(struct hrtimer *);
+ struct hrtimer_clock_base *base;
+ u8 state;
+ u8 is_rel;
+ u8 is_soft;
+ u8 is_hard;
+};
+
+#endif /* _LINUX_HRTIMER_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7501a3451a20..3762809652da 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -17,7 +17,7 @@
#include <linux/kmsan_types.h>
#include <linux/mutex.h>
#include <linux/plist.h>
-#include <linux/hrtimer.h>
+#include <linux/hrtimer_types.h>
#include <linux/irqflags.h>
#include <linux/seccomp.h>
#include <linux/nodemask_types.h>
--
2.43.0
Trimming down sched.h dependencies: we don't want to include more than
the base types.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
---
include/linux/mutex.h | 52 +--------------------------
include/linux/mutex_types.h | 71 +++++++++++++++++++++++++++++++++++++
include/linux/sched.h | 2 +-
3 files changed, 73 insertions(+), 52 deletions(-)
create mode 100644 include/linux/mutex_types.h
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index a33aa9eb9fc3..0dfba5df6524 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -20,6 +20,7 @@
#include <linux/osq_lock.h>
#include <linux/debug_locks.h>
#include <linux/cleanup.h>
+#include <linux/mutex_types.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
@@ -33,49 +34,6 @@
#ifndef CONFIG_PREEMPT_RT
-/*
- * Simple, straightforward mutexes with strict semantics:
- *
- * - only one task can hold the mutex at a time
- * - only the owner can unlock the mutex
- * - multiple unlocks are not permitted
- * - recursive locking is not permitted
- * - a mutex object must be initialized via the API
- * - a mutex object must not be initialized via memset or copying
- * - task may not exit with mutex held
- * - memory areas where held locks reside must not be freed
- * - held mutexes must not be reinitialized
- * - mutexes may not be used in hardware or software interrupt
- * contexts such as tasklets and timers
- *
- * These semantics are fully enforced when DEBUG_MUTEXES is
- * enabled. Furthermore, besides enforcing the above rules, the mutex
- * debugging code also implements a number of additional features
- * that make lock debugging easier and faster:
- *
- * - uses symbolic names of mutexes, whenever they are printed in debug output
- * - point-of-acquire tracking, symbolic lookup of function names
- * - list of all locks held in the system, printout of them
- * - owner tracking
- * - detects self-recursing locks and prints out all relevant info
- * - detects multi-task circular deadlocks and prints out all affected
- * locks and tasks (and only those tasks)
- */
-struct mutex {
- atomic_long_t owner;
- raw_spinlock_t wait_lock;
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
- struct optimistic_spin_queue osq; /* Spinner MCS lock */
-#endif
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_MUTEXES
- void *magic;
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
#ifdef CONFIG_DEBUG_MUTEXES
#define __DEBUG_MUTEX_INITIALIZER(lockname) \
@@ -131,14 +89,6 @@ extern bool mutex_is_locked(struct mutex *lock);
/*
* Preempt-RT variant based on rtmutexes.
*/
-#include <linux/rtmutex.h>
-
-struct mutex {
- struct rt_mutex_base rtmutex;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
#define __MUTEX_INITIALIZER(mutexname) \
{ \
diff --git a/include/linux/mutex_types.h b/include/linux/mutex_types.h
new file mode 100644
index 000000000000..fdf7f515fde8
--- /dev/null
+++ b/include/linux/mutex_types.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_MUTEX_TYPES_H
+#define __LINUX_MUTEX_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/lockdep_types.h>
+#include <linux/osq_lock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+
+#ifndef CONFIG_PREEMPT_RT
+
+/*
+ * Simple, straightforward mutexes with strict semantics:
+ *
+ * - only one task can hold the mutex at a time
+ * - only the owner can unlock the mutex
+ * - multiple unlocks are not permitted
+ * - recursive locking is not permitted
+ * - a mutex object must be initialized via the API
+ * - a mutex object must not be initialized via memset or copying
+ * - task may not exit with mutex held
+ * - memory areas where held locks reside must not be freed
+ * - held mutexes must not be reinitialized
+ * - mutexes may not be used in hardware or software interrupt
+ * contexts such as tasklets and timers
+ *
+ * These semantics are fully enforced when DEBUG_MUTEXES is
+ * enabled. Furthermore, besides enforcing the above rules, the mutex
+ * debugging code also implements a number of additional features
+ * that make lock debugging easier and faster:
+ *
+ * - uses symbolic names of mutexes, whenever they are printed in debug output
+ * - point-of-acquire tracking, symbolic lookup of function names
+ * - list of all locks held in the system, printout of them
+ * - owner tracking
+ * - detects self-recursing locks and prints out all relevant info
+ * - detects multi-task circular deadlocks and prints out all affected
+ * locks and tasks (and only those tasks)
+ */
+struct mutex {
+ atomic_long_t owner;
+ raw_spinlock_t wait_lock;
+#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+ struct optimistic_spin_queue osq; /* Spinner MCS lock */
+#endif
+ struct list_head wait_list;
+#ifdef CONFIG_DEBUG_MUTEXES
+ void *magic;
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+#else /* !CONFIG_PREEMPT_RT */
+/*
+ * Preempt-RT variant based on rtmutexes.
+ */
+#include <linux/rtmutex.h>
+
+struct mutex {
+ struct rt_mutex_base rtmutex;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+#endif /* CONFIG_PREEMPT_RT */
+
+#endif /* __LINUX_MUTEX_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3762809652da..e8892789969b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -15,7 +15,7 @@
#include <linux/sem.h>
#include <linux/shm.h>
#include <linux/kmsan_types.h>
-#include <linux/mutex.h>
+#include <linux/mutex_types.h>
#include <linux/plist.h>
#include <linux/hrtimer_types.h>
#include <linux/irqflags.h>
--
2.43.0
Trimming down sched.h dependencies: we don't want to include more than
the base types.
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
include/linux/posix-timers.h | 68 ++--------------------------
include/linux/posix-timers_types.h | 72 ++++++++++++++++++++++++++++++
include/linux/sched.h | 2 +-
3 files changed, 76 insertions(+), 66 deletions(-)
create mode 100644 include/linux/posix-timers_types.h
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index d607f51404fc..750b0647258d 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -2,40 +2,16 @@
#ifndef _linux_POSIX_TIMERS_H
#define _linux_POSIX_TIMERS_H
-#include <linux/spinlock.h>
+#include <linux/alarmtimer.h>
#include <linux/list.h>
#include <linux/mutex.h>
-#include <linux/alarmtimer.h>
+#include <linux/posix-timers_types.h>
+#include <linux/spinlock.h>
#include <linux/timerqueue.h>
struct kernel_siginfo;
struct task_struct;
-/*
- * Bit fields within a clockid:
- *
- * The most significant 29 bits hold either a pid or a file descriptor.
- *
- * Bit 2 indicates whether a cpu clock refers to a thread or a process.
- *
- * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
- *
- * A clockid is invalid if bits 2, 1, and 0 are all set.
- */
-#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3))
-#define CPUCLOCK_PERTHREAD(clock) \
- (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
-
-#define CPUCLOCK_PERTHREAD_MASK 4
-#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
-#define CPUCLOCK_CLOCK_MASK 3
-#define CPUCLOCK_PROF 0
-#define CPUCLOCK_VIRT 1
-#define CPUCLOCK_SCHED 2
-#define CPUCLOCK_MAX 3
-#define CLOCKFD CPUCLOCK_MAX
-#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
-
static inline clockid_t make_process_cpuclock(const unsigned int pid,
const clockid_t clock)
{
@@ -109,44 +85,6 @@ static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp)
ctmr->node.expires = exp;
}
-/**
- * posix_cputimer_base - Container per posix CPU clock
- * @nextevt: Earliest-expiration cache
- * @tqhead: timerqueue head for cpu_timers
- */
-struct posix_cputimer_base {
- u64 nextevt;
- struct timerqueue_head tqhead;
-};
-
-/**
- * posix_cputimers - Container for posix CPU timer related data
- * @bases: Base container for posix CPU clocks
- * @timers_active: Timers are queued.
- * @expiry_active: Timer expiry is active. Used for
- * process wide timers to avoid multiple
- * task trying to handle expiry concurrently
- *
- * Used in task_struct and signal_struct
- */
-struct posix_cputimers {
- struct posix_cputimer_base bases[CPUCLOCK_MAX];
- unsigned int timers_active;
- unsigned int expiry_active;
-};
-
-/**
- * posix_cputimers_work - Container for task work based posix CPU timer expiry
- * @work: The task work to be scheduled
- * @mutex: Mutex held around expiry in context of this task work
- * @scheduled: @work has been scheduled already, no further processing
- */
-struct posix_cputimers_work {
- struct callback_head work;
- struct mutex mutex;
- unsigned int scheduled;
-};
-
static inline void posix_cputimers_init(struct posix_cputimers *pct)
{
memset(pct, 0, sizeof(*pct));
diff --git a/include/linux/posix-timers_types.h b/include/linux/posix-timers_types.h
new file mode 100644
index 000000000000..57fec639a9bb
--- /dev/null
+++ b/include/linux/posix-timers_types.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _linux_POSIX_TIMERS_TYPES_H
+#define _linux_POSIX_TIMERS_TYPES_H
+
+#include <linux/mutex_types.h>
+#include <linux/timerqueue.h>
+#include <linux/types.h>
+
+/*
+ * Bit fields within a clockid:
+ *
+ * The most significant 29 bits hold either a pid or a file descriptor.
+ *
+ * Bit 2 indicates whether a cpu clock refers to a thread or a process.
+ *
+ * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
+ *
+ * A clockid is invalid if bits 2, 1, and 0 are all set.
+ */
+#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3))
+#define CPUCLOCK_PERTHREAD(clock) \
+ (((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
+
+#define CPUCLOCK_PERTHREAD_MASK 4
+#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
+#define CPUCLOCK_CLOCK_MASK 3
+#define CPUCLOCK_PROF 0
+#define CPUCLOCK_VIRT 1
+#define CPUCLOCK_SCHED 2
+#define CPUCLOCK_MAX 3
+#define CLOCKFD CPUCLOCK_MAX
+#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
+
+/**
+ * struct posix_cputimer_base - Container per posix CPU clock
+ * @nextevt: Earliest-expiration cache
+ * @tqhead: timerqueue head for cpu_timers
+ */
+struct posix_cputimer_base {
+ u64 nextevt;
+ struct timerqueue_head tqhead;
+};
+
+/**
+ * struct posix_cputimers - Container for posix CPU timer related data
+ * @bases: Base container for posix CPU clocks
+ * @timers_active: Timers are queued.
+ * @expiry_active: Timer expiry is active. Used for
+ * process-wide timers to avoid multiple
+ * tasks trying to handle expiry concurrently
+ *
+ * Used in task_struct and signal_struct
+ */
+struct posix_cputimers {
+ struct posix_cputimer_base bases[CPUCLOCK_MAX];
+ unsigned int timers_active;
+ unsigned int expiry_active;
+};
+
+/**
+ * struct posix_cputimers_work - Container for task work based posix CPU timer expiry
+ * @work: The task work to be scheduled
+ * @mutex: Mutex held around expiry in context of this task work
+ * @scheduled: @work has been scheduled already, no further processing
+ */
+struct posix_cputimers_work {
+ struct callback_head work;
+ struct mutex mutex;
+ unsigned int scheduled;
+};
+
+#endif /* _linux_POSIX_TIMERS_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e8892789969b..6d803d0904d9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -31,7 +31,7 @@
#include <linux/syscall_user_dispatch.h>
#include <linux/mm_types_task.h>
#include <linux/task_io_accounting.h>
-#include <linux/posix-timers.h>
+#include <linux/posix-timers_types.h>
#include <linux/rseq.h>
#include <linux/seqlock.h>
#include <linux/kcsan.h>
--
2.43.0
Trimming down sched.h dependencies: we don't want to include more than
the base types.
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
include/linux/sched.h | 2 +-
include/linux/seqlock.h | 79 +----------------------------
include/linux/seqlock_types.h | 93 +++++++++++++++++++++++++++++++++++
3 files changed, 96 insertions(+), 78 deletions(-)
create mode 100644 include/linux/seqlock_types.h
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d803d0904d9..436f7ce1450a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -33,7 +33,7 @@
#include <linux/task_io_accounting.h>
#include <linux/posix-timers_types.h>
#include <linux/rseq.h>
-#include <linux/seqlock.h>
+#include <linux/seqlock_types.h>
#include <linux/kcsan.h>
#include <linux/rv.h>
#include <linux/livepatch_sched.h>
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index e92f9d5577ba..d90d8ee29d81 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -18,6 +18,7 @@
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/preempt.h>
+#include <linux/seqlock_types.h>
#include <linux/spinlock.h>
#include <asm/processor.h>
@@ -37,37 +38,6 @@
*/
#define KCSAN_SEQLOCK_REGION_MAX 1000
-/*
- * Sequence counters (seqcount_t)
- *
- * This is the raw counting mechanism, without any writer protection.
- *
- * Write side critical sections must be serialized and non-preemptible.
- *
- * If readers can be invoked from hardirq or softirq contexts,
- * interrupts or bottom halves must also be respectively disabled before
- * entering the write section.
- *
- * This mechanism can't be used if the protected data contains pointers,
- * as the writer can invalidate a pointer that a reader is following.
- *
- * If the write serialization mechanism is one of the common kernel
- * locking primitives, use a sequence counter with associated lock
- * (seqcount_LOCKNAME_t) instead.
- *
- * If it's desired to automatically handle the sequence counter writer
- * serialization and non-preemptibility requirements, use a sequential
- * lock (seqlock_t) instead.
- *
- * See Documentation/locking/seqlock.rst
- */
-typedef struct seqcount {
- unsigned sequence;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-} seqcount_t;
-
static inline void __seqcount_init(seqcount_t *s, const char *name,
struct lock_class_key *key)
{
@@ -131,28 +101,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
* See Documentation/locking/seqlock.rst
*/
-/*
- * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
- * disable preemption. It can lead to higher latencies, and the write side
- * sections will not be able to acquire locks which become sleeping locks
- * (e.g. spinlock_t).
- *
- * To remain preemptible while avoiding a possible livelock caused by the
- * reader preempting the writer, use a different technique: let the reader
- * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
- * case, acquire then release the associated LOCKNAME writer serialization
- * lock. This will allow any possibly-preempted writer to make progress
- * until the end of its writer serialization lock critical section.
- *
- * This lock-unlock technique must be implemented for all of PREEMPT_RT
- * sleeping locks. See Documentation/locking/locktypes.rst
- */
-#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
-#define __SEQ_LOCK(expr) expr
-#else
-#define __SEQ_LOCK(expr)
-#endif
-
/*
* typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated
* @seqcount: The real sequence counter
@@ -194,11 +142,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
* @lockbase: prefix for associated lock/unlock
*/
#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \
-typedef struct seqcount_##lockname { \
- seqcount_t seqcount; \
- __SEQ_LOCK(locktype *lock); \
-} seqcount_##lockname##_t; \
- \
static __always_inline seqcount_t * \
__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \
{ \
@@ -284,6 +227,7 @@ SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin)
SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin)
SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read)
SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
+#undef SEQCOUNT_LOCKNAME
/*
* SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
@@ -794,25 +738,6 @@ static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
smp_wmb(); /* increment "sequence" before following stores */
}
-/*
- * Sequential locks (seqlock_t)
- *
- * Sequence counters with an embedded spinlock for writer serialization
- * and non-preemptibility.
- *
- * For more info, see:
- * - Comments on top of seqcount_t
- * - Documentation/locking/seqlock.rst
- */
-typedef struct {
- /*
- * Make sure that readers don't starve writers on PREEMPT_RT: use
- * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
- */
- seqcount_spinlock_t seqcount;
- spinlock_t lock;
-} seqlock_t;
-
#define __SEQLOCK_UNLOCKED(lockname) \
{ \
.seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \
diff --git a/include/linux/seqlock_types.h b/include/linux/seqlock_types.h
new file mode 100644
index 000000000000..dfdf43e3fa3d
--- /dev/null
+++ b/include/linux/seqlock_types.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_SEQLOCK_TYPES_H
+#define __LINUX_SEQLOCK_TYPES_H
+
+#include <linux/lockdep_types.h>
+#include <linux/mutex_types.h>
+#include <linux/spinlock_types.h>
+
+/*
+ * Sequence counters (seqcount_t)
+ *
+ * This is the raw counting mechanism, without any writer protection.
+ *
+ * Write side critical sections must be serialized and non-preemptible.
+ *
+ * If readers can be invoked from hardirq or softirq contexts,
+ * interrupts or bottom halves must also be respectively disabled before
+ * entering the write section.
+ *
+ * This mechanism can't be used if the protected data contains pointers,
+ * as the writer can invalidate a pointer that a reader is following.
+ *
+ * If the write serialization mechanism is one of the common kernel
+ * locking primitives, use a sequence counter with associated lock
+ * (seqcount_LOCKNAME_t) instead.
+ *
+ * If it's desired to automatically handle the sequence counter writer
+ * serialization and non-preemptibility requirements, use a sequential
+ * lock (seqlock_t) instead.
+ *
+ * See Documentation/locking/seqlock.rst
+ */
+typedef struct seqcount {
+ unsigned sequence;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+} seqcount_t;
+
+/*
+ * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
+ * disable preemption. It can lead to higher latencies, and the write side
+ * sections will not be able to acquire locks which become sleeping locks
+ * (e.g. spinlock_t).
+ *
+ * To remain preemptible while avoiding a possible livelock caused by the
+ * reader preempting the writer, use a different technique: let the reader
+ * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
+ * case, acquire then release the associated LOCKNAME writer serialization
+ * lock. This will allow any possibly-preempted writer to make progress
+ * until the end of its writer serialization lock critical section.
+ *
+ * This lock-unlock technique must be implemented for all of PREEMPT_RT
+ * sleeping locks. See Documentation/locking/locktypes.rst
+ */
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
+#define __SEQ_LOCK(expr) expr
+#else
+#define __SEQ_LOCK(expr)
+#endif
+
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \
+typedef struct seqcount_##lockname { \
+ seqcount_t seqcount; \
+ __SEQ_LOCK(locktype *lock); \
+} seqcount_##lockname##_t;
+
+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin)
+SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin)
+SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read)
+SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
+#undef SEQCOUNT_LOCKNAME
+
+/*
+ * Sequential locks (seqlock_t)
+ *
+ * Sequence counters with an embedded spinlock for writer serialization
+ * and non-preemptibility.
+ *
+ * For more info, see:
+ * - Comments on top of seqcount_t
+ * - Documentation/locking/seqlock.rst
+ */
+typedef struct {
+ /*
+ * Make sure that readers don't starve writers on PREEMPT_RT: use
+ * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
+ */
+ seqcount_spinlock_t seqcount;
+ spinlock_t lock;
+} seqlock_t;
+
+#endif /* __LINUX_SEQLOCK_TYPES_H */
--
2.43.0
On 12/15/23 22:26, Kent Overstreet wrote:
> Trimming down sched.h dependencies: we don't want to include more than
> the base types.
>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
> include/linux/sched.h | 2 +-
> include/linux/seqlock.h | 79 +----------------------------
> include/linux/seqlock_types.h | 93 +++++++++++++++++++++++++++++++++++
> 3 files changed, 96 insertions(+), 78 deletions(-)
> create mode 100644 include/linux/seqlock_types.h
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 6d803d0904d9..436f7ce1450a 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -33,7 +33,7 @@
> #include <linux/task_io_accounting.h>
> #include <linux/posix-timers_types.h>
> #include <linux/rseq.h>
> -#include <linux/seqlock.h>
> +#include <linux/seqlock_types.h>
> #include <linux/kcsan.h>
> #include <linux/rv.h>
> #include <linux/livepatch_sched.h>
> diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
> index e92f9d5577ba..d90d8ee29d81 100644
> --- a/include/linux/seqlock.h
> +++ b/include/linux/seqlock.h
> @@ -18,6 +18,7 @@
> #include <linux/lockdep.h>
> #include <linux/mutex.h>
> #include <linux/preempt.h>
> +#include <linux/seqlock_types.h>
> #include <linux/spinlock.h>
>
> #include <asm/processor.h>
> @@ -37,37 +38,6 @@
> */
> #define KCSAN_SEQLOCK_REGION_MAX 1000
>
> -/*
> - * Sequence counters (seqcount_t)
> - *
> - * This is the raw counting mechanism, without any writer protection.
> - *
> - * Write side critical sections must be serialized and non-preemptible.
> - *
> - * If readers can be invoked from hardirq or softirq contexts,
> - * interrupts or bottom halves must also be respectively disabled before
> - * entering the write section.
> - *
> - * This mechanism can't be used if the protected data contains pointers,
> - * as the writer can invalidate a pointer that a reader is following.
> - *
> - * If the write serialization mechanism is one of the common kernel
> - * locking primitives, use a sequence counter with associated lock
> - * (seqcount_LOCKNAME_t) instead.
> - *
> - * If it's desired to automatically handle the sequence counter writer
> - * serialization and non-preemptibility requirements, use a sequential
> - * lock (seqlock_t) instead.
> - *
> - * See Documentation/locking/seqlock.rst
> - */
> -typedef struct seqcount {
> - unsigned sequence;
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> - struct lockdep_map dep_map;
> -#endif
> -} seqcount_t;
> -
> static inline void __seqcount_init(seqcount_t *s, const char *name,
> struct lock_class_key *key)
> {
> @@ -131,28 +101,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
> * See Documentation/locking/seqlock.rst
> */
>
> -/*
> - * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
> - * disable preemption. It can lead to higher latencies, and the write side
> - * sections will not be able to acquire locks which become sleeping locks
> - * (e.g. spinlock_t).
> - *
> - * To remain preemptible while avoiding a possible livelock caused by the
> - * reader preempting the writer, use a different technique: let the reader
> - * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
> - * case, acquire then release the associated LOCKNAME writer serialization
> - * lock. This will allow any possibly-preempted writer to make progress
> - * until the end of its writer serialization lock critical section.
> - *
> - * This lock-unlock technique must be implemented for all of PREEMPT_RT
> - * sleeping locks. See Documentation/locking/locktypes.rst
> - */
> -#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
> -#define __SEQ_LOCK(expr) expr
> -#else
> -#define __SEQ_LOCK(expr)
> -#endif
> -
> /*
> * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated
> * @seqcount: The real sequence counter
> @@ -194,11 +142,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
> * @lockbase: prefix for associated lock/unlock
> */
> #define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \
> -typedef struct seqcount_##lockname { \
> - seqcount_t seqcount; \
> - __SEQ_LOCK(locktype *lock); \
> -} seqcount_##lockname##_t; \
> - \
> static __always_inline seqcount_t * \
> __seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \
> { \
> @@ -284,6 +227,7 @@ SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin)
> SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin)
> SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read)
> SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
> +#undef SEQCOUNT_LOCKNAME
>
> /*
> * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
> @@ -794,25 +738,6 @@ static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
> smp_wmb(); /* increment "sequence" before following stores */
> }
>
> -/*
> - * Sequential locks (seqlock_t)
> - *
> - * Sequence counters with an embedded spinlock for writer serialization
> - * and non-preemptibility.
> - *
> - * For more info, see:
> - * - Comments on top of seqcount_t
> - * - Documentation/locking/seqlock.rst
> - */
> -typedef struct {
> - /*
> - * Make sure that readers don't starve writers on PREEMPT_RT: use
> - * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
> - */
> - seqcount_spinlock_t seqcount;
> - spinlock_t lock;
> -} seqlock_t;
> -
> #define __SEQLOCK_UNLOCKED(lockname) \
> { \
> .seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \
> diff --git a/include/linux/seqlock_types.h b/include/linux/seqlock_types.h
> new file mode 100644
> index 000000000000..dfdf43e3fa3d
> --- /dev/null
> +++ b/include/linux/seqlock_types.h
> @@ -0,0 +1,93 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __LINUX_SEQLOCK_TYPES_H
> +#define __LINUX_SEQLOCK_TYPES_H
> +
> +#include <linux/lockdep_types.h>
> +#include <linux/mutex_types.h>
> +#include <linux/spinlock_types.h>
> +
> +/*
> + * Sequence counters (seqcount_t)
> + *
> + * This is the raw counting mechanism, without any writer protection.
> + *
> + * Write side critical sections must be serialized and non-preemptible.
> + *
> + * If readers can be invoked from hardirq or softirq contexts,
> + * interrupts or bottom halves must also be respectively disabled before
> + * entering the write section.
> + *
> + * This mechanism can't be used if the protected data contains pointers,
> + * as the writer can invalidate a pointer that a reader is following.
> + *
> + * If the write serialization mechanism is one of the common kernel
> + * locking primitives, use a sequence counter with associated lock
> + * (seqcount_LOCKNAME_t) instead.
> + *
> + * If it's desired to automatically handle the sequence counter writer
> + * serialization and non-preemptibility requirements, use a sequential
> + * lock (seqlock_t) instead.
> + *
> + * See Documentation/locking/seqlock.rst
> + */
> +typedef struct seqcount {
> + unsigned sequence;
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> + struct lockdep_map dep_map;
> +#endif
> +} seqcount_t;
> +
> +/*
> + * For PREEMPT_RT, seqcount_LOCKNAME_t write side critical sections cannot
> + * disable preemption. It can lead to higher latencies, and the write side
> + * sections will not be able to acquire locks which become sleeping locks
> + * (e.g. spinlock_t).
> + *
> + * To remain preemptible while avoiding a possible livelock caused by the
> + * reader preempting the writer, use a different technique: let the reader
> + * detect if a seqcount_LOCKNAME_t writer is in progress. If that is the
> + * case, acquire then release the associated LOCKNAME writer serialization
> + * lock. This will allow any possibly-preempted writer to make progress
> + * until the end of its writer serialization lock critical section.
> + *
> + * This lock-unlock technique must be implemented for all of PREEMPT_RT
> + * sleeping locks. See Documentation/locking/locktypes.rst
> + */
> +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT)
> +#define __SEQ_LOCK(expr) expr
> +#else
> +#define __SEQ_LOCK(expr)
> +#endif
> +
> +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \
> +typedef struct seqcount_##lockname { \
> + seqcount_t seqcount; \
> + __SEQ_LOCK(locktype *lock); \
> +} seqcount_##lockname##_t;
> +
> +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin)
> +SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin)
> +SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read)
> +SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
> +#undef SEQCOUNT_LOCKNAME
> +
> +/*
> + * Sequential locks (seqlock_t)
> + *
> + * Sequence counters with an embedded spinlock for writer serialization
> + * and non-preemptibility.
> + *
> + * For more info, see:
> + * - Comments on top of seqcount_t
> + * - Documentation/locking/seqlock.rst
> + */
> +typedef struct {
> + /*
> + * Make sure that readers don't starve writers on PREEMPT_RT: use
> + * seqcount_spinlock_t instead of seqcount_t. Check __SEQ_LOCK().
> + */
> + seqcount_spinlock_t seqcount;
> + spinlock_t lock;
> +} seqlock_t;
> +
> +#endif /* __LINUX_SEQLOCK_TYPES_H */
seqlock.h is directly included in kernel/sched/sched.h, so breaking out
seqlock_types.h and including only that in include/linux/sched.h should
be OK.
Acked-by: Waiman Long <longman@redhat.com>
© 2016 - 2025 Red Hat, Inc.