[PATCH printk v8 31/35] printk: nbcon: Implement emergency sections

John Ogness posted 35 patches 1 year, 3 months ago
[PATCH printk v8 31/35] printk: nbcon: Implement emergency sections
Posted by John Ogness 1 year, 3 months ago
From: Thomas Gleixner <tglx@linutronix.de>

In emergency situations (something has gone wrong but the
system continues to operate), usually important information
(such as a backtrace) is generated via printk(). This
information should be pushed out to the consoles ASAP.

Add per-CPU emergency nesting tracking because an emergency
can arise while in an emergency situation.

Add functions to mark the beginning and end of emergency
sections where the urgent messages are generated.

Perform direct console flushing at the emergency priority if
the current CPU is in an emergency state and it is safe to do
so.

Note that the emergency state is not system-wide. While one CPU
is in an emergency state, another CPU may attempt to print
console messages at normal priority.

Also note that printk() already attempts to flush consoles in
the caller context for normal priority. However, follow-up
changes will introduce printing kthreads, in which case the
normal priority printk() calls will offload to the kthreads.

Co-developed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Thomas Gleixner (Intel) <tglx@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/console.h  |  4 +++
 kernel/printk/internal.h |  1 +
 kernel/printk/nbcon.c    | 75 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/include/linux/console.h b/include/linux/console.h
index 3706f944de46..9a13f91b0c43 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -553,10 +553,14 @@ static inline bool console_is_registered(const struct console *con)
 	hlist_for_each_entry(con, &console_list, node)
 
 #ifdef CONFIG_PRINTK
+extern void nbcon_cpu_emergency_enter(void);
+extern void nbcon_cpu_emergency_exit(void);
 extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt);
 extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt);
 extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt);
 #else
+static inline void nbcon_cpu_emergency_enter(void) { }
+static inline void nbcon_cpu_emergency_exit(void) { }
 static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; }
 static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; }
 static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; }
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index ba2e0f1940bd..8e36d8695f81 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -182,6 +182,7 @@ static inline void printk_get_console_flush_type(struct console_flush_type *ft)
 
 	switch (nbcon_get_default_prio()) {
 	case NBCON_PRIO_NORMAL:
+	case NBCON_PRIO_EMERGENCY:
 		if (have_nbcon_console && !have_boot_console)
 			ft->nbcon_atomic = true;
 
diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index 18488d6c17c0..92ac5c590927 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -972,6 +972,36 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt)
 	return nbcon_context_exit_unsafe(ctxt);
 }
 
+/* Track the nbcon emergency nesting per CPU. */
+static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting);
+static unsigned int early_nbcon_pcpu_emergency_nesting __initdata;
+
+/**
+ * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer
+ *
+ * Context:	For reading, any context. For writing, any context which could
+ *		not be migrated to another CPU.
+ * Return:	Either a pointer to the per CPU emergency nesting counter of
+ *		the current CPU or to the init data during early boot.
+ *
+ * The function is safe for reading per-CPU variables in any context because
+ * preemption is disabled if the current CPU is in the emergency state. See
+ * also nbcon_cpu_emergency_enter().
+ */
+static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
+{
+	/*
+	 * The value of __printk_percpu_data_ready gets set in normal
+	 * context and before SMP initialization. As a result it could
+	 * never change while inside an nbcon emergency section.
+	 */
+	if (!printk_percpu_data_ready())
+		return &early_nbcon_pcpu_emergency_nesting;
+
+	/* Open code this_cpu_ptr() without checking migration. */
+	return per_cpu_ptr(&nbcon_pcpu_emergency_nesting, raw_smp_processor_id());
+}
+
 /**
  * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon
  *				printing on the current CPU
@@ -981,13 +1011,20 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt)
  *		context for printing.
  *
  * The function is safe for reading per-CPU data in any context because
- * preemption is disabled if the current CPU is in the panic state.
+ * preemption is disabled if the current CPU is in the emergency or panic
+ * state.
  */
 enum nbcon_prio nbcon_get_default_prio(void)
 {
+	unsigned int *cpu_emergency_nesting;
+
 	if (this_cpu_in_panic())
 		return NBCON_PRIO_PANIC;
 
+	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
+	if (*cpu_emergency_nesting)
+		return NBCON_PRIO_EMERGENCY;
+
 	return NBCON_PRIO_NORMAL;
 }
 
@@ -1246,6 +1283,42 @@ void nbcon_atomic_flush_unsafe(void)
 	__nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true);
 }
 
+/**
+ * nbcon_cpu_emergency_enter - Enter an emergency section where printk()
+ *				messages for that CPU are flushed directly
+ *
+ * Context:	Any context. Disables preemption.
+ *
+ * When within an emergency section, printk() calls will attempt to flush any
+ * pending messages in the ringbuffer.
+ */
+void nbcon_cpu_emergency_enter(void)
+{
+	unsigned int *cpu_emergency_nesting;
+
+	preempt_disable();
+
+	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
+	(*cpu_emergency_nesting)++;
+}
+
+/**
+ * nbcon_cpu_emergency_exit - Exit an emergency section
+ *
+ * Context:	Within an emergency section. Enables preemption.
+ */
+void nbcon_cpu_emergency_exit(void)
+{
+	unsigned int *cpu_emergency_nesting;
+
+	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
+
+	if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0))
+		(*cpu_emergency_nesting)--;
+
+	preempt_enable();
+}
+
 /**
  * nbcon_alloc - Allocate and init the nbcon console specific data
  * @con:	Console to initialize
-- 
2.39.2
Re: [PATCH printk v8 31/35] printk: nbcon: Implement emergency sections
Posted by John Ogness 1 year, 3 months ago
Hi Petr,

On 2024-08-20, John Ogness <john.ogness@linutronix.de> wrote:
> +static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
> +{
> +	/*
> +	 * The value of __printk_percpu_data_ready gets set in normal
> +	 * context and before SMP initialization. As a result it could
> +	 * never change while inside an nbcon emergency section.
> +	 */
> +	if (!printk_percpu_data_ready())
> +		return &early_nbcon_pcpu_emergency_nesting;
> +
> +	/* Open code this_cpu_ptr() without checking migration. */
> +	return per_cpu_ptr(&nbcon_pcpu_emergency_nesting, raw_smp_processor_id());
> +}

It was pointed out to me that raw_cpu_ptr() exists exactly for this
purpose. There is no need to open code it. Perhaps you can fold the
following patch into this one for linux-next?

John

------------8<--------------
From fe50e9646c44360d88749c2c24c109405b27ad9e Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 27 Aug 2024 14:06:19 +0000
Subject: [PATCH] printk: nbcon: Use raw_cpu_ptr() instead of open coding

There is no need to open code a non-migration-checking
this_cpu_ptr(). That is exactly what raw_cpu_ptr() is.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 kernel/printk/nbcon.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index 92ac5c590927..cf62f675c673 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -998,8 +998,7 @@ static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
 	if (!printk_percpu_data_ready())
 		return &early_nbcon_pcpu_emergency_nesting;
 
-	/* Open code this_cpu_ptr() without checking migration. */
-	return per_cpu_ptr(&nbcon_pcpu_emergency_nesting, raw_smp_processor_id());
+	return raw_cpu_ptr(&nbcon_pcpu_emergency_nesting);
 }
 
 /**
-- 
2.30.2
Re: [PATCH printk v8 31/35] printk: nbcon: Implement emergency sections
Posted by Petr Mladek 1 year, 3 months ago
On Tue 2024-08-27 16:25:31, John Ogness wrote:
> Hi Petr,
> 
> On 2024-08-20, John Ogness <john.ogness@linutronix.de> wrote:
> > +static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
> > +{
> > +	/*
> > +	 * The value of __printk_percpu_data_ready gets set in normal
> > +	 * context and before SMP initialization. As a result it could
> > +	 * never change while inside an nbcon emergency section.
> > +	 */
> > +	if (!printk_percpu_data_ready())
> > +		return &early_nbcon_pcpu_emergency_nesting;
> > +
> > +	/* Open code this_cpu_ptr() without checking migration. */
> > +	return per_cpu_ptr(&nbcon_pcpu_emergency_nesting, raw_smp_processor_id());
> > +}
> 
> It was pointed out to me that raw_cpu_ptr() exists exactly for this
> purpose. There is no need to open code it.

Good to know!

> ------------8<--------------
> From fe50e9646c44360d88749c2c24c109405b27ad9e Mon Sep 17 00:00:00 2001
> From: John Ogness <john.ogness@linutronix.de>
> Date: Tue, 27 Aug 2024 14:06:19 +0000
> Subject: [PATCH] printk: nbcon: Use raw_cpu_ptr() instead of open coding
> 
> There is no need to open code a non-migration-checking
> this_cpu_ptr(). That is exactly what raw_cpu_ptr() is.
> 
> Signed-off-by: John Ogness <john.ogness@linutronix.de>

Reviewed-by: Petr Mladek <pmladek@suse.com>

I wanted to avoid a rebase when it was not really necessary. So, I have
committed the patch into printk/linux.git, branch rework/write_atomic,
on top of the existing patches.

Best Regards,
Petr
[tip: sched/rt] printk: nbcon: Use raw_cpu_ptr() instead of open coding
Posted by tip-bot2 for John Ogness 1 year, 3 months ago
The following commit has been merged into the sched/rt branch of tip:

Commit-ID:     d33d5e683b0d3b4f5fc6a49ce17583f8ca663944
Gitweb:        https://git.kernel.org/tip/d33d5e683b0d3b4f5fc6a49ce17583f8ca663944
Author:        John Ogness <john.ogness@linutronix.de>
AuthorDate:    Tue, 27 Aug 2024 16:25:31 +02:06
Committer:     Petr Mladek <pmladek@suse.com>
CommitterDate: Wed, 04 Sep 2024 12:28:25 +02:00

printk: nbcon: Use raw_cpu_ptr() instead of open coding

There is no need to open code a non-migration-checking
this_cpu_ptr(). That is exactly what raw_cpu_ptr() is.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/87plpum4jw.fsf@jogness.linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 kernel/printk/nbcon.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index 92ac5c5..cf62f67 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -998,8 +998,7 @@ static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
 	if (!printk_percpu_data_ready())
 		return &early_nbcon_pcpu_emergency_nesting;
 
-	/* Open code this_cpu_ptr() without checking migration. */
-	return per_cpu_ptr(&nbcon_pcpu_emergency_nesting, raw_smp_processor_id());
+	return raw_cpu_ptr(&nbcon_pcpu_emergency_nesting);
 }
 
 /**
[tip: sched/rt] printk: nbcon: Implement emergency sections
Posted by tip-bot2 for Thomas Gleixner 1 year, 3 months ago
The following commit has been merged into the sched/rt branch of tip:

Commit-ID:     ecb5e1aa82c86642ec1eaafefd4e317dfba3a238
Gitweb:        https://git.kernel.org/tip/ecb5e1aa82c86642ec1eaafefd4e317dfba3a238
Author:        Thomas Gleixner <tglx@linutronix.de>
AuthorDate:    Tue, 20 Aug 2024 08:35:57 +02:06
Committer:     Petr Mladek <pmladek@suse.com>
CommitterDate: Wed, 21 Aug 2024 15:03:04 +02:00

printk: nbcon: Implement emergency sections

In emergency situations (something has gone wrong but the
system continues to operate), usually important information
(such as a backtrace) is generated via printk(). This
information should be pushed out to the consoles ASAP.

Add per-CPU emergency nesting tracking because an emergency
can arise while in an emergency situation.

Add functions to mark the beginning and end of emergency
sections where the urgent messages are generated.

Perform direct console flushing at the emergency priority if
the current CPU is in an emergency state and it is safe to do
so.

Note that the emergency state is not system-wide. While one CPU
is in an emergency state, another CPU may attempt to print
console messages at normal priority.

Also note that printk() already attempts to flush consoles in
the caller context for normal priority. However, follow-up
changes will introduce printing kthreads, in which case the
normal priority printk() calls will offload to the kthreads.

Co-developed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Thomas Gleixner (Intel) <tglx@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20240820063001.36405-32-john.ogness@linutronix.de
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/console.h  |  4 ++-
 kernel/printk/internal.h |  1 +-
 kernel/printk/nbcon.c    | 75 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/include/linux/console.h b/include/linux/console.h
index 3706f94..9a13f91 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -553,10 +553,14 @@ static inline bool console_is_registered(const struct console *con)
 	hlist_for_each_entry(con, &console_list, node)
 
 #ifdef CONFIG_PRINTK
+extern void nbcon_cpu_emergency_enter(void);
+extern void nbcon_cpu_emergency_exit(void);
 extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt);
 extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt);
 extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt);
 #else
+static inline void nbcon_cpu_emergency_enter(void) { }
+static inline void nbcon_cpu_emergency_exit(void) { }
 static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; }
 static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; }
 static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; }
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index ba2e0f1..8e36d86 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -182,6 +182,7 @@ static inline void printk_get_console_flush_type(struct console_flush_type *ft)
 
 	switch (nbcon_get_default_prio()) {
 	case NBCON_PRIO_NORMAL:
+	case NBCON_PRIO_EMERGENCY:
 		if (have_nbcon_console && !have_boot_console)
 			ft->nbcon_atomic = true;
 
diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index 18488d6..92ac5c5 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -972,6 +972,36 @@ update_con:
 	return nbcon_context_exit_unsafe(ctxt);
 }
 
+/* Track the nbcon emergency nesting per CPU. */
+static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting);
+static unsigned int early_nbcon_pcpu_emergency_nesting __initdata;
+
+/**
+ * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer
+ *
+ * Context:	For reading, any context. For writing, any context which could
+ *		not be migrated to another CPU.
+ * Return:	Either a pointer to the per CPU emergency nesting counter of
+ *		the current CPU or to the init data during early boot.
+ *
+ * The function is safe for reading per-CPU variables in any context because
+ * preemption is disabled if the current CPU is in the emergency state. See
+ * also nbcon_cpu_emergency_enter().
+ */
+static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
+{
+	/*
+	 * The value of __printk_percpu_data_ready gets set in normal
+	 * context and before SMP initialization. As a result it could
+	 * never change while inside an nbcon emergency section.
+	 */
+	if (!printk_percpu_data_ready())
+		return &early_nbcon_pcpu_emergency_nesting;
+
+	/* Open code this_cpu_ptr() without checking migration. */
+	return per_cpu_ptr(&nbcon_pcpu_emergency_nesting, raw_smp_processor_id());
+}
+
 /**
  * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon
  *				printing on the current CPU
@@ -981,13 +1011,20 @@ update_con:
  *		context for printing.
  *
  * The function is safe for reading per-CPU data in any context because
- * preemption is disabled if the current CPU is in the panic state.
+ * preemption is disabled if the current CPU is in the emergency or panic
+ * state.
  */
 enum nbcon_prio nbcon_get_default_prio(void)
 {
+	unsigned int *cpu_emergency_nesting;
+
 	if (this_cpu_in_panic())
 		return NBCON_PRIO_PANIC;
 
+	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
+	if (*cpu_emergency_nesting)
+		return NBCON_PRIO_EMERGENCY;
+
 	return NBCON_PRIO_NORMAL;
 }
 
@@ -1247,6 +1284,42 @@ void nbcon_atomic_flush_unsafe(void)
 }
 
 /**
+ * nbcon_cpu_emergency_enter - Enter an emergency section where printk()
+ *				messages for that CPU are flushed directly
+ *
+ * Context:	Any context. Disables preemption.
+ *
+ * When within an emergency section, printk() calls will attempt to flush any
+ * pending messages in the ringbuffer.
+ */
+void nbcon_cpu_emergency_enter(void)
+{
+	unsigned int *cpu_emergency_nesting;
+
+	preempt_disable();
+
+	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
+	(*cpu_emergency_nesting)++;
+}
+
+/**
+ * nbcon_cpu_emergency_exit - Exit an emergency section
+ *
+ * Context:	Within an emergency section. Enables preemption.
+ */
+void nbcon_cpu_emergency_exit(void)
+{
+	unsigned int *cpu_emergency_nesting;
+
+	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
+
+	if (!WARN_ON_ONCE(*cpu_emergency_nesting == 0))
+		(*cpu_emergency_nesting)--;
+
+	preempt_enable();
+}
+
+/**
  * nbcon_alloc - Allocate and init the nbcon console specific data
  * @con:	Console to initialize
  *