[PATCH RFC 1/2] printk: Introduce console_flush_one_record

Andrew Murray posted 2 patches 2 weeks, 3 days ago
There is a newer version of this series
[PATCH RFC 1/2] printk: Introduce console_flush_one_record
Posted by Andrew Murray 2 weeks, 3 days ago
console_flush_all prints all remaining records to all usable consoles
whilst its caller holds console_lock. This can result in large waiting
times for those waiting for console_lock especially where there is a
large volume of records or where the console is slow (e.g. serial).

Let's extract the parts of this function which print a single record
into a new function named console_flush_one_record. This can later
be used for functions that will release and reacquire console_lock
between records.

This commit should not change existing functionality.

Signed-off-by: Andrew Murray <amurray@thegoodpenguin.co.uk>
---
 kernel/printk/printk.c | 162 +++++++++++++++++++++++++++++++------------------
 1 file changed, 102 insertions(+), 60 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 0efbcdda9aaba9d8d877df5e4f1db002d3a596bc..75a3c47e9c0e645a3198c5f56e47df2a8d1871e6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3161,6 +3161,101 @@ static inline void printk_kthreads_check_locked(void) { }
 
 #endif /* CONFIG_PRINTK */
 
+
+/*
+ * Print out one record for each console.
+ *
+ * @do_cond_resched is set by the caller. It can be true only in schedulable
+ * context.
+ *
+ * @next_seq is set to the sequence number after the last available record.
+ * The value is valid only when this function returns true.
+ *
+ * @handover will be set to true if a printk waiter has taken over the
+ * console_lock, in which case the caller is no longer holding the
+ * console_lock. Otherwise it is set to false.
+ *
+ * @any_usable will be set to true if there are any usable consoles.
+ *
+ * Returns true when there was at least one usable console and a record was
+ * flushed. A returned false indicates there were no records to flush for any
+ * of the consoles. It may also indicate that there were no usable consoles,
+ * the context has been lost or there is a panic suitation. Regardless the
+ * reason, the caller should assume it is not useful to immediately try again.
+ *
+ * Requires the console_lock.
+ */
+static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *handover,
+				     bool *any_usable)
+{
+	struct console_flush_type ft;
+	struct console *con;
+	bool any_progress;
+	int cookie;
+
+	*any_usable = false;
+	any_progress = false;
+
+	printk_get_console_flush_type(&ft);
+
+	cookie = console_srcu_read_lock();
+	for_each_console_srcu(con) {
+		short flags = console_srcu_read_flags(con);
+		u64 printk_seq;
+		bool progress;
+
+		/*
+		 * console_flush_one_record() is only responsible for
+		 * nbcon consoles when the nbcon consoles cannot print via
+		 * their atomic or threaded flushing.
+		 */
+		if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
+			continue;
+
+		if (!console_is_usable(con, flags, !do_cond_resched))
+			continue;
+		*any_usable = true;
+
+		if (flags & CON_NBCON) {
+			progress = nbcon_legacy_emit_next_record(con, handover, cookie,
+								 !do_cond_resched);
+			printk_seq = nbcon_seq_read(con);
+		} else {
+			progress = console_emit_next_record(con, handover, cookie);
+			printk_seq = con->seq;
+		}
+
+		/*
+		 * If a handover has occurred, the SRCU read lock
+		 * is already released.
+		 */
+		if (*handover)
+			return false;
+
+		/* Track the next of the highest seq flushed. */
+		if (printk_seq > *next_seq)
+			*next_seq = printk_seq;
+
+		if (!progress)
+			continue;
+		any_progress = true;
+
+		/* Allow panic_cpu to take over the consoles safely. */
+		if (other_cpu_in_panic())
+			goto abandon;
+
+		if (do_cond_resched)
+			cond_resched();
+	}
+	console_srcu_read_unlock(cookie);
+
+	return any_progress;
+
+abandon:
+	console_srcu_read_unlock(cookie);
+	return false;
+}
+
 /*
  * Print out all remaining records to all consoles.
  *
@@ -3186,77 +3281,24 @@ static inline void printk_kthreads_check_locked(void) { }
  */
 static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
 {
-	struct console_flush_type ft;
-	bool any_usable = false;
-	struct console *con;
+	bool any_usable;
 	bool any_progress;
-	int cookie;
 
 	*next_seq = 0;
 	*handover = false;
 
 	do {
-		any_progress = false;
-
-		printk_get_console_flush_type(&ft);
-
-		cookie = console_srcu_read_lock();
-		for_each_console_srcu(con) {
-			short flags = console_srcu_read_flags(con);
-			u64 printk_seq;
-			bool progress;
+		any_progress = console_flush_one_record(do_cond_resched, next_seq, handover,
+							&any_usable);
 
-			/*
-			 * console_flush_all() is only responsible for nbcon
-			 * consoles when the nbcon consoles cannot print via
-			 * their atomic or threaded flushing.
-			 */
-			if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
-				continue;
-
-			if (!console_is_usable(con, flags, !do_cond_resched))
-				continue;
-			any_usable = true;
-
-			if (flags & CON_NBCON) {
-				progress = nbcon_legacy_emit_next_record(con, handover, cookie,
-									 !do_cond_resched);
-				printk_seq = nbcon_seq_read(con);
-			} else {
-				progress = console_emit_next_record(con, handover, cookie);
-				printk_seq = con->seq;
-			}
-
-			/*
-			 * If a handover has occurred, the SRCU read lock
-			 * is already released.
-			 */
-			if (*handover)
-				return false;
-
-			/* Track the next of the highest seq flushed. */
-			if (printk_seq > *next_seq)
-				*next_seq = printk_seq;
-
-			if (!progress)
-				continue;
-			any_progress = true;
-
-			/* Allow panic_cpu to take over the consoles safely. */
-			if (other_cpu_in_panic())
-				goto abandon;
+		if (*handover)
+			return false;
 
-			if (do_cond_resched)
-				cond_resched();
-		}
-		console_srcu_read_unlock(cookie);
+		if (other_cpu_in_panic())
+			return false;
 	} while (any_progress);
 
 	return any_usable;
-
-abandon:
-	console_srcu_read_unlock(cookie);
-	return false;
 }
 
 static void __console_flush_and_unlock(void)

-- 
2.34.1
Re: [PATCH RFC 1/2] printk: Introduce console_flush_one_record
Posted by Petr Mladek 1 week, 6 days ago
On Mon 2025-09-15 13:43:05, Andrew Murray wrote:
> console_flush_all prints all remaining records to all usable consoles
> whilst its caller holds console_lock. This can result in large waiting
> times for those waiting for console_lock especially where there is a
> large volume of records or where the console is slow (e.g. serial).
> 
> Let's extract the parts of this function which print a single record
> into a new function named console_flush_one_record. This can later
> be used for functions that will release and reacquire console_lock
> between records.
> 
> This commit should not change existing functionality.
> 
> Signed-off-by: Andrew Murray <amurray@thegoodpenguin.co.uk>

The code shufling looks correct to me. Feel free to use:

Reviewed-by: Petr Mladek <pmladek@suse.com>

See a nit below.

> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -3186,77 +3281,24 @@ static inline void printk_kthreads_check_locked(void) { }
>   */
>  static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
>  {
> -	struct console_flush_type ft;
> -	bool any_usable = false;
> -	struct console *con;
> +	bool any_usable;
>  	bool any_progress;
> -	int cookie;
>  
>  	*next_seq = 0;
>  	*handover = false;
>  
>  	do {
> -		any_progress = false;
> -
> -		printk_get_console_flush_type(&ft);
> -
> -		cookie = console_srcu_read_lock();
> -		for_each_console_srcu(con) {
> -			short flags = console_srcu_read_flags(con);
> -			u64 printk_seq;
> -			bool progress;
> +		any_progress = console_flush_one_record(do_cond_resched, next_seq, handover,
> +							&any_usable);
>  
> -			/*
> -			 * console_flush_all() is only responsible for nbcon
> -			 * consoles when the nbcon consoles cannot print via
> -			 * their atomic or threaded flushing.
> -			 */
> -			if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
> -				continue;
> -
> -			if (!console_is_usable(con, flags, !do_cond_resched))
> -				continue;
> -			any_usable = true;
> -
> -			if (flags & CON_NBCON) {
> -				progress = nbcon_legacy_emit_next_record(con, handover, cookie,
> -									 !do_cond_resched);
> -				printk_seq = nbcon_seq_read(con);
> -			} else {
> -				progress = console_emit_next_record(con, handover, cookie);
> -				printk_seq = con->seq;
> -			}
> -
> -			/*
> -			 * If a handover has occurred, the SRCU read lock
> -			 * is already released.
> -			 */
> -			if (*handover)
> -				return false;
> -
> -			/* Track the next of the highest seq flushed. */
> -			if (printk_seq > *next_seq)
> -				*next_seq = printk_seq;
> -
> -			if (!progress)
> -				continue;
> -			any_progress = true;
> -
> -			/* Allow panic_cpu to take over the consoles safely. */
> -			if (other_cpu_in_panic())
> -				goto abandon;
> +		if (*handover)
> +			return false;
>  
> -			if (do_cond_resched)
> -				cond_resched();
> -		}
> -		console_srcu_read_unlock(cookie);
> +		if (other_cpu_in_panic())
> +			return false;
>  	} while (any_progress);
>  
>  	return any_usable;

The semantic of the function was always complicated. It results
into duplicated checks in the callers.

I think that we could do slightly better in this particular case.
But I would do the refactoring in a separate patch to make this
one easier for review.

I played with the code and came up with:

From 7a3c930a12d7c0157f404a4a834d1f92f3070799 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Thu, 18 Sep 2025 16:57:58 +0200
Subject: [RFC] printk: console_flush_one_record() code cleanup

console_flush_one_record() and console_flush_all() duplicate several
checks. They both want to tell the caller that consoles are not
longer usable in this context because it has lost the lock or
the lock has to be reserved for the panic CPU.

Pass this information by clearing the @any_usable parameter value
which has the same effect.

Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 kernel/printk/printk.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 53daab5cdee5..2cceedcc3d34 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3175,7 +3175,8 @@ static inline void printk_kthreads_check_locked(void) { }
  * console_lock, in which case the caller is no longer holding the
  * console_lock. Otherwise it is set to false.
  *
- * @any_usable will be set to true if there are any usable consoles.
+ * @any_usable will be set to true if there are any usable consoles
+ * in this context.
  *
  * Returns true when there was at least one usable console and a record was
  * flushed. A returned false indicates there were no records to flush for any
@@ -3230,7 +3231,7 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
 		 * is already released.
 		 */
 		if (*handover)
-			return false;
+			goto unusable;
 
 		/* Track the next of the highest seq flushed. */
 		if (printk_seq > *next_seq)
@@ -3242,7 +3243,7 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
 
 		/* Allow panic_cpu to take over the consoles safely. */
 		if (other_cpu_in_panic())
-			goto abandon;
+			goto unusable_srcu;
 
 		if (do_cond_resched)
 			cond_resched();
@@ -3251,8 +3252,10 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
 
 	return any_progress;
 
-abandon:
+unusable_srcu:
 	console_srcu_read_unlock(cookie);
+unusable:
+	*any_usable = false;
 	return false;
 }
 
@@ -3288,14 +3291,9 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
 	*handover = false;
 
 	do {
-		any_progress = console_flush_one_record(do_cond_resched, next_seq, handover,
-							&any_usable);
+		any_progress = console_flush_one_record(do_cond_resched, next_seq,
+							handover, &any_usable);
 
-		if (*handover)
-			return false;
-
-		if (other_cpu_in_panic())
-			return false;
 	} while (any_progress);
 
 	return any_usable;
-- 
2.51.0

Best Regards,
Petr
Re: [PATCH RFC 1/2] printk: Introduce console_flush_one_record
Posted by Andrew Murray 1 week, 6 days ago
On Thu, 18 Sept 2025 at 16:22, Petr Mladek <pmladek@suse.com> wrote:
>
> On Mon 2025-09-15 13:43:05, Andrew Murray wrote:
> > console_flush_all prints all remaining records to all usable consoles
> > whilst its caller holds console_lock. This can result in large waiting
> > times for those waiting for console_lock especially where there is a
> > large volume of records or where the console is slow (e.g. serial).
> >
> > Let's extract the parts of this function which print a single record
> > into a new function named console_flush_one_record. This can later
> > be used for functions that will release and reacquire console_lock
> > between records.
> >
> > This commit should not change existing functionality.
> >
> > Signed-off-by: Andrew Murray <amurray@thegoodpenguin.co.uk>
>
> The code shufling looks correct to me. Feel free to use:
>
> Reviewed-by: Petr Mladek <pmladek@suse.com>

Thanks.


>
> See a nit below.
>
> > --- a/kernel/printk/printk.c
> > +++ b/kernel/printk/printk.c
> > @@ -3186,77 +3281,24 @@ static inline void printk_kthreads_check_locked(void) { }
> >   */
> >  static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
> >  {
> > -     struct console_flush_type ft;
> > -     bool any_usable = false;
> > -     struct console *con;
> > +     bool any_usable;
> >       bool any_progress;
> > -     int cookie;
> >
> >       *next_seq = 0;
> >       *handover = false;
> >
> >       do {
> > -             any_progress = false;
> > -
> > -             printk_get_console_flush_type(&ft);
> > -
> > -             cookie = console_srcu_read_lock();
> > -             for_each_console_srcu(con) {
> > -                     short flags = console_srcu_read_flags(con);
> > -                     u64 printk_seq;
> > -                     bool progress;
> > +             any_progress = console_flush_one_record(do_cond_resched, next_seq, handover,
> > +                                                     &any_usable);

Ah, there was an error in my patch. Previously console_flush_all would
set any_usable to false at the start, and then set it to true if any
usable consoles were seen whilst printing any of the records. My
changes resulted in any_usable being set to false on each entry of
console_flush_one_record - thus effectively only indicating if any
consoles were usable for *only* the last record.

Thus to preserve functionality, I'd need to additionally add these changes:

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 75a3c47e9c0e..060d4919de32 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3193,7 +3193,6 @@ static bool console_flush_one_record(bool
do_cond_resched, u64 *next_seq, bool *
        bool any_progress;
        int cookie;

-       *any_usable = false;
        any_progress = false;

        printk_get_console_flush_type(&ft);
@@ -3281,7 +3280,7 @@ static bool console_flush_one_record(bool
do_cond_resched, u64 *next_seq, bool *
  */
 static bool console_flush_all(bool do_cond_resched, u64 *next_seq,
bool *handover)
 {
-       bool any_usable;
+       bool any_usable = false;
        bool any_progress;

        *next_seq = 0;

Are you happy for me to keep your reviewed-by with these changes?


> >
> > -                     /*
> > -                      * console_flush_all() is only responsible for nbcon
> > -                      * consoles when the nbcon consoles cannot print via
> > -                      * their atomic or threaded flushing.
> > -                      */
> > -                     if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
> > -                             continue;
> > -
> > -                     if (!console_is_usable(con, flags, !do_cond_resched))
> > -                             continue;
> > -                     any_usable = true;
> > -
> > -                     if (flags & CON_NBCON) {
> > -                             progress = nbcon_legacy_emit_next_record(con, handover, cookie,
> > -                                                                      !do_cond_resched);
> > -                             printk_seq = nbcon_seq_read(con);
> > -                     } else {
> > -                             progress = console_emit_next_record(con, handover, cookie);
> > -                             printk_seq = con->seq;
> > -                     }
> > -
> > -                     /*
> > -                      * If a handover has occurred, the SRCU read lock
> > -                      * is already released.
> > -                      */
> > -                     if (*handover)
> > -                             return false;
> > -
> > -                     /* Track the next of the highest seq flushed. */
> > -                     if (printk_seq > *next_seq)
> > -                             *next_seq = printk_seq;
> > -
> > -                     if (!progress)
> > -                             continue;
> > -                     any_progress = true;
> > -
> > -                     /* Allow panic_cpu to take over the consoles safely. */
> > -                     if (other_cpu_in_panic())
> > -                             goto abandon;
> > +             if (*handover)
> > +                     return false;
> >
> > -                     if (do_cond_resched)
> > -                             cond_resched();
> > -             }
> > -             console_srcu_read_unlock(cookie);
> > +             if (other_cpu_in_panic())
> > +                     return false;
> >       } while (any_progress);
> >
> >       return any_usable;
>
> The semantic of the function was always complicated. It results
> into duplicated checks in the callers.

Yes, I've found this confusing.

any_usable is used by callers for different purposes. For
__console_flush_and_unlock, its used to determine if its possible to
try and do more work (i.e. to check if they should bother attempting
to print messages which may have been added between flush and unlock)
- i.e. useable consoles exist but also no panics or handovers have
happened. For get_init_console_seq it is used to determine if init_seq
was updated.

The comment for console_flush_all says:

 * Returns true when there was at least one usable console and all messages
 * were flushed to all usable consoles.

But I don't think that's true. It returns true when there was at least
one usable console and at least one message was attempted to be
written (I guess nbcon_legacy_emit_next_record or
console_emit_next_record could bail out before writing its message).
Unless I'm missing some assumptions about what can change in
iterations of the do/while loop.


>
> I think that we could do slightly better in this particular case.
> But I would do the refactoring in a separate patch to make this
> one easier for review.
>
> I played with the code and came up with:
>
> From 7a3c930a12d7c0157f404a4a834d1f92f3070799 Mon Sep 17 00:00:00 2001
> From: Petr Mladek <pmladek@suse.com>
> Date: Thu, 18 Sep 2025 16:57:58 +0200
> Subject: [RFC] printk: console_flush_one_record() code cleanup
>
> console_flush_one_record() and console_flush_all() duplicate several
> checks. They both want to tell the caller that consoles are not
> longer usable in this context because it has lost the lock or
> the lock has to be reserved for the panic CPU.
>
> Pass this information by clearing the @any_usable parameter value
> which has the same effect.
>
> Signed-off-by: Petr Mladek <pmladek@suse.com>
> ---
>  kernel/printk/printk.c | 20 +++++++++-----------
>  1 file changed, 9 insertions(+), 11 deletions(-)
>
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 53daab5cdee5..2cceedcc3d34 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -3175,7 +3175,8 @@ static inline void printk_kthreads_check_locked(void) { }
>   * console_lock, in which case the caller is no longer holding the
>   * console_lock. Otherwise it is set to false.
>   *
> - * @any_usable will be set to true if there are any usable consoles.
> + * @any_usable will be set to true if there are any usable consoles
> + * in this context.
>   *
>   * Returns true when there was at least one usable console and a record was
>   * flushed. A returned false indicates there were no records to flush for any
> @@ -3230,7 +3231,7 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
>                  * is already released.
>                  */
>                 if (*handover)
> -                       return false;
> +                       goto unusable;
>
>                 /* Track the next of the highest seq flushed. */
>                 if (printk_seq > *next_seq)
> @@ -3242,7 +3243,7 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
>
>                 /* Allow panic_cpu to take over the consoles safely. */
>                 if (other_cpu_in_panic())
> -                       goto abandon;
> +                       goto unusable_srcu;
>
>                 if (do_cond_resched)
>                         cond_resched();
> @@ -3251,8 +3252,10 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
>
>         return any_progress;
>
> -abandon:
> +unusable_srcu:
>         console_srcu_read_unlock(cookie);
> +unusable:
> +       *any_usable = false;

I'm struggling to understand this. I think your intention is to start
with any_usable being true (which would have to be set in
console_flush_all) and then set it to false where necessary (i.e.
invert the current logic). unusable_srcu and unusable are two cases
where false should be returned - however false should also be returned
where there are no consoles (for_each_console_srcu gives nothing) - or
where *all* of the consoles it provides are either NBCON(atomic or
offload) or where the !console_is_usable. I.e. console_flush_all may
erroneously return true, which may cause the loop in
__console_flush_and_unlock to iterate too many times.


>         return false;
>  }
>
> @@ -3288,14 +3291,9 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
>         *handover = false;
>
>         do {
> -               any_progress = console_flush_one_record(do_cond_resched, next_seq, handover,
> -                                                       &any_usable);
> +               any_progress = console_flush_one_record(do_cond_resched, next_seq,
> +                                                       handover, &any_usable);
>
> -               if (*handover)
> -                       return false;
> -
> -               if (other_cpu_in_panic())
> -                       return false;

Ah, that makes a lot of sense, any_progress will be false in these cases anyway.


>         } while (any_progress);
>
>         return any_usable;
> --
> 2.51.0
>
> Best Regards,
> Petr

Thanks,

Andrew Murray
Re: [PATCH RFC 1/2] printk: Introduce console_flush_one_record
Posted by Petr Mladek 1 week, 2 days ago
On Fri 2025-09-19 14:06:24, Andrew Murray wrote:
> On Thu, 18 Sept 2025 at 16:22, Petr Mladek <pmladek@suse.com> wrote:
> >
> > On Mon 2025-09-15 13:43:05, Andrew Murray wrote:
> > > console_flush_all prints all remaining records to all usable consoles
> > > whilst its caller holds console_lock. This can result in large waiting
> > > times for those waiting for console_lock especially where there is a
> > > large volume of records or where the console is slow (e.g. serial).
> > >
> > > Let's extract the parts of this function which print a single record
> > > into a new function named console_flush_one_record. This can later
> > > be used for functions that will release and reacquire console_lock
> > > between records.
> > >
> > > This commit should not change existing functionality.
> > >
> > > --- a/kernel/printk/printk.c
> > > +++ b/kernel/printk/printk.c
> > > @@ -3186,77 +3281,24 @@ static inline void printk_kthreads_check_locked(void) { }
> > >   */
> > >  static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
> > >  {
> > > -     struct console_flush_type ft;
> > > -     bool any_usable = false;
> > > -     struct console *con;
> > > +     bool any_usable;
> > >       bool any_progress;
> > > -     int cookie;
> > >
> > >       *next_seq = 0;
> > >       *handover = false;
> > >
> > >       do {
> > > -             any_progress = false;
> > > -
> > > -             printk_get_console_flush_type(&ft);
> > > -
> > > -             cookie = console_srcu_read_lock();
> > > -             for_each_console_srcu(con) {
> > > -                     short flags = console_srcu_read_flags(con);
> > > -                     u64 printk_seq;
> > > -                     bool progress;
> > > +             any_progress = console_flush_one_record(do_cond_resched, next_seq, handover,
> > > +                                                     &any_usable);
> 
> Ah, there was an error in my patch. Previously console_flush_all would
> set any_usable to false at the start, and then set it to true if any
> usable consoles were seen whilst printing any of the records. My
> changes resulted in any_usable being set to false on each entry of
> console_flush_one_record - thus effectively only indicating if any
> consoles were usable for *only* the last record.

I noticed the change but I considered it cosmetic. In fact, I think
that it is better to reset the value in each cycle and return the value
reflecting the current situation.

If I get it correctly, it should not have any bad consequences.
In the worst case, we would return false when the last usable
console was disabled. It actually looks like the right value
for all callers.

> Thus to preserve functionality, I'd need to additionally add these changes:
>
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 75a3c47e9c0e..060d4919de32 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -3193,7 +3193,6 @@ static bool console_flush_one_record(bool
> do_cond_resched, u64 *next_seq, bool *
>         bool any_progress;
>         int cookie;
> 
> -       *any_usable = false;
>         any_progress = false;
> 
>         printk_get_console_flush_type(&ft);
> @@ -3281,7 +3280,7 @@ static bool console_flush_one_record(bool
> do_cond_resched, u64 *next_seq, bool *
>   */
>  static bool console_flush_all(bool do_cond_resched, u64 *next_seq,
> bool *handover)
>  {
> -       bool any_usable;
> +       bool any_usable = false;
>         bool any_progress;
> 
>         *next_seq = 0;
> 
> Are you happy for me to keep your reviewed-by with these changes?

Feel free to do this in v2 and keep my reviewed-by.
It is always better to reshufle a code without changing
the behavior.

> > > -                     /*
> > > -                      * console_flush_all() is only responsible for nbcon
> > > -                      * consoles when the nbcon consoles cannot print via
> > > -                      * their atomic or threaded flushing.
> > > -                      */
> > > -                     if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
> > > -                             continue;
> > > -
> > > -                     if (!console_is_usable(con, flags, !do_cond_resched))
> > > -                             continue;
> > > -                     any_usable = true;
> > > -
> > > -                     if (flags & CON_NBCON) {
> > > -                             progress = nbcon_legacy_emit_next_record(con, handover, cookie,
> > > -                                                                      !do_cond_resched);
> > > -                             printk_seq = nbcon_seq_read(con);
> > > -                     } else {
> > > -                             progress = console_emit_next_record(con, handover, cookie);
> > > -                             printk_seq = con->seq;
> > > -                     }
> > > -
> > > -                     /*
> > > -                      * If a handover has occurred, the SRCU read lock
> > > -                      * is already released.
> > > -                      */
> > > -                     if (*handover)
> > > -                             return false;
> > > -
> > > -                     /* Track the next of the highest seq flushed. */
> > > -                     if (printk_seq > *next_seq)
> > > -                             *next_seq = printk_seq;
> > > -
> > > -                     if (!progress)
> > > -                             continue;
> > > -                     any_progress = true;
> > > -
> > > -                     /* Allow panic_cpu to take over the consoles safely. */
> > > -                     if (other_cpu_in_panic())
> > > -                             goto abandon;
> > > +             if (*handover)
> > > +                     return false;
> > >
> > > -                     if (do_cond_resched)
> > > -                             cond_resched();
> > > -             }
> > > -             console_srcu_read_unlock(cookie);
> > > +             if (other_cpu_in_panic())
> > > +                     return false;
> > >       } while (any_progress);
> > >
> > >       return any_usable;
> >
> > The semantic of the function was always complicated. It results
> > into duplicated checks in the callers.
> 
> Yes, I've found this confusing.
> 
> any_usable is used by callers for different purposes. For
> __console_flush_and_unlock, its used to determine if its possible to
> try and do more work (i.e. to check if they should bother attempting
> to print messages which may have been added between flush and
> unlock)

This implements a historic approach when the current console_lock
owner is responsible for flushing all pending messages.

It makes sure that all messages are flushed to the console.
And parallel printk() does not need to wait for console_lock.

The "try and do more work" handles the following race:

CPU0				CPU1

printk()

  console_trylock()
  console_unlock() 
    __console_flush_and_unlock()
      console_flush_all()

				printk()
				console_trylock()
				  # fails because console_sem
				  # is still owned by CPU0


      __console_unlock()

Problem: Nobody owns console_lock but the message added by
	 CPU1 has not been flushed yet.

Solution: The last owner must check whether there is a new
	  message after releasing console_lock. And it must
	  try to flush it.

	  The try_lock might fail when another CPU gets
	  the console_lock in the meantime. But it is fine.
	  The new owner will flush the message then.


> - i.e. useable consoles exist but also no panics or handovers have
> happened.

These are situations when this context is not longer responsible
for flushing the rest because:

   + there are no usable consoles.

   + non-panic CPUs are going to be stopped. And the messages
     will get flushed by panic CPU, either by printk() called
     on panic CPU or by explicit flush in panic().

   + the console_lock ownership has been passed to another
     CPU via console_trylock_spinning(). The new owner will
     flush the pending messages. It helps to reduce the risk
     of softlockup reports with this historic approach.


> For get_init_console_seq it is used to determine if init_seq
> was updated.

I would rather say that it is used to determine whether init_seq
has a reliable value. Where reliable means that it flushed and
checked all usable consoles.

Note that this value is use to prevent duplicate output when
a proper console driver replaces an early console driver.

We are not able to detect which early/proper drivers are using
the same physical device => it is better to flush all consoles
and use the last next_seq which should be the same on all consoles.


> The comment for console_flush_all says:
> 
>  * Returns true when there was at least one usable console and all messages
>  * were flushed to all usable consoles.
>
> But I don't think that's true. It returns true when there was at least
> one usable console and at least one message was attempted to be
> written (I guess nbcon_legacy_emit_next_record or
> console_emit_next_record could bail out before writing its message).
> Unless I'm missing some assumptions about what can change in
> iterations of the do/while loop.

The "bail out" should be detected. This is why console_flush_all()
explicitly returns "false" in the following situations:

  + handover happened
  + other_cpu_in_panic() returns true
  + both any_progress and any_usable are false

Hmm, I agree that the last case is not 100% correct:

  1. The original code did not reset any_usable in each cycle.
     It might be true even when all usable consoles have been
     disabled in the meantime.

     Well, it is a corner case and should not cause problems.

     In the worst case, console_unlock() will do one more
     cycle (try_lock + console_flush_all) and the new
     console_flush_all() would return false because
     there won't be any usable console

     This problem should not happen with get_init_console_seq()
     because it is called under console_list_lock. Consoles
     could not be disabled in parallel.

     This will actually be fixed by resetting "any_usable" in
     each cycle.


   2. "any_progress" is set to true when at least one console
      made progress.

      It is not a big deal:

	+ console_flush_all() would explicitly return false when
	  there was a handover or other_cpu_in_panic()
	  returned true.

	+ console_emit_next_record() always finishes the job
	  when there is a pending message.

	+ nbcon_legacy_emit_next_record() might lose the owner
	  ship. But the new owner will become responsible for
	  flushing everything. And console_flush_all() would
	  try one more cycle...

      It seems to be working correctly in all situations after all.


> > I think that we could do slightly better in this particular case.
> > But I would do the refactoring in a separate patch to make this
> > one easier for review.
> >
> > I played with the code and came up with:
> >
> > From 7a3c930a12d7c0157f404a4a834d1f92f3070799 Mon Sep 17 00:00:00 2001
> > From: Petr Mladek <pmladek@suse.com>
> > Date: Thu, 18 Sep 2025 16:57:58 +0200
> > Subject: [RFC] printk: console_flush_one_record() code cleanup
> >
> > console_flush_one_record() and console_flush_all() duplicate several
> > checks. They both want to tell the caller that consoles are not
> > longer usable in this context because it has lost the lock or
> > the lock has to be reserved for the panic CPU.
> >
> > Pass this information by clearing the @any_usable parameter value
> > which has the same effect.
> >
> > Signed-off-by: Petr Mladek <pmladek@suse.com>
> > ---
> >  kernel/printk/printk.c | 20 +++++++++-----------
> >  1 file changed, 9 insertions(+), 11 deletions(-)
> >
> > diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> > index 53daab5cdee5..2cceedcc3d34 100644
> > --- a/kernel/printk/printk.c
> > +++ b/kernel/printk/printk.c
> > @@ -3175,7 +3175,8 @@ static inline void printk_kthreads_check_locked(void) { }
> >   * console_lock, in which case the caller is no longer holding the
> >   * console_lock. Otherwise it is set to false.
> >   *
> > - * @any_usable will be set to true if there are any usable consoles.
> > + * @any_usable will be set to true if there are any usable consoles
> > + * in this context.
> >   *
> >   * Returns true when there was at least one usable console and a record was
> >   * flushed. A returned false indicates there were no records to flush for any
> > @@ -3230,7 +3231,7 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
> >                  * is already released.
> >                  */
> >                 if (*handover)
> > -                       return false;
> > +                       goto unusable;
> >
> >                 /* Track the next of the highest seq flushed. */
> >                 if (printk_seq > *next_seq)
> > @@ -3242,7 +3243,7 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
> >
> >                 /* Allow panic_cpu to take over the consoles safely. */
> >                 if (other_cpu_in_panic())
> > -                       goto abandon;
> > +                       goto unusable_srcu;
> >
> >                 if (do_cond_resched)
> >                         cond_resched();
> > @@ -3251,8 +3252,10 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
> >
> >         return any_progress;
> >
> > -abandon:
> > +unusable_srcu:
> >         console_srcu_read_unlock(cookie);
> > +unusable:
> > +       *any_usable = false;
> 
> I'm struggling to understand this. I think your intention is to start
> with any_usable being true (which would have to be set in
> console_flush_all)

No, my intention is to start with @any_usable set to "false".

It would be set to "true" only when console_is_usable() returned
true for at least one console.

And it would be set back to "false" when the function was not
able to finish the job (bailed out) because:

    + handover
    + other_cpu_in_panic()

By other words, the variable @any_usable would have the same
semantic as console_flush_all() return value.

Maybe, we could rename the variable to "flushed". It would
be set to "true" when the function was able to flush on
all usable consoles and when it was not interrupted.

I hope that it makes more sense now.
 
> 
> >         return false;
> >  }
> >
> > @@ -3288,14 +3291,9 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
> >         *handover = false;
> >
> >         do {
> > -               any_progress = console_flush_one_record(do_cond_resched, next_seq, handover,
> > -                                                       &any_usable);
> > +               any_progress = console_flush_one_record(do_cond_resched, next_seq,
> > +                                                       handover, &any_usable);
> >
> > -               if (*handover)
> > -                       return false;
> > -
> > -               if (other_cpu_in_panic())
> > -                       return false;
> 
> Ah, that makes a lot of sense, any_progress will be false in these cases anyway.
> 
> 
> >         } while (any_progress);
> >
> >         return any_usable;

Best Regards,
Petr
Re: [PATCH RFC 1/2] printk: Introduce console_flush_one_record
Posted by Andrew Murray 5 days, 4 hours ago
On Tue, 23 Sept 2025 at 14:30, Petr Mladek <pmladek@suse.com> wrote:
>
> On Fri 2025-09-19 14:06:24, Andrew Murray wrote:
> > On Thu, 18 Sept 2025 at 16:22, Petr Mladek <pmladek@suse.com> wrote:


> > Thus to preserve functionality, I'd need to additionally add these changes:
> >
> > diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> > index 75a3c47e9c0e..060d4919de32 100644
> > --- a/kernel/printk/printk.c
> > +++ b/kernel/printk/printk.c
> > @@ -3193,7 +3193,6 @@ static bool console_flush_one_record(bool
> > do_cond_resched, u64 *next_seq, bool *
> >         bool any_progress;
> >         int cookie;
> >
> > -       *any_usable = false;
> >         any_progress = false;
> >
> >         printk_get_console_flush_type(&ft);
> > @@ -3281,7 +3280,7 @@ static bool console_flush_one_record(bool
> > do_cond_resched, u64 *next_seq, bool *
> >   */
> >  static bool console_flush_all(bool do_cond_resched, u64 *next_seq,
> > bool *handover)
> >  {
> > -       bool any_usable;
> > +       bool any_usable = false;
> >         bool any_progress;
> >
> >         *next_seq = 0;
> >
> > Are you happy for me to keep your reviewed-by with these changes?
>
> Feel free to do this in v2 and keep my reviewed-by.
> It is always better to reshufle a code without changing
> the behavior.

No problem, I've squashed the above into a v2 of this first patch.

I've also included the reverse of this change into your cleanup patch,
thus ensuring we set any_usable to false at the start of
console_flush_one_record, as suggested.


>
> > > > -                     /*
> > > > -                      * console_flush_all() is only responsible for nbcon
> > > > -                      * consoles when the nbcon consoles cannot print via
> > > > -                      * their atomic or threaded flushing.
> > > > -                      */
> > > > -                     if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
> > > > -                             continue;
> > > > -
> > > > -                     if (!console_is_usable(con, flags, !do_cond_resched))
> > > > -                             continue;
> > > > -                     any_usable = true;
> > > > -
> > > > -                     if (flags & CON_NBCON) {
> > > > -                             progress = nbcon_legacy_emit_next_record(con, handover, cookie,
> > > > -                                                                      !do_cond_resched);
> > > > -                             printk_seq = nbcon_seq_read(con);
> > > > -                     } else {
> > > > -                             progress = console_emit_next_record(con, handover, cookie);
> > > > -                             printk_seq = con->seq;
> > > > -                     }
> > > > -
> > > > -                     /*
> > > > -                      * If a handover has occurred, the SRCU read lock
> > > > -                      * is already released.
> > > > -                      */
> > > > -                     if (*handover)
> > > > -                             return false;
> > > > -
> > > > -                     /* Track the next of the highest seq flushed. */
> > > > -                     if (printk_seq > *next_seq)
> > > > -                             *next_seq = printk_seq;
> > > > -
> > > > -                     if (!progress)
> > > > -                             continue;
> > > > -                     any_progress = true;
> > > > -
> > > > -                     /* Allow panic_cpu to take over the consoles safely. */
> > > > -                     if (other_cpu_in_panic())
> > > > -                             goto abandon;
> > > > +             if (*handover)
> > > > +                     return false;
> > > >
> > > > -                     if (do_cond_resched)
> > > > -                             cond_resched();
> > > > -             }
> > > > -             console_srcu_read_unlock(cookie);
> > > > +             if (other_cpu_in_panic())
> > > > +                     return false;
> > > >       } while (any_progress);
> > > >
> > > >       return any_usable;
> > >
> > > The semantic of the function was always complicated. It results
> > > into duplicated checks in the callers.
> >
> > Yes, I've found this confusing.
> >
> > any_usable is used by callers for different purposes. For
> > __console_flush_and_unlock, its used to determine if its possible to
> > try and do more work (i.e. to check if they should bother attempting
> > to print messages which may have been added between flush and
> > unlock)
>
> This implements a historic approach when the current console_lock
> owner is responsible for flushing all pending messages.
>
> It makes sure that all messages are flushed to the console.
> And parallel printk() does not need to wait for console_lock.
>
> The "try and do more work" handles the following race:
>
> CPU0                            CPU1
>
> printk()
>
>   console_trylock()
>   console_unlock()
>     __console_flush_and_unlock()
>       console_flush_all()
>
>                                 printk()
>                                 console_trylock()
>                                   # fails because console_sem
>                                   # is still owned by CPU0
>
>
>       __console_unlock()
>
> Problem: Nobody owns console_lock but the message added by
>          CPU1 has not been flushed yet.
>
> Solution: The last owner must check whether there is a new
>           message after releasing console_lock. And it must
>           try to flush it.
>
>           The try_lock might fail when another CPU gets
>           the console_lock in the meantime. But it is fine.
>           The new owner will flush the message then.
>
>
> > - i.e. useable consoles exist but also no panics or handovers have
> > happened.
>
> These are situations when this context is not longer responsible
> for flushing the rest because:
>
>    + there are no usable consoles.
>
>    + non-panic CPUs are going to be stopped. And the messages
>      will get flushed by panic CPU, either by printk() called
>      on panic CPU or by explicit flush in panic().
>
>    + the console_lock ownership has been passed to another
>      CPU via console_trylock_spinning(). The new owner will
>      flush the pending messages. It helps to reduce the risk
>      of softlockup reports with this historic approach.
>
>
> > For get_init_console_seq it is used to determine if init_seq
> > was updated.
>
> I would rather say that it is used to determine whether init_seq
> has a reliable value. Where reliable means that it flushed and
> checked all usable consoles.
>
> Note that this value is use to prevent duplicate output when
> a proper console driver replaces an early console driver.
>
> We are not able to detect which early/proper drivers are using
> the same physical device => it is better to flush all consoles
> and use the last next_seq which should be the same on all consoles.
>
>
> > The comment for console_flush_all says:
> >
> >  * Returns true when there was at least one usable console and all messages
> >  * were flushed to all usable consoles.
> >
> > But I don't think that's true. It returns true when there was at least
> > one usable console and at least one message was attempted to be
> > written (I guess nbcon_legacy_emit_next_record or
> > console_emit_next_record could bail out before writing its message).
> > Unless I'm missing some assumptions about what can change in
> > iterations of the do/while loop.
>
> The "bail out" should be detected. This is why console_flush_all()
> explicitly returns "false" in the following situations:
>
>   + handover happened
>   + other_cpu_in_panic() returns true
>   + both any_progress and any_usable are false
>
> Hmm, I agree that the last case is not 100% correct:
>
>   1. The original code did not reset any_usable in each cycle.
>      It might be true even when all usable consoles have been
>      disabled in the meantime.
>
>      Well, it is a corner case and should not cause problems.
>
>      In the worst case, console_unlock() will do one more
>      cycle (try_lock + console_flush_all) and the new
>      console_flush_all() would return false because
>      there won't be any usable console
>
>      This problem should not happen with get_init_console_seq()
>      because it is called under console_list_lock. Consoles
>      could not be disabled in parallel.
>
>      This will actually be fixed by resetting "any_usable" in
>      each cycle.
>
>
>    2. "any_progress" is set to true when at least one console
>       made progress.
>
>       It is not a big deal:
>
>         + console_flush_all() would explicitly return false when
>           there was a handover or other_cpu_in_panic()
>           returned true.
>
>         + console_emit_next_record() always finishes the job
>           when there is a pending message.
>
>         + nbcon_legacy_emit_next_record() might lose the owner
>           ship. But the new owner will become responsible for
>           flushing everything. And console_flush_all() would
>           try one more cycle...
>
>       It seems to be working correctly in all situations after all.

Thanks for the explanations, much appreciated.


>
>
> > > I think that we could do slightly better in this particular case.
> > > But I would do the refactoring in a separate patch to make this
> > > one easier for review.
> > >
> > > I played with the code and came up with:
> > >
> > > From 7a3c930a12d7c0157f404a4a834d1f92f3070799 Mon Sep 17 00:00:00 2001
> > > From: Petr Mladek <pmladek@suse.com>
> > > Date: Thu, 18 Sep 2025 16:57:58 +0200
> > > Subject: [RFC] printk: console_flush_one_record() code cleanup
> > >
> > > console_flush_one_record() and console_flush_all() duplicate several
> > > checks. They both want to tell the caller that consoles are not
> > > longer usable in this context because it has lost the lock or
> > > the lock has to be reserved for the panic CPU.
> > >
> > > Pass this information by clearing the @any_usable parameter value
> > > which has the same effect.
> > >
> > > Signed-off-by: Petr Mladek <pmladek@suse.com>
> > > ---
> > >  kernel/printk/printk.c | 20 +++++++++-----------
> > >  1 file changed, 9 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> > > index 53daab5cdee5..2cceedcc3d34 100644
> > > --- a/kernel/printk/printk.c
> > > +++ b/kernel/printk/printk.c
> > > @@ -3175,7 +3175,8 @@ static inline void printk_kthreads_check_locked(void) { }
> > >   * console_lock, in which case the caller is no longer holding the
> > >   * console_lock. Otherwise it is set to false.
> > >   *
> > > - * @any_usable will be set to true if there are any usable consoles.
> > > + * @any_usable will be set to true if there are any usable consoles
> > > + * in this context.
> > >   *
> > >   * Returns true when there was at least one usable console and a record was
> > >   * flushed. A returned false indicates there were no records to flush for any
> > > @@ -3230,7 +3231,7 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
> > >                  * is already released.
> > >                  */
> > >                 if (*handover)
> > > -                       return false;
> > > +                       goto unusable;
> > >
> > >                 /* Track the next of the highest seq flushed. */
> > >                 if (printk_seq > *next_seq)
> > > @@ -3242,7 +3243,7 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
> > >
> > >                 /* Allow panic_cpu to take over the consoles safely. */
> > >                 if (other_cpu_in_panic())
> > > -                       goto abandon;
> > > +                       goto unusable_srcu;
> > >
> > >                 if (do_cond_resched)
> > >                         cond_resched();
> > > @@ -3251,8 +3252,10 @@ static bool console_flush_one_record(bool do_cond_resched, u64 *next_seq, bool *
> > >
> > >         return any_progress;
> > >
> > > -abandon:
> > > +unusable_srcu:
> > >         console_srcu_read_unlock(cookie);
> > > +unusable:
> > > +       *any_usable = false;
> >
> > I'm struggling to understand this. I think your intention is to start
> > with any_usable being true (which would have to be set in
> > console_flush_all)
>
> No, my intention is to start with @any_usable set to "false".
>
> It would be set to "true" only when console_is_usable() returned
> true for at least one console.
>
> And it would be set back to "false" when the function was not
> able to finish the job (bailed out) because:
>
>     + handover
>     + other_cpu_in_panic()
>
> By other words, the variable @any_usable would have the same
> semantic as console_flush_all() return value.

Thanks for explaining, I'm happy with this patch now, I've rebased it
onto my series. As suggested, I've updated it to ensure any_usable is
false at the start of console_flush_one_record.


>
> Maybe, we could rename the variable to "flushed". It would
> be set to "true" when the function was able to flush on
> all usable consoles and when it was not interrupted.

Thanks,

Andrew Murray