From: Thomas Gleixner <tglx@linutronix.de>
Provide the main implementation for running a printer kthread
per nbcon console that is takeover/handover aware.
Co-developed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Thomas Gleixner (Intel) <tglx@linutronix.de>
---
include/linux/console.h | 26 ++++++
kernel/printk/internal.h | 26 ++++++
kernel/printk/nbcon.c | 196 +++++++++++++++++++++++++++++++++++++--
kernel/printk/printk.c | 34 +++++++
4 files changed, 275 insertions(+), 7 deletions(-)
diff --git a/include/linux/console.h b/include/linux/console.h
index 4aaf053840ee..4de42ec1527c 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -17,6 +17,7 @@
#include <linux/atomic.h>
#include <linux/bits.h>
#include <linux/rculist.h>
+#include <linux/rcuwait.h>
#include <linux/types.h>
#include <linux/vesa.h>
@@ -324,6 +325,8 @@ struct nbcon_write_context {
* @nbcon_seq: Sequence number of the next record for nbcon to print
* @nbcon_device_ctxt: Context available for non-printing operations
* @pbufs: Pointer to nbcon private buffer
+ * @kthread: Printer kthread for this console
+ * @rcuwait: RCU-safe wait object for @kthread waking
*/
struct console {
char name[16];
@@ -373,6 +376,27 @@ struct console {
*/
void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt);
+ /**
+ * @write_thread:
+ *
+ * NBCON callback to write out text in task context.
+ *
+ * This callback is called after device_lock() and with the nbcon
+ * console acquired. Any necessary driver synchronization should have
+ * been performed by the device_lock() callback.
+ *
+ * This callback is always called from task context but with migration
+ * disabled.
+ *
+ * The same criteria for console ownership verification and unsafe
+ * sections applies as with write_atomic(). The difference between
+ * this callback and write_atomic() is that this callback is used
+ * during normal operation and is always called from task context.
+ * This allows drivers to operate in their own locking context for
+ * synchronizing output to the hardware.
+ */
+ void (*write_thread)(struct console *con, struct nbcon_write_context *wctxt);
+
/**
* @device_lock:
*
@@ -420,6 +444,8 @@ struct console {
atomic_long_t __private nbcon_seq;
struct nbcon_context __private nbcon_device_ctxt;
struct printk_buffers *pbufs;
+ struct task_struct *kthread;
+ struct rcuwait rcuwait;
};
#ifdef CONFIG_LOCKDEP
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 0439cf2fdc22..38680c6b2b39 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -92,6 +92,7 @@ enum nbcon_prio nbcon_get_default_prio(void);
void nbcon_atomic_flush_pending(void);
bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
int cookie);
+void nbcon_kthread_create(struct console *con);
/*
* Check if the given console is currently capable and allowed to print
@@ -110,6 +111,8 @@ static inline bool console_is_usable(struct console *con, short flags)
if (flags & CON_NBCON) {
if (!con->write_atomic)
return false;
+ if (!con->write_thread)
+ return false;
} else {
if (!con->write)
return false;
@@ -126,12 +129,35 @@ static inline bool console_is_usable(struct console *con, short flags)
return true;
}
+/**
+ * nbcon_kthread_wake - Wake up a printk thread
+ * @con: Console to operate on
+ */
+static inline void nbcon_kthread_wake(struct console *con)
+{
+ /*
+ * Guarantee any new records can be seen by tasks preparing to wait
+ * before this context checks if the rcuwait is empty.
+ *
+ * The full memory barrier in rcuwait_wake_up() pairs with the full
+ * memory barrier within set_current_state() of
+ * ___rcuwait_wait_event(), which is called after prepare_to_rcuwait()
+ * adds the waiter but before it has checked the wait condition.
+ *
+ * This pairs with nbcon_kthread_func:A.
+ */
+ rcuwait_wake_up(&con->rcuwait); /* LMM(nbcon_kthread_wake:A) */
+}
+
#else
#define PRINTK_PREFIX_MAX 0
#define PRINTK_MESSAGE_MAX 0
#define PRINTKRB_RECORD_MAX 0
+static inline void nbcon_kthread_wake(struct console *con) { }
+static inline void nbcon_kthread_create(struct console *con) { }
+
/*
* In !PRINTK builds we still export console_sem
* semaphore and some of console functions (console_unlock()/etc.), so
diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index 6e9e24aa0a7f..89b340ca303c 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -10,6 +10,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/irqflags.h>
+#include <linux/kthread.h>
#include <linux/minmax.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
@@ -837,6 +838,7 @@ EXPORT_SYMBOL_GPL(nbcon_exit_unsafe);
/**
* nbcon_emit_next_record - Emit a record in the acquired context
* @wctxt: The write context that will be handed to the write function
+ * @use_atomic: True if the write_atomic() callback is to be used
*
* Return: True if this context still owns the console. False if
* ownership was handed over or taken.
@@ -850,7 +852,7 @@ EXPORT_SYMBOL_GPL(nbcon_exit_unsafe);
* When true is returned, @wctxt->ctxt.backlog indicates whether there are
* still records pending in the ringbuffer,
*/
-static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt)
+static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic)
{
struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
struct console *con = ctxt->console;
@@ -899,8 +901,14 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt)
nbcon_state_read(con, &cur);
wctxt->unsafe_takeover = cur.unsafe_takeover;
- if (con->write_atomic) {
+ if (use_atomic &&
+ con->write_atomic) {
con->write_atomic(con, wctxt);
+
+ } else if (!use_atomic &&
+ con->write_thread) {
+ con->write_thread(con, wctxt);
+
} else {
/*
* This function should never be called for legacy consoles.
@@ -936,6 +944,120 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt)
return nbcon_context_exit_unsafe(ctxt);
}
+/**
+ * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup
+ * @con: Console to operate on
+ * @ctxt: The nbcon context from nbcon_context_try_acquire()
+ *
+ * Return: True if the thread should shutdown or if the console is
+ * allowed to print and a record is available. False otherwise.
+ *
+ * After the thread wakes up, it must first check if it should shutdown before
+ * attempting any printing.
+ */
+static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt)
+{
+ bool ret = false;
+ short flags;
+ int cookie;
+
+ if (kthread_should_stop())
+ return true;
+
+ cookie = console_srcu_read_lock();
+
+ flags = console_srcu_read_flags(con);
+ if (console_is_usable(con, flags)) {
+ /* Bring the sequence in @ctxt up to date */
+ ctxt->seq = nbcon_seq_read(con);
+
+ ret = prb_read_valid(prb, ctxt->seq, NULL);
+ }
+
+ console_srcu_read_unlock(cookie);
+ return ret;
+}
+
+/**
+ * nbcon_kthread_func - The printer thread function
+ * @__console: Console to operate on
+ *
+ * Return: 0
+ */
+static int nbcon_kthread_func(void *__console)
+{
+ struct console *con = __console;
+ struct nbcon_write_context wctxt = {
+ .ctxt.console = con,
+ .ctxt.prio = NBCON_PRIO_NORMAL,
+ };
+ struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
+ short con_flags;
+ bool backlog;
+ int cookie;
+ int ret;
+
+wait_for_event:
+ /*
+ * Guarantee this task is visible on the rcuwait before
+ * checking the wake condition.
+ *
+ * The full memory barrier within set_current_state() of
+ * ___rcuwait_wait_event() pairs with the full memory
+ * barrier within rcuwait_has_sleeper().
+ *
+ * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A.
+ */
+ ret = rcuwait_wait_event(&con->rcuwait,
+ nbcon_kthread_should_wakeup(con, ctxt),
+ TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */
+
+ if (kthread_should_stop())
+ return 0;
+
+ /* Wait was interrupted by a spurious signal, go back to sleep. */
+ if (ret)
+ goto wait_for_event;
+
+ do {
+ backlog = false;
+
+ cookie = console_srcu_read_lock();
+
+ con_flags = console_srcu_read_flags(con);
+
+ if (console_is_usable(con, con_flags)) {
+ unsigned long lock_flags;
+
+ con->device_lock(con, &lock_flags);
+
+ /*
+ * Ensure this stays on the CPU to make handover and
+ * takeover possible.
+ */
+ cant_migrate();
+
+ if (nbcon_context_try_acquire(ctxt)) {
+ /*
+ * If the emit fails, this context is no
+ * longer the owner.
+ */
+ if (nbcon_emit_next_record(&wctxt, false)) {
+ nbcon_context_release(ctxt);
+ backlog = ctxt->backlog;
+ }
+ }
+
+ con->device_unlock(con, lock_flags);
+ }
+
+ console_srcu_read_unlock(cookie);
+
+ } while (backlog);
+
+ goto wait_for_event;
+}
+
/* Track the nbcon emergency nesting per CPU. */
static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting);
static unsigned int early_nbcon_pcpu_emergency_nesting __initdata;
@@ -1012,7 +1134,7 @@ static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt)
* The higher priority printing context takes over responsibility
* to print the pending records.
*/
- if (!nbcon_emit_next_record(wctxt))
+ if (!nbcon_emit_next_record(wctxt, true))
return false;
nbcon_context_release(ctxt);
@@ -1113,7 +1235,7 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
* handed over or taken over. In both cases the context is no
* longer valid.
*/
- if (!nbcon_emit_next_record(&wctxt))
+ if (!nbcon_emit_next_record(&wctxt, true))
return -EAGAIN;
if (!ctxt->backlog) {
@@ -1172,10 +1294,10 @@ static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
/*
* If flushing was successful but more records are available, this
- * context must flush those remaining records because there is no
- * other context that will do it.
+ * context must flush those remaining records if the printer thread
+ * is not available to do it.
*/
- if (prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
+ if (!con->kthread && prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
stop_seq = prb_next_reserve_seq(prb);
goto again;
}
@@ -1332,6 +1454,63 @@ void nbcon_cpu_emergency_flush(void)
}
}
+/*
+ * nbcon_kthread_stop - Stop a printer thread
+ * @con: Console to operate on
+ */
+static void nbcon_kthread_stop(struct console *con)
+{
+ lockdep_assert_console_list_lock_held();
+
+ if (!con->kthread)
+ return;
+
+ kthread_stop(con->kthread);
+ con->kthread = NULL;
+}
+
+/**
+ * nbcon_kthread_create - Create a printer thread
+ * @con: Console to operate on
+ *
+ * If it fails, let the console proceed. The atomic part might
+ * be usable and useful.
+ */
+void nbcon_kthread_create(struct console *con)
+{
+ struct task_struct *kt;
+
+ lockdep_assert_console_list_lock_held();
+
+ if (!(con->flags & CON_NBCON) || !con->write_thread)
+ return;
+
+ if (con->kthread)
+ return;
+
+ /*
+ * Printer threads cannot be started as long as any boot console is
+ * registered because there is no way to synchronize the hardware
+ * registers between boot console code and regular console code.
+ */
+ if (have_boot_console)
+ return;
+
+ kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index);
+ if (IS_ERR(kt)) {
+ con_printk(KERN_ERR, con, "failed to start printing thread\n");
+ return;
+ }
+
+ con->kthread = kt;
+
+ /*
+ * It is important that console printing threads are scheduled
+ * shortly after a printk call and with generous runtime budgets.
+ */
+ sched_set_normal(con->kthread, -20);
+}
+
/**
* nbcon_alloc - Allocate buffers needed by the nbcon console
* @con: Console to allocate buffers for
@@ -1377,6 +1556,7 @@ void nbcon_init(struct console *con, u64 init_seq)
/* nbcon_alloc() must have been called and successful! */
BUG_ON(!con->pbufs);
+ rcuwait_init(&con->rcuwait);
nbcon_seq_force(con, init_seq);
nbcon_state_set(con, &state);
}
@@ -1389,6 +1569,7 @@ void nbcon_free(struct console *con)
{
struct nbcon_state state = { };
+ nbcon_kthread_stop(con);
nbcon_state_set(con, &state);
/* Boot consoles share global printk buffers. */
@@ -1458,6 +1639,7 @@ void nbcon_device_release(struct console *con)
*/
cookie = console_srcu_read_lock();
if (console_is_usable(con, console_srcu_read_flags(con)) &&
+ !con->kthread &&
prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
__nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false);
}
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 5dcc05e1aa56..137bd9a721c4 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2685,6 +2685,8 @@ void suspend_console(void)
void resume_console(void)
{
struct console *con;
+ short flags;
+ int cookie;
if (!console_suspend_enabled)
return;
@@ -2701,6 +2703,14 @@ void resume_console(void)
*/
synchronize_srcu(&console_srcu);
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(con) {
+ flags = console_srcu_read_flags(con);
+ if (flags & CON_NBCON)
+ nbcon_kthread_wake(con);
+ }
+ console_srcu_read_unlock(cookie);
+
pr_flush(1000, true);
}
@@ -3021,6 +3031,13 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
u64 printk_seq;
bool progress;
+ /*
+ * console_flush_all() is only for legacy consoles,
+ * unless the nbcon console has no kthread printer.
+ */
+ if ((flags & CON_NBCON) && con->kthread)
+ continue;
+
if (!console_is_usable(con, flags))
continue;
any_usable = true;
@@ -3326,9 +3343,26 @@ EXPORT_SYMBOL(console_stop);
void console_start(struct console *console)
{
+ short flags;
+ int cookie;
+
console_list_lock();
console_srcu_write_flags(console, console->flags | CON_ENABLED);
console_list_unlock();
+
+ /*
+ * Ensure that all SRCU list walks have completed. The related
+ * printing context must be able to see it is enabled so that
+ * it is guaranteed to wake up and resume printing.
+ */
+ synchronize_srcu(&console_srcu);
+
+ cookie = console_srcu_read_lock();
+ flags = console_srcu_read_flags(console);
+ if (flags & CON_NBCON)
+ nbcon_kthread_wake(console);
+ console_srcu_read_unlock(cookie);
+
__pr_flush(console, 1000, true);
}
EXPORT_SYMBOL(console_start);
--
2.39.2
On Tue 2024-06-04 01:30:39, John Ogness wrote:
> From: Thomas Gleixner <tglx@linutronix.de>
>
> Provide the main implementation for running a printer kthread
> per nbcon console that is takeover/handover aware.
The changes done by this patch deserve a more detailed commit message ;-)
I updated this commit message in parallel with commenting the related
code changes. My later comment might explain the motivation for
writing the commit message this way.
<proposal>
Provide the main implementation for running a printer kthread per
nbcon console that is takeover/handover aware.
Once the kthread is running and available, it gets responsible for
flushing any pending messages which were added in NBCON_PRIO_NORMAL
context. Namely the legacy console_flush_all() and device_release()
do not longer flush the console. And nbcon_atomic_flush_pending()
used by nbcon_cpu_emergency_exit() does not longer flush messages
added after the emergency ones.
The kthread uses a new write_thread() callback which is called with
both device_lock() taken and the console context acquired.
The console ownership handling is necessary for synchronization against
write_atomic() which is synchronized only via the console context
ownership.
The device_lock() serializes acquiring the console context with
NBCON_PRIO_NORMAL. It is needed when the device_lock() does not
disable preemption. It prevents the following race:
CPU0 CPU1
[ task A ]
nbcon_context_try_acquire()
# success with NORMAL prio
# .unsafe == false; // safe for takeover
[ schedule: task A -> B ]
WARN_ON()
nbcon_atomic_flush_pending()
nbcon_context_try_acquire()
# success with EMERGENCY prio
# flushing
nbcon_context_release()
# HERE: con->nbcon_state is free
# to take by anyone !!!
nbcon_context_try_acquire()
# success with NORMAL prio [ task B ]
[ schedule: task B -> A ]
nbcon_enter_unsafe()
nbcon_context_can_proceed()
BUG: nbcon_context_can_proceed() returns "true" because
the console is owned by a context on CPU0 with
NBCON_PRIO_NORMAL.
But it should return "false". The console is owned
by a context from task B and we do the check
in a context from task A.
The write context in the kthread will stay safe only when either of
the following conditions are true:
1. The kthread is the only context which acquires the console
with NBCON_PRIO_NORMAL.
2. Other caller acquires the console context with NBCON_PRIO_NORMAL
under the device_lock.
3. Other caller acquires the console context with NBCON_PRIO_NORMAL
with disabled preemption. It will release the context before
rescheduling.
It is even double guaranteed. First, __nbcon_atomic_flush_pending_con()
is called:
+ with disabled interrupts from nbcon_atomic_flush_pending_con()
+ under device_lock() from nbcon_device_release()
Second, they are no longer called with NBCON_PRIO_NORMAL when
con->kthread exists.
Also note that the kthreads are running with nice -20 so that they are
scheduled shortly after a printk call and with generous runtime
budgets.
This patch provides just the basic functionality. The followup changes
will add the necessary integration, for example, creating the kthreads
at the right time or waking them when a new message appears.
</proposal>
BTW: It really looks like the safety is double guaranteed. Maybe
the con->device_lock() is not needed in nbcon_kthread_func()
after all. Well, I would keep it to be on the safe side.
> --- a/include/linux/console.h
> +++ b/include/linux/console.h
> @@ -373,6 +376,27 @@ struct console {
> */
> void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt);
>
> + /**
> + * @write_thread:
> + *
> + * NBCON callback to write out text in task context.
> + *
> + * This callback is called after device_lock() and with the nbcon
> + * console acquired. Any necessary driver synchronization should have
> + * been performed by the device_lock() callback.
> + *
> + * This callback is always called from task context but with migration
> + * disabled.
> + *
> + * The same criteria for console ownership verification and unsafe
> + * sections applies as with write_atomic(). The difference between
> + * this callback and write_atomic() is that this callback is used
> + * during normal operation and is always called from task context.
> + * This allows drivers to operate in their own locking context for
> + * synchronizing output to the hardware.
> + */
The description is not bad. It seems to include all the important
information. Except that I still needed to scratch my head around it
to understand the real purpose and rules.
Namely, the following sentences are kind of vague and bring questions
which I had in the past before I understood all the relations:
1. "Any necessary driver synchronization should have been performed
by the device_lock() callback."
Q: Why do we need to take both device_lock() and the context then?
Q: Why the acquired context is not enough?
2. "This allows drivers to operate in their own locking context for
synchronizing output to the hardware."
Q: What exactly does this sentence mean?
Q: What is the driver?
Q: What are the output callbacks?
Q: How exactly is this related to write_atomic() and write_thread()?
Q: Is the context more strict or relaxed, in which way?
Also I still keep in my mind that nbcon_context_try_acquire() is not
safe with NBCON_NORMAL_PRIO with enabled preemption. There is
the race discussed in the previous patchset, see
https://lore.kernel.org/r/ZiD3FNBZh_iMOVWY@pathway.suse.cz
I wonder if we could be more straightforward.
<my-take>
/**
* @write_thread:
*
* NBCON callback to write out text in task context.
*
* This callback must be called only in task context with both
* device_lock() and the nbcon console acquired.
*
* The same rules for console ownership verification and unsafe
* sections handling applies as with write_atomic().
*
* The console ownership handling is necessary for synchronization
* against write_atomic() which is synchronized only via the context.
*
* The device_lock() serializes acquiring the console context
* with NBCON_PRIO_NORMAL. It is necessary when the device_lock()
* does not disable preemption. The console context locking is
* not able to detect a race in the following scenario:
*
* 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X]
* and is scheduled.
*
* 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY
* and releases it.
*
* 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X]
* and is scheduled.
*
* 4. [Task A] gets running on [CPU X] and sees that the console
* is still owned by a task on [CPU X] with NBCON_PRIO_NORMAL.
* It can't detect that it is actually owned by another task.
*/
</my-take>
My variant describes the purpose of device_lock() quite different way.
But I think that this is the real purpose and we must be clear about
it.
Sigh, I was not able to describe the race reasonably a shorter
way. Maybe, we should move this detailed explanation above,
nbcon_context_try_acquire() and use just a reference here.
> + void (*write_thread)(struct console *con, struct nbcon_write_context *wctxt);
> +
> /**
> * @device_lock:
> *
> --- a/kernel/printk/nbcon.c
> +++ b/kernel/printk/nbcon.c
> @@ -936,6 +944,120 @@ static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt)
> return nbcon_context_exit_unsafe(ctxt);
> }
>
> +/**
> + * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup
> + * @con: Console to operate on
> + * @ctxt: The nbcon context from nbcon_context_try_acquire()
> + *
> + * Return: True if the thread should shutdown or if the console is
> + * allowed to print and a record is available. False otherwise.
> + *
> + * After the thread wakes up, it must first check if it should shutdown before
> + * attempting any printing.
> + */
> +static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt)
> +{
> + bool ret = false;
> + short flags;
> + int cookie;
> +
> + if (kthread_should_stop())
> + return true;
> +
> + cookie = console_srcu_read_lock();
> +
> + flags = console_srcu_read_flags(con);
> + if (console_is_usable(con, flags)) {
> + /* Bring the sequence in @ctxt up to date */
> + ctxt->seq = nbcon_seq_read(con);
> +
> + ret = prb_read_valid(prb, ctxt->seq, NULL);
> + }
> +
> + console_srcu_read_unlock(cookie);
> + return ret;
> +}
> +
> +/**
> + * nbcon_kthread_func - The printer thread function
> + * @__console: Console to operate on
> + *
> + * Return: 0
> + */
> +static int nbcon_kthread_func(void *__console)
> +{
> + struct console *con = __console;
> + struct nbcon_write_context wctxt = {
> + .ctxt.console = con,
> + .ctxt.prio = NBCON_PRIO_NORMAL,
> + };
> + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
> + short con_flags;
> + bool backlog;
> + int cookie;
> + int ret;
> +
> +wait_for_event:
> + /*
> + * Guarantee this task is visible on the rcuwait before
> + * checking the wake condition.
> + *
> + * The full memory barrier within set_current_state() of
> + * ___rcuwait_wait_event() pairs with the full memory
> + * barrier within rcuwait_has_sleeper().
> + *
> + * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A.
> + */
> + ret = rcuwait_wait_event(&con->rcuwait,
> + nbcon_kthread_should_wakeup(con, ctxt),
> + TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */
> +
> + if (kthread_should_stop())
> + return 0;
> +
> + /* Wait was interrupted by a spurious signal, go back to sleep. */
> + if (ret)
> + goto wait_for_event;
> +
> + do {
> + backlog = false;
> +
I am not sure how obvious it is that we take the lock around the entire
operation. In principle, the console could not disappear. So it might
look like it is not really necessary. But it actually plays important
role when stopping/suspending the console. I would add a comment:
/*
* Keep the read lock around the entire operation so that
* synchronize_srcu() could prove that the kthread stopped
* or suspended printing.
*/
> + cookie = console_srcu_read_lock();
> +
> + con_flags = console_srcu_read_flags(con);
> +
> + if (console_is_usable(con, con_flags)) {
> + unsigned long lock_flags;
> +
> + con->device_lock(con, &lock_flags);
> +
> + /*
> + * Ensure this stays on the CPU to make handover and
> + * takeover possible.
> + */
> + cant_migrate();
> +
> + if (nbcon_context_try_acquire(ctxt)) {
> + /*
> + * If the emit fails, this context is no
> + * longer the owner.
> + */
> + if (nbcon_emit_next_record(&wctxt, false)) {
> + nbcon_context_release(ctxt);
> + backlog = ctxt->backlog;
> + }
> + }
> +
> + con->device_unlock(con, lock_flags);
> + }
> +
> + console_srcu_read_unlock(cookie);
> +
> + } while (backlog);
Thinking loudly:
We do not check kthread_should_stop() in the cycle. It means that it
would flush all messages before stopping the kthread. But only
when it is already in the cycle. It would not flush the messages
in the following scenario:
CPU0 CPU1
printk("Unregistering console\n");
nbcon_wake_threads();
irq_work_queue(&con->irq_work);
kthread_stop(con->kthread);
nbcon_kthread_func()
rcuwait_wait_event()
nbcon_kthread_should_wakeup()
if (kthread_should_stop())
// true
return 0
Result: The kthread did not flush the pending messages in this case.
I do not have a strong opinion. Well, it would make sense to flush
all messages before stopping the kthread.
Maybe, we should move kthread_should_stop() check here.
Maybe, we should move the flushing cycle into a separate function
and the kthread might look like:
do {
rcuwait_wait_event(&con->rcuwait,
nbcon_kthread_should_wakeup(con, ctxt),
TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */
nbcon_kthread_flush();
} while (!kthread_should_stop());
Note that I did not add the check for the spurious signal. It looks
just like an optimization. IMHO, it is not worth the code churn.
Also the spurious signal does not mean that there are no
pending messages. And if we want to flush everything before exiting...
Anyway, we probably should document the (desired) behavior in the function
description.
> +
> + goto wait_for_event;
> +}
> +
> /* Track the nbcon emergency nesting per CPU. */
> static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting);
> static unsigned int early_nbcon_pcpu_emergency_nesting __initdata;
> @@ -1332,6 +1454,63 @@ void nbcon_cpu_emergency_flush(void)
> }
> }
>
> +/*
> + * nbcon_kthread_stop - Stop a printer thread
> + * @con: Console to operate on
> + */
> +static void nbcon_kthread_stop(struct console *con)
> +{
> + lockdep_assert_console_list_lock_held();
> +
> + if (!con->kthread)
> + return;
> +
> + kthread_stop(con->kthread);
> + con->kthread = NULL;
> +}
> +
> +/**
> + * nbcon_kthread_create - Create a printer thread
> + * @con: Console to operate on
> + *
> + * If it fails, let the console proceed. The atomic part might
> + * be usable and useful.
> + */
> +void nbcon_kthread_create(struct console *con)
> +{
> + struct task_struct *kt;
> +
> + lockdep_assert_console_list_lock_held();
> +
> + if (!(con->flags & CON_NBCON) || !con->write_thread)
> + return;
> +
> + if (con->kthread)
> + return;
> +
> + /*
> + * Printer threads cannot be started as long as any boot console is
> + * registered because there is no way to synchronize the hardware
> + * registers between boot console code and regular console code.
> + */
> + if (have_boot_console)
> + return;
> +
> + kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index);
> + if (IS_ERR(kt)) {
> + con_printk(KERN_ERR, con, "failed to start printing thread\n");
> + return;
I am a bit surprised that we ignore the error here.
Hmm, it likely works in this patch because the legacy code would still
flush the console when con->kthread is not assigned.
But it would stop working later when the legacy loop is disabled
by the global @printk_threads_enabled variable.
> + }
> +
Thinking loudly:
The kthread is running and started processing messages at this moment.
But con->kthread is not set yet.
All this is done under @console_list_lock. It only guarantees that
the struct console would not disappear while some code access it under
console_srcu_read_lock().
The value is used in console_flush_all() and in nbcon_device_release()
to decide whether to flush the console directly or not.
I first though that console_flush_all() and nbcon_device_release()
could flush the messages using write_atomic() in parallel with
the kthread and it might create the race with NBCON_PRIO_NORMAL
context ownership.
But both calls will ignore this console until it is added into
@console_list which is done later under con->device_lock().
Not to say that:
+ console_flush_all() acquires the nbcon context with interrupts
disabled => race prevented
+ nbcon_device_release() acquires the nbcon context under device
lock => serialized with the kthread
So we are on the safe side (double guaranteed). Well, it took me quite
some time to realize this. It might be worth a comment.
<proposal>
/*
* Some users check con->kthread to decide whether to flush
* the messages directly using con->write_atomic(). Note that
* possible races with the kthread are double prevented.
*
* First, the users ignore this console until it is added into
* @console_list which is done under con->device_lock().
* Second, the calls would be additionally serialized by acquiring
* the console context.
*/
</proposal>
> + con->kthread = kt;
> +
> + /*
> + * It is important that console printing threads are scheduled
> + * shortly after a printk call and with generous runtime budgets.
> + */
> + sched_set_normal(con->kthread, -20);
I would prefer to move this into a separate patch. I feel that it
might be kind of controversial and subject to a change in the future.
We should not hide this "tiny" detail in this mega patch ;-)
That said, the low "nice" value makes perfect sense to me.
And it makes me feel comfortable.
> +}
> +
> /**
> * nbcon_alloc - Allocate buffers needed by the nbcon console
> * @con: Console to allocate buffers for
> @@ -1458,6 +1639,7 @@ void nbcon_device_release(struct console *con)
> */
> cookie = console_srcu_read_lock();
> if (console_is_usable(con, console_srcu_read_flags(con)) &&
> + !con->kthread &&
This would deserve updating the comment like in nbcon_atomic_flush_pending_con().
> prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
> __nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false);
> }
Best Regards,
Petr
On 2024-06-07, Petr Mladek <pmladek@suse.com> wrote:
> I updated this commit message in parallel with commenting the related
> code changes. My later comment might explain the motivation for
> writing the commit message this way.
I am OK with your proposed commit message. Thank you for taking the time
to fill in all the details.
> <proposal>
[...]
> The write context in the kthread will stay safe only when either of
> the following conditions are true:
>
> 1. The kthread is the only context which acquires the console
> with NBCON_PRIO_NORMAL.
>
> 2. Other caller acquires the console context with NBCON_PRIO_NORMAL
> under the device_lock.
>
> 3. Other caller acquires the console context with NBCON_PRIO_NORMAL
> with disabled preemption. It will release the context before
> rescheduling.
>
> It is even double guaranteed. First, __nbcon_atomic_flush_pending_con()
> is called:
>
> + with disabled interrupts from nbcon_atomic_flush_pending_con()
> + under device_lock() from nbcon_device_release()
[...]
> </proposal>
>
> BTW: It really looks like the safety is double guaranteed. Maybe
> the con->device_lock() is not needed in nbcon_kthread_func()
> after all. Well, I would keep it to be on the safe side.
For the threaded case it is technically correct to say the safety is
double guaranteed. But the outer safety (device_lock()) can provide a
preemptible lock, which is the key for the non-interference property of
the threaded printers.
For example, for an nbcon framebuffer console, device_lock() most likely
will be a mutex.
During normal operation, the inner safety (console context) will never
be contended. That really only exists to synchronize the atomic case.
>> --- a/include/linux/console.h
>> +++ b/include/linux/console.h
>> @@ -373,6 +376,27 @@ struct console {
>> */
>> void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt);
>>
>> + /**
>> + * @write_thread:
>> + *
>> + * NBCON callback to write out text in task context.
>> + *
>> + * This callback is called after device_lock() and with the nbcon
>> + * console acquired. Any necessary driver synchronization should have
>> + * been performed by the device_lock() callback.
>> + *
>> + * This callback is always called from task context but with migration
>> + * disabled.
>> + *
>> + * The same criteria for console ownership verification and unsafe
>> + * sections applies as with write_atomic(). The difference between
>> + * this callback and write_atomic() is that this callback is used
>> + * during normal operation and is always called from task context.
>> + * This allows drivers to operate in their own locking context for
>> + * synchronizing output to the hardware.
>> + */
>
> The description is not bad. It seems to include all the important
> information. Except that I still needed to scratch my head around it
> to understand the real purpose and rules.
>
> Namely, the following sentences are kind of vague and bring questions
> which I had in the past before I understood all the relations:
>
> 1. "Any necessary driver synchronization should have been performed
> by the device_lock() callback."
>
> Q: Why do we need to take both device_lock() and the context then?
The device_lock() enables the non-interference property during normal
operation. OTOH the context is taken to synchronize against non-normal
(emergency/panic) operation.
> Q: Why the acquired context is not enough?
Because the context _requires_ disabling preemption during all driver
activity, which is a major source of interference to the system.
If you only care about UART drivers and !PREEMPT_RT, then there really
is not much difference between spinning on a spinlock vs. spinning on a
context. But for non-UART drivers or PREEMPT_RT, there is a huge
difference between blocking on device_lock() with preemption enabled vs.
spinning on a context.
I think it really helps to see the importance if you assume
device_lock() is a mutex. (All the printk code must also make this
assumption.)
> 2. "This allows drivers to operate in their own locking context for
> synchronizing output to the hardware."
>
> Q: What exactly does this sentence mean?
For example, if a console driver uses a mutex to synchronize hardware
access, it can implement this callback knowing it is under that
mutex. This also applies for any locking mechanism the driver might use.
> Q: What is the driver?
The code that implements _all_ access to the hardware for _all_
purposes.
> Q: What are the output callbacks?
I am not referring to callbacks. I am referring to any code in the
driver that is accessing the parts of the hardware that are related to
outputting kernel messages.
> Q: How exactly is this related to write_atomic() and write_thread()?
Both nbcon console write callbacks must synchronize against other
activities of the driver. For write_thread(), it is primarily via
device_lock().
> Q: Is the context more strict or relaxed, in which way?
If device_lock() is using a locking mechanism that does not disable
preemption, write_thread() is quite a bit more relaxed than the disabled
preemption context of write_atomic().
> Also I still keep in my mind that nbcon_context_try_acquire() is not
> safe with NBCON_NORMAL_PRIO with enabled preemption. There is
> the race discussed in the previous patchset, see
> https://lore.kernel.org/r/ZiD3FNBZh_iMOVWY@pathway.suse.cz
>
> I wonder if we could be more straightforward.
>
> <my-take>
> /**
> * @write_thread:
> *
> * NBCON callback to write out text in task context.
> *
> * This callback must be called only in task context with both
> * device_lock() and the nbcon console acquired.
> *
> * The same rules for console ownership verification and unsafe
> * sections handling applies as with write_atomic().
> *
> * The console ownership handling is necessary for synchronization
> * against write_atomic() which is synchronized only via the context.
> *
> * The device_lock() serializes acquiring the console context
> * with NBCON_PRIO_NORMAL. It is necessary when the device_lock()
> * does not disable preemption. The console context locking is
> * not able to detect a race in the following scenario:
> *
> * 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X]
> * and is scheduled.
> *
> * 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY
> * and releases it.
> *
> * 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X]
> * and is scheduled.
> *
> * 4. [Task A] gets running on [CPU X] and sees that the console
> * is still owned by a task on [CPU X] with NBCON_PRIO_NORMAL.
> * It can't detect that it is actually owned by another task.
> */
> </my-take>
>
> My variant describes the purpose of device_lock() quite different way.
> But I think that this is the real purpose and we must be clear about
> it.
I would move that last part (starting with "The device_lock()
serializes...") into the kerneldoc for device_lock(). It really has
nothing to do with the write_thread() callback.
> Sigh, I was not able to describe the race reasonably a shorter
> way. Maybe, we should move this detailed explanation above,
> nbcon_context_try_acquire() and use just a reference here.
How about putting it as kerneldoc for nbcon_owner_matches() and refer to
it there from the device_lock() kerneldoc (or vice versa)?
For the write_thread() kerneldoc we could refer to the device_lock()
kerneldoc.
>> +/**
>> + * nbcon_kthread_func - The printer thread function
>> + * @__console: Console to operate on
>> + *
>> + * Return: 0
>> + */
>> +static int nbcon_kthread_func(void *__console)
>> +{
>> + struct console *con = __console;
>> + struct nbcon_write_context wctxt = {
>> + .ctxt.console = con,
>> + .ctxt.prio = NBCON_PRIO_NORMAL,
>> + };
>> + struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
>> + short con_flags;
>> + bool backlog;
>> + int cookie;
>> + int ret;
>> +
>> +wait_for_event:
>> + /*
>> + * Guarantee this task is visible on the rcuwait before
>> + * checking the wake condition.
>> + *
>> + * The full memory barrier within set_current_state() of
>> + * ___rcuwait_wait_event() pairs with the full memory
>> + * barrier within rcuwait_has_sleeper().
>> + *
>> + * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A.
>> + */
>> + ret = rcuwait_wait_event(&con->rcuwait,
>> + nbcon_kthread_should_wakeup(con, ctxt),
>> + TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */
>> +
>> + if (kthread_should_stop())
>> + return 0;
>> +
>> + /* Wait was interrupted by a spurious signal, go back to sleep. */
>> + if (ret)
>> + goto wait_for_event;
>> +
>> + do {
>> + backlog = false;
>> +
>
> I am not sure how obvious is that we take the lock around the entire
> operation. In principle, the console could not disappear. So it might
> look like it is not really necessary. But it actually plays important
> role when stopping/suspending the console. I would add a comment:
>
> /*
> * Keep the read lock around the entire operation so that
> * synchronize_srcu() could prove that the kthread stopped
> * or suspended printing.
> */
Agreed.
>> + cookie = console_srcu_read_lock();
>> +
>> + con_flags = console_srcu_read_flags(con);
>> +
>> + if (console_is_usable(con, con_flags)) {
>> + unsigned long lock_flags;
>> +
>> + con->device_lock(con, &lock_flags);
>> +
>> + /*
>> + * Ensure this stays on the CPU to make handover and
>> + * takeover possible.
>> + */
>> + cant_migrate();
>> +
>> + if (nbcon_context_try_acquire(ctxt)) {
>> + /*
>> + * If the emit fails, this context is no
>> + * longer the owner.
>> + */
>> + if (nbcon_emit_next_record(&wctxt, false)) {
>> + nbcon_context_release(ctxt);
>> + backlog = ctxt->backlog;
>> + }
>> + }
>> +
>> + con->device_unlock(con, lock_flags);
>> + }
>> +
>> + console_srcu_read_unlock(cookie);
>> +
>> + } while (backlog);
>
> Thinking loudly:
>
> We do not check kthread_should_stop() in the cycle. It means that it
> would flush all messages before stopping the kthread. But only
> when it is already in the cycle. It would not flush the messages
> in the following scenario:
>
> CPU0 CPU1
>
> printk("Unregistering console\n");
> nbcon_wake_threads();
> irq_work_queue(&con->irq_work);
>
> kthread_stop(con->thread);
>
> nbcon_kthread_func()
> rcuwait_wait_event()
> nbcon_kthread_should_wakeup()
>
> if (kthread_should_stop())
> // true
> return 0
>
> Result: The kthread did not flush the pending messages in this case.
Nice catch. How about if we add the following to
unregister_console_locked():
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index f2ac7aaab234..fab69ca7f938 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3995,6 +3995,8 @@ static int unregister_console_locked(struct console *console)
if (res > 0)
return 0;
+ __pr_flush(console, 1000, true);
+
/* Disable it unconditionally */
console_srcu_write_flags(console, console->flags & ~CON_ENABLED);
>> + kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index);
>> + if (IS_ERR(kt)) {
>> + con_printk(KERN_ERR, con, "failed to start printing thread\n");
>> + return;
>
> I am a bit surprised that we ignore the error here.
>
>
> Hmm, it likely works in this patch because the legacy code would still
> flush the console when con->thread is not assigned.
>
> But it would stop working later when the legacy loop is disabled
> by the global @printk_threads_enabled variable.
The thread failing to start is a serious issue. Particularly for
PREEMPT_RT. Probably it should be something like:
if (WARN_ON(IS_ERR(kt))) {
In fact, I think nbcon_atomic_flush_pending_con() and nbcon_device_release()
need to check @printk_threads_enabled _instead_ of con->kthread. Once
"threading mode" has been activated, there should be _no_ atomic
printing during normal operation.
> The kthread is running and started processing messages at this moment.
> But con->kthread is not set yet.
[...]
> It might be worth a comment.
>
> <proposal>
> /*
> * Some users check con->kthread to decide whether to flush
> * the messages directly using con->write_atomic(). Note that
> * possible races with the kthread are double prevented.
> *
> * First, the users ignore this console until it is added into
> * @console_list which is done under con->device_lock().
> * Second, the calls would be additionally serialized by acquiring
> * the console context.
> */
> </proposal>
It is enough to mention only the calling context. How about:
/*
* Some users check con->kthread to decide whether to flush the
* messages directly using con->write_atomic(). A possible race
* with the kthread is prevented because all printing is
* serialized by acquiring the console context.
*/
>> + con->kthread = kt;
>> + /*
>> + * It is important that console printing threads are scheduled
>> + * shortly after a printk call and with generous runtime budgets.
>> + */
>> + sched_set_normal(con->kthread, -20);
>
> I would prefer to move this into a separate patch. I feel that it
> might be kind of controversial and subject to a change in the future.
> We should not hide this "tiny" detail in this mega patch ;-)
OK.
>> @@ -1458,6 +1639,7 @@ void nbcon_device_release(struct console *con)
>> */
>> cookie = console_srcu_read_lock();
>> if (console_is_usable(con, console_srcu_read_flags(con)) &&
>> + !con->kthread &&
>
> This would deserve updating the comment like in
> nbcon_atomic_flush_pending_con().
OK.
John
On Mon 2024-06-10 14:15:10, John Ogness wrote:
> On 2024-06-07, Petr Mladek <pmladek@suse.com> wrote:
> > I updated this commit message in parallel with commenting the related
> > code changes. My later comment might explain the motivation for
> > writing the commit message this way.
>
> I am OK with your proposed commit message. Thank you for taking the time
> to fill in all the details.
We should probably update it a bit so that it describes the role
of the device_lock() more precisely. See the discussion below.
> > <proposal>
>
> [...]
>
> > The write context in the kthread will stay safe only when either of
> > the following conditions are true:
> >
> > 1. The kthread is the only context which acquires the console
> > with NBCON_PRIO_NORMAL.
> >
> > 2. Other caller acquires the console context with NBCON_PRIO_NORMAL
> > under the device_lock.
> >
> > 3. Other caller acquires the console context with NBCON_PRIO_NORMAL
> > with disabled preemption. It will release the context before
> > rescheduling.
> >
> > It is even double guaranteed. First, __nbcon_atomic_flush_pending_con()
> > is called:
> >
> > + with disabled interrupts from nbcon_atomic_flush_pending_con()
> > + under device_lock() from nbcon_device_release()
>
> [...]
>
> > </proposal>
> >
> > BTW: It really looks like the safety is double guaranteed. Maybe
> > the con->device_lock() is not needed in nbcon_kthread_func()
> > after all. Well, I would keep it to be on the safe side.
>
> For the threaded case it is technically correct to say the safety is
> double guaranteed. But the outer safety (device_lock()) can provide a
> preemptible lock, which is the key for the non-interference property of
> the threaded printers.
>
> For example, for an nbcon framebuffer console, device_lock() most likely
> will be a mutex.
>
> During normal operation, the inner safety (console context) will never
> be contended. That really only exists to synchronize the atomic case.
I see. For example, the spin_lock_irqsave() in __uart_port_lock_irqsave()
will be sleeping lock in RT => preserving RT guarantees. And it will
make sure that the inner nbcon_context_try_acquire() would never
have to spin => never break the RT guarantees.
It makes perfect sense. Do I get it correctly?
> >> --- a/include/linux/console.h
> >> +++ b/include/linux/console.h
> >> @@ -373,6 +376,27 @@ struct console {
> >> */
> >> void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt);
> >>
> >> + /**
> >> + * @write_thread:
> >> + *
> >> + * NBCON callback to write out text in task context.
> >> + *
> >> + * This callback is called after device_lock() and with the nbcon
> >> + * console acquired. Any necessary driver synchronization should have
> >> + * been performed by the device_lock() callback.
> >> + *
> >> + * This callback is always called from task context but with migration
> >> + * disabled.
> >> + *
> >> + * The same criteria for console ownership verification and unsafe
> >> + * sections applies as with write_atomic(). The difference between
> >> + * this callback and write_atomic() is that this callback is used
> >> + * during normal operation and is always called from task context.
> >> + * This allows drivers to operate in their own locking context for
> >> + * synchronizing output to the hardware.
> >> + */
> >
> > The description is not bad. It seems to include all the important
> > information. Except that I still needed to scratch my head around it
> > to understand the real purpose and rules.
> >
> > Namely, the following sentences are kind of vague and bring questions
> > which I had in the past before I understood all the relations:
> >
> > 1. "Any necessary driver synchronization should have been performed
> > by the device_lock() callback."
> >
> > Q: Why do we need to take both device_lock() and the context then?
>
> The device_lock() enables the non-interference property during normal
> operation. OTOH the context is taken to synchronize against non-normal
> (emergency/panic) operation.
>
> > Q: Why the acquired context is not enough?
>
> Because the context _requires_ disabling preemption during all driver
> activity, which is a major source of interference to the system.
>
> If you only care about UART drivers and !PREEMPT_RT, then there really
> is not much difference between spinning on a spinlock vs. spinning on a
> context. But for non-UART drivers or PREEMPT_RT, there is a huge
> difference between blocking on device_lock() with preemption enabled vs.
> spinning on a context.
This helped a lot. My brain is not trained to see these RT-specific
effects.
> I think it really helps to see the importance if you assume
> device_lock() is a mutex. (All the printk code must also make this
> assumption.)
From my POV, the game changer is the RT aspect. I knew that
for graphics console drivers the mutex was a nice-to-have thing.
I did not realize that for RT it was a must-to-have thing.
I mean, I did not realize that nbcon_console_try_acquire() was
not RT friendly. We could also say that nbcon_console_try_acquire()
is not acceptable for RT as the primary synchronization primitive.
> > I wonder if we could be more straightforward.
> >
> > <my-take>
> > /**
> > * @write_thread:
> > *
> > * NBCON callback to write out text in task context.
> > *
> > * This callback must be called only in task context with both
> > * device_lock() and the nbcon console acquired.
> > *
> > * The same rules for console ownership verification and unsafe
> > * sections handling applies as with write_atomic().
> > *
> > * The console ownership handling is necessary for synchronization
> > * against write_atomic() which is synchronized only via the context.
> > *
> > * The device_lock() serializes acquiring the console context
> > * with NBCON_PRIO_NORMAL. It is necessary when the device_lock()
> > * does not disable preemption. The console context locking is
> > * not able to detect a race in the following scenario:
> > *
> > * 1. [Task A] owns a context with NBCON_PRIO_NORMAL on [CPU X]
> > * and is scheduled.
> > *
> > * 2. Another context takes over the lock with NBCON_PRIO_EMERGENCY
> > * and releases it.
> > *
> > * 3. [Task B] acquires a context with NBCON_PRIO_NORMAL on [CPU X]
> > * and is scheduled.
> > *
> > * 4. [Task A] gets running on [CPU X] and sees that the console
> > * is still owned by a task on [CPU X] with NBCON_PRIO_NORMAL.
> > * It can't detect that it is actually owned by another task.
> > */
> > </my-take>
> >
> > My variant describes the purpose of device_lock() quite different way.
> > But I think that this is the real purpose and we must be clear about
> > it.
>
> I would move that last part (starting with "The device_lock()
> serializes...") into the kerneldoc for device_lock(). It really has
> nothing to do with the write_thread() callback.
I agree that describing the details of the race is not important here.
Well, I would still like to describe the role of device_lock()
for write_kthread(). It would help even me to create a better
mental model ;-)
What about the following?
<proposal-2>
/**
* @write_thread:
*
* NBCON callback to write out text in task context.
*
* This callback must be called only in task context with both
* device_lock() and the nbcon console acquired with
* NBCON_PRIO_NORMAL.
*
* The same rules for console ownership verification and unsafe
* sections handling applies as with write_atomic().
*
* The console ownership handling is necessary for synchronization
* against write_atomic() which is synchronized only via the context.
*
* The device_lock() provides the primary serialization for operations
* on the device. It might be as relaxed (mutex)[*] or as tight
* (disabled preemption and interrupts) as needed. It allows
* the kthread to operate in the least restrictive mode[**].
*
* [*] Standalone nbcon_context_try_acquire() is not safe with
* the preemption enabled, see nbcon_owner_matches(). But it
* can be safe when always called in the preemptive context
* under the device_lock().
*
* [**] The device_lock() makes sure that nbcon_context_try_acquire()
* would never need to spin which is important especially with
* PREEMPT_RT.
*/
</proposal-2>
> > Sigh, I was not able to describe the race reasonably a shorter
> > way. Maybe, we should move this detailed explanation above,
> > nbcon_context_try_acquire() and use just a reference here.
>
> How about putting it as kerneldoc for nbcon_owner_matches() and refer to
> it there from the device_lock() kerneldoc (or vice versa)?
Yup. nbcon_owner_matches() looks like the right place for details
about the possible race.
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -3995,6 +3995,8 @@ static int unregister_console_locked(struct console *console)
> if (res > 0)
> return 0;
>
> + __pr_flush(console, 1000, true);
> +
> /* Disable it unconditionally */
> console_srcu_write_flags(console, console->flags & ~CON_ENABLED);
Makes sense.
And it actually means that it is too late to flush messages
when kthread_should_stop() returns true. So, it does not matter if
we check it in the while(backlog) loop or not. Well, it might be
worth a comment in the code.
> >> + kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index);
> >> + if (IS_ERR(kt)) {
> >> + con_printk(KERN_ERR, con, "failed to start printing thread\n");
> >> + return;
> >
> > I am a bit surprised that we ignore the error here.
> >
> >
> > Hmm, it likely works in this patch because the legacy code would still
> > flush the console when con->thread is not assigned.
> >
> > But it would stop working later when the legacy loop is disabled
> > by the global @printk_threads_enabled variable.
>
> The thread failing to start is a serious issue. Particularly for
> PREEMPT_RT.
I agree.
> Probably it should be something like:
>
> if (WARN_ON(IS_ERR(kt))) {
Might make sense.
> In fact, I think nbcon_atomic_flush_pending_con() and nbcon_device_release()
> need to check @printk_threads_enabled _instead_ of con->kthread. Once
> "threading mode" has been activated, there should be _no_ atomic
> printing during normal operation.
OK, we have two possibilities when a kthread could not get started:
1. printk() could fallback to flushing messages via write_atomic()
in the legacy console_unlock() loop.
It might break RT guarantees but people would see the messages.
2. The affected console would flush the messages only in emergency
or panic mode.
This would preserve RT guarantees. And printk() would still be
able to show emergency and panic messages, including the WARN_ON()
about that the printk kthread did not start.
It seems that you prefer the 2nd variant. But you are RT-biased ;-)
I would personally prefer the 1st variant. But I am printk-biased ;-)
Honestly, if the system is not able to start the kthread then
it is probably useless anyway. I would prefer if printk keeps working
so that people know what is going on ;-)
> > The kthread is running and started processing messages at this moment.
> > But con->kthread is not set yet.
>
> [...]
>
> > It might be worth a comment.
> >
> > <proposal>
> > /*
> > * Some users check con->kthread to decide whether to flush
> > * the messages directly using con->write_atomic(). Note that
> > * possible races with the kthread are double prevented.
> > *
> > * First, the users ignore this console until it is added into
> > * @console_list which is done under con->device_lock().
> > * Second, the calls would be additionally serialized by acquiring
> > * the console context.
> > */
> > </proposal>
>
> It is enough to mention only the calling context. How about:
>
> /*
> * Some users check con->kthread to decide whether to flush the
> * messages directly using con->write_atomic(). A possible race
> * with the kthread is prevented because all printing is
> * serialized by acquiring the console context.
> */
I am not sure what is more important. In fact, all these users
check con->kthread only when the console is on @console_list.
So, maybe the note about @console_list is more useful.
BTW: uart_port_lock() allows access to the device and it acquires
the context only when the console appears on @console_list.
Well, the access is serialized by the device_lock.
> >> + con->kthread = kt;
After all, I would add two comments, like these:
<proposal-2>
/*
* Any access to the console device is serialized either by
* device_lock() or console context or both.
*/
kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index);
[...]
/*
* Some users check con->kthread to decide whether to flush
* the messages directly using con->write_atomic(). But they
* do so only when the console is already in @console_list.
*/
con->kthread = kt;
</proposal-2>
Best Regards,
Petr
On 2024-06-11, Petr Mladek <pmladek@suse.com> wrote:
>> During normal operation, the inner safety (console context) will never
>> be contended. That really only exists to synchronize the atomic case.
>
> I see. For example, the spin_lock_irqsave() in __uart_port_lock_irqsave()
> will be sleeping lock in RT => preserving RT guarantees. And it will
> make sure that the inner nbcon_context_try_acquire() would never
> have to spin => never break the RT guarantees.
>
> It makes perfect sense. Do I get it correctly?
Yes.
> Well, I would still like to describe the role of device_lock()
> for write_kthread(). It would help even me to create a better
> mental model ;-)
>
> What about the following?
>
> <proposal-2>
> /**
> * @write_thread:
> *
> * NBCON callback to write out text in task context.
> *
> * This callback must be called only in task context with both
> * device_lock() and the nbcon console acquired with
> * NBCON_PRIO_NORMAL.
> *
> * The same rules for console ownership verification and unsafe
> * sections handling applies as with write_atomic().
> *
> * The console ownership handling is necessary for synchronization
> * against write_atomic() which is synchronized only via the context.
> *
> * The device_lock() provides the primary serialization for operations
> * on the device. It might be as relaxed (mutex)[*] or as tight
> * (disabled preemption and interrupts) as needed. It allows
> * the kthread to operate in the least restrictive mode[**].
> *
> * [*] Standalone nbcon_context_try_acquire() is not safe with
> * the preemption enabled, see nbcon_owner_matches(). But it
> * can be safe when always called in the preemptive context
> * under the device_lock().
> *
> * [**] The device_lock() makes sure that nbcon_context_try_acquire()
> * would never need to spin which is important especially with
> * PREEMPT_RT.
> */
> </proposal-2>
OK. I will use this.
>> --- a/kernel/printk/printk.c
>> +++ b/kernel/printk/printk.c
>> @@ -3995,6 +3995,8 @@ static int unregister_console_locked(struct console *console)
>> if (res > 0)
>> return 0;
>>
>> + __pr_flush(console, 1000, true);
>> +
>> /* Disable it unconditionally */
>> console_srcu_write_flags(console, console->flags & ~CON_ENABLED);
>
> Makes sense.
>
> And it actually means that it is too late to flush messages
> when kthread_should_stop() returns true. So, it does not matter if
> we check it in the while(backlog) loop or not. Well, it might be
> worth a comment in the code.
I will add a comment for this.
>> The thread failing to start is a serious issue. Particularly for
>> PREEMPT_RT.
>
> I agree.
>
>> Probably it should be something like:
>>
>> if (WARN_ON(IS_ERR(kt))) {
>
> Might make sense.
I will add this for v2.
> Honestly, if the system is not able to start the kthread then
> it is probably useless anyway. I would prefer if printk keeps working
> so that people know what is going on ;-)
OK. For v2 I will change it to fallback to the legacy printing for those
consoles that do not have a kthread.
> After all, I would add two comments, like these:
>
> <proposal-2>
> /*
> * Any access to the console device is serialized either by
> * device_lock() or console context or both.
> */
> kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name,
> con->index);
> [...]
>
> /*
> * Some users check con->kthread to decide whether to flush
> * the messages directly using con->write_atomic(). But they
> * do so only when the console is already in @console_list.
> */
I do not understand how @console_list is related to racing between
non-thread and thread. kthreads are not only created during
registration. For example, they can be created much later when the last
boot console unregisters.
I am OK with the first comment of this proposal. I do not understand the
second comment.
> con->kthread = kt;
> </proposal-2>
John
On 2024-06-11, Petr Mladek <pmladek@suse.com> wrote: > Honestly, if the system is not able to start the kthread then > it is probably useless anyway. I would prefer if printk keeps working > so that people know what is going on ;-) I have been looking into and thinking about this some more. I do not like the idea of the system doing some sort of fallback if some of the kthreads fail to start. Not only does it complicate the code, but it greatly increases the variants of how the system behaves during runtime. I (strongly) suggest the following: - The kthread is created in nbcon_alloc(). If the kthread fails, then nbcon_alloc() fails and the console will not register. - nbcon_kthread_should_wakeup() will return false if the console is not registered or if @have_boot_console=1. Then there would be no need to ever check con->kthread. Instead we can rely on the global state of the system transitioning to relying on threading. I think it is totally appropriate that register_console() fails if the kthread cannot be created, just as it already fails if the kmalloc() for the pbufs fails. Any objections? John P.S. There are other minor details involved, such as calling kthread_stop() before removing a console from the list. But I have gone through all these and it appears to generally simplify things a lot.
On Thu 2024-06-13 17:27:33, John Ogness wrote: > On 2024-06-11, Petr Mladek <pmladek@suse.com> wrote: > > Honestly, if the system is not able to start the kthread then > > it is probably useless anyway. I would prefer if printk keeps working > > so that people know what is going on ;-) > > I have been looking into and thinking about this some more. I do not > like the idea of the system doing some sort of fallback if some of the > kthreads fail to start. Not only does it complicate the code, but it > greatly increases the variants of how the system during runtime. Fair enough. > I (strongly) suggest the following: > > - The kthread is created in nbcon_alloc(). If the kthread fails, then > nbcon_alloc() fails and the console will not register. > > - nbcon_kthread_should_wakeup() will return false if the console is not > registered or if @have_boot_console=1. I like this. I guess that this would require moving some pieces from nbcon_init to nbcon_alloc(). It might make sense to move there everything except for setting the initial seq number. > Then there would be no need to ever check con->kthread. Instead we can > rely on the global state of the system transitioning to relying on > threading. > > I think it is totally appropriate that register_console() fails if the > kthread cannot be created, just as it already fails if the kmalloc() for > the pbufs fails. > > Any objections? Not from my side ;-) Best Regards, Petr
On Wed 2024-06-12 10:57:11, John Ogness wrote:
> On 2024-06-11, Petr Mladek <pmladek@suse.com> wrote:
> >> --- a/kernel/printk/printk.c
> >> +++ b/kernel/printk/printk.c
> >> The thread failing to start is a serious issue. Particularly for
> >> PREEMPT_RT.
> >
> > I agree.
> >
> >> Probably it should be something like:
> >>
> >> if (WARN_ON(IS_ERR(kt))) {
> >
> > Might make sense.
>
> I will add this for v2.
>
> > Honestly, if the system is not able to start the kthread then
> > it is probably useless anyway. I would prefer if printk keeps working
> > so that people know what is going on ;-)
>
> OK. For v2 I will change it to fallback to the legacy printing for those
> consoles that do not have a kthread.
>
> > After all, I would add two comments, like these:
> >
> > <proposal-2>
> > /*
> > * Any access to the console device is serialized either by
> > * device_lock() or console context or both.
> > */
> > kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name,
> > con->index);
> > [...]
> >
> > /*
> > * Some users check con->kthread to decide whether to flush
> > * the messages directly using con->write_atomic(). But they
> > * do so only when the console is already in @console_list.
> > */
>
> I do not understand how @console_list is related to racing between
> non-thread and thread. kthreads are not only created during
> registration. For example, they can be created much later when the last
> boot console unregisters.
I had in mind two particular code paths:
1. The check of con->kthread in nbcon_device_release() before
calling __nbcon_atomic_flush_pending_con().
But it is called only when __uart_port_using_nbcon() returns true.
And it would fail when nbcon_kthread_create() is called, because
the check hlist_unhashed_lockless(&up->cons->node)
would fail — it checks whether the console is in @console_list.
2. The following check in console_flush_all()
if ((flags & CON_NBCON) && con->kthread)
continue;
The result affects whether the legacy flush would call
nbcon_legacy_emit_next_record().
But this is called only for_each_console_srcu(con)
=> it could not race with nbcon_kthread_create()
because this console is not in @console_list at this moment.
In other words, I was curious whether some other code paths might
call con->write_atomic() while the kthread is already running.
It is not that important because it would be safe anyway.
I was checking this before I realized that it would be safe.
Anyway, the information about that the console is not in @console_list
when we set con->kthread still looks useful. At minimum,
the check would be racy if the console was on the list.
Does it make any sense now?
> I am OK with the first comment of this proposal. I do not understand the
> second comment.
Feel free to propose another comment. Or you could ignore the proposal
if you think that it does more harm than good.
Best Regards,
Petr
On 2024-06-12, Petr Mladek <pmladek@suse.com> wrote: >> > After all, I would add two comments, like these: >> > >> > <proposal-2> >> > /* >> > * Any access to the console device is serialized either by >> > * device_lock() or console context or both. >> > */ >> > kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, >> > con->index); >> > [...] >> > >> > /* >> > * Some users check con->kthread to decide whether to flush >> > * the messages directly using con->write_atomic(). But they >> > * do so only when the console is already in @console_list. >> > */ >> >> I do not understand how @console_list is related to racing between >> non-thread and thread. kthreads are not only created during >> registration. For example, they can be created much later when the last >> boot console unregisters. > > I had in mind two particular code paths: > > 1. The check of con->kthread in nbcon_device_release() before > calling __nbcon_atomic_flush_pending_con(). > > But it is called only when __uart_port_using_nbcon() returns true. > And it would fail when nbcon_kthread_create() is called because > > checks hlist_unhashed_lockless(&up->cons->node) > > would fail. Which checks of the console is in @console_list > > > 2. The following check in console_flush_all() > > if ((flags & CON_NBCON) && con->kthread) > continue; > > The result affects whether the legacy flush would call > nbcon_legacy_emit_next_record(). > > But this is called only for_each_console_srcu(con) > => it could not race with nbcon_kthread_create() > because this console is not in @console_list at this moment. > > By other words, I was curious whether some other code paths might > call con->write_atomic() while the kthread is already running. > > It is not that important because it would be safe anyway. > I was checking this before I realized that it would be safe. Yes, it must be safe because it can happen at any time. For example, when flushing after an emergency section. 
> Anyway, the information about that the console is not in @console_list > when we set con->kthread still looks useful. Except that it is not always true. If boot consoles are registered, the kthread is created later, after the console _is_ in @console_list. Setting con->kthread really has nothing to do with @console_list. > At minimum, the check would be racy if the console was on the list. The con->kthread check _is_ racey, but it doesn't matter. Perhaps you just want to make it clear that it is racey but it does not matter. How about: /* * Some users check con->kthread to decide whether to flush * the messages directly using con->write_atomic(). Although * racey, such a check for that purpose is safe because both * threaded and atomic printing are serialized by the * console context. */ con->kthread = kt; John
On Wed 2024-06-12 13:24:24, John Ogness wrote: > On 2024-06-12, Petr Mladek <pmladek@suse.com> wrote: > >> > After all, I would add two comments, like these: > >> > > >> > <proposal-2> > >> > /* > >> > * Any access to the console device is serialized either by > >> > * device_lock() or console context or both. > >> > */ > >> > kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, > >> > con->index); > >> > [...] > >> > > >> > /* > >> > * Some users check con->kthread to decide whether to flush > >> > * the messages directly using con->write_atomic(). But they > >> > * do so only when the console is already in @console_list. > >> > */ > >> > >> I do not understand how @console_list is related to racing between > >> non-thread and thread. kthreads are not only created during > >> registration. For example, they can be created much later when the last > >> boot console unregisters. > > > > I had in mind two particular code paths: > > > > 1. The check of con->kthread in nbcon_device_release() before > > calling __nbcon_atomic_flush_pending_con(). > > > > But it is called only when __uart_port_using_nbcon() returns true. > > And it would fail when nbcon_kthread_create() is called because > > > > checks hlist_unhashed_lockless(&up->cons->node) > > > > would fail. Which checks of the console is in @console_list > > > > > > 2. The following check in console_flush_all() > > > > if ((flags & CON_NBCON) && con->kthread) > > continue; > > > > The result affects whether the legacy flush would call > > nbcon_legacy_emit_next_record(). > > > > But this is called only for_each_console_srcu(con) > > => it could not race with nbcon_kthread_create() > > because this console is not in @console_list at this moment. > > > > By other words, I was curious whether some other code paths might > > call con->write_atomic() while the kthread is already running. > > > > It is not that important because it would be safe anyway. 
> > I was checking this before I realized that it would be safe. > > Yes, it must be safe because it can happen at any time. For example, > when flushing after an emergency section. > > > Anyway, the information about that the console is not in @console_list > > when we set con->kthread still looks useful. > > Except that it is not always true. If boot consoles are registered, the > kthread is created later, after the console _is_ in > @console_list. Setting con->kthread really has nothing to do with > @console_list. Right. I have missed this. > > At minimum, the check would be racy if the console was on the list. > > The con->kthread check _is_ racey, but it doesn't matter. Perhaps you > just want to make it clear that it is racey but it does not matter. > > How about: > > /* > * Some users check con->kthread to decide whether to flush > * the messages directly using con->write_atomic(). Although > * racey, such a check for that purpose is safe because both > * threaded and atomic printing are serialized by the > * console context. > */ > con->kthread = kt; Yes, it sounds good. Best Regards, Petr
© 2016 - 2026 Red Hat, Inc.