[PATCH v7 07/13] printk: console: Introduce sysfs interface for per-console loglevels

Chris Down posted 13 patches 1 week, 6 days ago
There is a newer version of this series
[PATCH v7 07/13] printk: console: Introduce sysfs interface for per-console loglevels
Posted by Chris Down 1 week, 6 days ago
A sysfs interface under /sys/class/console/ is created that permits
viewing and configuring per-console attributes. This is the main
interface through which we expect users to view and configure
per-console loglevels.

Each console device now has its own directory (for example,
/sys/class/console/ttyS0/) containing the following attributes:

- effective_loglevel (ro): The effective loglevel for the console after
  considering all loglevel authorities (e.g., global loglevel,
  per-console loglevel).
- effective_loglevel_source (ro): The source of the effective loglevel
  (e.g., local, global, ignore_loglevel).
- loglevel (rw): The per-console loglevel. Writing a value between 1
  (KERN_ALERT) and 8 (KERN_DEBUG + 1) sets the per-console loglevel.
  Writing 0 (KERN_EMERG) is rejected. Writing -1 disables the
  per-console loglevel.

In terms of technical implementation, we embed a device pointer in the
console struct, and register each console using it so we can expose
attributes in sysfs. We currently expose the following attributes:

    % ls -l /sys/class/console/ttyS0/
    total 0
    lrwxrwxrwx 1 root root    0 Oct 23 13:17 subsystem -> ../../../../class/console/
    -r--r--r-- 1 root root 4096 Oct 23 13:18 effective_loglevel
    -r--r--r-- 1 root root 4096 Oct 23 13:18 effective_loglevel_source
    -rw-r--r-- 1 root root 4096 Oct 23 13:18 loglevel
    -rw-r--r-- 1 root root 4096 Oct 23 13:17 uevent

The lifecycle of this classdev looks like this on registration:

    register_console(con)/printk_late_init()
      console_register_device(con)
        device_initialize(con->classdev) # kref_init: refcount = 1
        device_add(con->classdev)        # get_device: refcount++ (to 2)

At stable state, the refcount is two.

Console unregistration looks like this:

    unregister_console_locked(con)
      struct device *dev = console->classdev;
      console->classdev = NULL;
      device_unregister(dev)
        device_del(dev)
          device_remove_class_symlinks(dev)
            sysfs_delete_link()
              kernfs_remove_by_name_ns()
                __kernfs_remove()
                  kernfs_drain()
                    kernfs_drain_open_files() # wait for close()
          kobject_del(&dev->kobj) # removes from sysfs, does NOT change refcount
        put_device(dev) # kref_put: refcount-- (from 2 to 1)
      put_device(dev) # kref_put: refcount-- (from 1 to 0)
        kobject_release()
          kobject_cleanup()
            device_release()
              console_classdev_release(dev)
                kfree(dev)

Signed-off-by: Chris Down <chris@chrisdown.name>
---
 Documentation/ABI/testing/sysfs-class-console |  58 +++++
 .../admin-guide/per-console-loglevel.rst      |  38 ++++
 Documentation/core-api/printk-basics.rst      |  35 +--
 Documentation/networking/netconsole.rst       |  13 ++
 MAINTAINERS                                   |   1 +
 include/linux/console.h                       |   4 +
 kernel/printk/Makefile                        |   2 +-
 kernel/printk/internal.h                      |   6 +
 kernel/printk/printk.c                        |  14 ++
 kernel/printk/sysfs.c                         | 213 ++++++++++++++++++
 10 files changed, 366 insertions(+), 18 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-class-console
 create mode 100644 kernel/printk/sysfs.c

diff --git a/Documentation/ABI/testing/sysfs-class-console b/Documentation/ABI/testing/sysfs-class-console
new file mode 100644
index 000000000000..8c0f0cf3f6c5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-console
@@ -0,0 +1,58 @@
+What:		/sys/class/console/
+Date:		November 2025
+Contact:	Chris Down <chris@chrisdown.name>
+Description:	Interface for viewing and setting per-console attributes, like
+		the per-console loglevel. For a high-level document describing
+		the motivations for this interface and related non-sysfs
+		controls, see
+		Documentation/admin-guide/per-console-loglevel.rst.
+
+What:		/sys/class/console/<C>/effective_loglevel
+Date:		November 2025
+Contact:	Chris Down <chris@chrisdown.name>
+Permissions:	0444 (world readable)
+Description:	Read only. The currently effective loglevel for this console.
+		All messages emitted with a loglevel below the effective value
+		will be emitted to the console.
+
+What:		/sys/class/console/<C>/effective_loglevel_source
+Date:		November 2025
+Contact:	Chris Down <chris@chrisdown.name>
+Permissions:	0444 (world readable)
+Description:	Read only. The currently effective loglevel source for this
+		console -- for example, whether it was set globally, or whether
+		it was set locally for this console.
+
+		Possible values are:
+			=============== ============================================
+			local           The loglevel comes from the console's
+			                per-console loglevel setting.
+			global          The loglevel comes from the global
+			                console_loglevel.
+			ignore_loglevel Both the per-console loglevel and global
+			                loglevel are ignored as ignore_loglevel is
+			                present on the kernel command line.
+			=============== ============================================
+
+What:		/sys/class/console/<C>/loglevel
+Date:		November 2025
+Contact:	Chris Down <chris@chrisdown.name>
+Permissions:	0644 (root read/write, user read)
+Description:	Read write. The current per-console loglevel, which will take
+		effect if not overridden by other non-sysfs controls (see
+		Documentation/admin-guide/per-console-loglevel.rst).
+
+		Valid values:
+			1-8:  LOGLEVEL_ALERT (1) to LOGLEVEL_DEBUG + 1 (8)
+			-1:   Use global console_loglevel (default)
+			0:    Explicitly rejected (KERN_EMERG not allowed)
+
+		Error codes:
+			EINVAL: Non-numeric input
+			ERANGE: Value out of valid range (< 1 or > 8, excluding -1)
+			ERANGE: Value is 0 (KERN_EMERG not allowed for per-console)
+			ERANGE: Value below system minimum_console_loglevel
+
+		The special value -1 disables the per-console loglevel, making
+		the console use the global loglevel instead.
+
diff --git a/Documentation/admin-guide/per-console-loglevel.rst b/Documentation/admin-guide/per-console-loglevel.rst
index 1f8f1eabc75c..4908d5d8ed4f 100644
--- a/Documentation/admin-guide/per-console-loglevel.rst
+++ b/Documentation/admin-guide/per-console-loglevel.rst
@@ -69,3 +69,41 @@ The default value for ``kernel.console_loglevel`` comes from
 ``CONFIG_CONSOLE_LOGLEVEL_DEFAULT``, or ``CONFIG_CONSOLE_LOGLEVEL_QUIET`` if
 ``quiet`` is passed on the kernel command line.
 
+Console attributes
+~~~~~~~~~~~~~~~~~~
+
+Registered consoles are exposed at ``/sys/class/console``. For example, if you
+are using ``ttyS0``, the console backing it can be viewed at
+``/sys/class/console/ttyS0/``. The following files are available:
+
+* ``effective_loglevel`` (r): The effective loglevel after considering all
+  loglevel authorities. For example, it shows the value of the console-specific
+  loglevel when a console-specific loglevel is defined, and shows the global
+  console loglevel value when the console-specific one is not defined.
+
+* ``effective_loglevel_source`` (r): The loglevel authority which resulted in
+  the effective loglevel being set. The following values can be present:
+
+    * ``local``: The console-specific loglevel is in effect.
+
+    * ``global``: The global loglevel (``kernel.console_loglevel``) is in
+      effect. Set a console-specific loglevel to override it.
+
+    * ``ignore_loglevel``: ``ignore_loglevel`` was specified on the kernel
+      command line or at ``/sys/module/printk/parameters/ignore_loglevel``.
+      Disable it to use level controls.
+
+* ``loglevel`` (rw): The local, console-specific loglevel for this console.
+  This will be in effect if no other global control overrides it. Look at
+  ``effective_loglevel`` and ``effective_loglevel_source`` to verify that.
+
+Deprecated
+~~~~~~~~~~
+
+* ``kernel.printk`` sysctl: this takes four values, setting
+  ``kernel.console_loglevel``, ``kernel.default_message_loglevel``, the minimum
+  console loglevel, and a fourth unused value. The interface is generally
+  considered to be quite confusing, doesn't perform checks on the values given,
+  and is unaware of per-console loglevel semantics.
+
+Chris Down <chris@chrisdown.name>, 18-November-2025
diff --git a/Documentation/core-api/printk-basics.rst b/Documentation/core-api/printk-basics.rst
index 2dde24ca7d9f..bfad359505bb 100644
--- a/Documentation/core-api/printk-basics.rst
+++ b/Documentation/core-api/printk-basics.rst
@@ -54,32 +54,33 @@ string, the log level is not a separate argument). The available log levels are:
 
 The log level specifies the importance of a message. The kernel decides whether
 to show the message immediately (printing it to the current console) depending
-on its log level and the current *console_loglevel* (a kernel variable). If the
-message priority is higher (lower log level value) than the *console_loglevel*
-the message will be printed to the console.
+on its log level and the current global *console_loglevel* or local per-console
+loglevel (kernel variables). If the message priority is higher (lower log level
+value) than the effective loglevel the message will be printed to the console.
 
 If the log level is omitted, the message is printed with ``KERN_DEFAULT``
 level.
 
-You can check the current *console_loglevel* with::
+You can check the current console's loglevel -- for example if you want to
+check the loglevel for serial consoles::
 
-  $ cat /proc/sys/kernel/printk
-  4        4        1        7
+  $ cat /sys/class/console/ttyS0/effective_loglevel
+  6
+  $ cat /sys/class/console/ttyS0/effective_loglevel_source
+  local
 
-The result shows the *current*, *default*, *minimum* and *boot-time-default* log
-levels.
+To change the default loglevel for all consoles, simply write the desired level
+to ``/proc/sys/kernel/console_loglevel``. For example::
 
-To change the current console_loglevel simply write the desired level to
-``/proc/sys/kernel/printk``. For example, to print all messages to the console::
+  # echo 5 > /proc/sys/kernel/console_loglevel
 
-  # echo 8 > /proc/sys/kernel/printk
+This sets the console_loglevel to print KERN_WARNING (4) or more severe
+messages to console. Consoles with a per-console loglevel set will ignore it
+unless ``ignore_per_console_loglevel`` is set on the kernel command line or at
+``/sys/module/printk/parameters/ignore_per_console_loglevel``.
 
-Another way, using ``dmesg``::
-
-  # dmesg -n 5
-
-sets the console_loglevel to print KERN_WARNING (4) or more severe messages to
-console. See ``dmesg(1)`` for more information.
+For more information on per-console loglevels, see
+Documentation/admin-guide/per-console-loglevel.rst.
 
 As an alternative to printk() you can use the ``pr_*()`` aliases for
 logging. This family of macros embed the log level in the macro names. For
diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst
index 59cb9982afe6..5fabcf9367d1 100644
--- a/Documentation/networking/netconsole.rst
+++ b/Documentation/networking/netconsole.rst
@@ -78,6 +78,19 @@ Built-in netconsole starts immediately after the TCP stack is
 initialized and attempts to bring up the supplied dev at the supplied
 address.
 
+You can also set a loglevel at runtime::
+
+  $ ls -l /sys/class/console/netcon0/
+  total 0
+  lrwxrwxrwx 1 root root    0 May 18 13:28 subsystem -> ../../../../class/console/
+  -r--r--r-- 1 root root 4096 May 18 13:28 effective_loglevel
+  -r--r--r-- 1 root root 4096 May 18 13:28 effective_loglevel_source
+  -r--r--r-- 1 root root 4096 May 18 13:28 enabled
+  -rw-r--r-- 1 root root 4096 May 18 13:28 loglevel
+  -rw-r--r-- 1 root root 4096 May 18 13:28 uevent
+
+See Documentation/admin-guide/per-console-loglevel.rst for more information.
+
 The remote host has several options to receive the kernel messages,
 for example:
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 8018a4db2d9f..9a509a0bc65a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -20505,6 +20505,7 @@ R:	John Ogness <john.ogness@linutronix.de>
 R:	Sergey Senozhatsky <senozhatsky@chromium.org>
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git
+F:	Documentation/ABI/testing/sysfs-class-console
 F:	Documentation/admin-guide/per-console-loglevel.rst
 F:	Documentation/core-api/printk-basics.rst
 F:	include/linux/printk.h
diff --git a/include/linux/console.h b/include/linux/console.h
index c0749a48fc3f..4b2b87079cd8 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -16,6 +16,7 @@
 
 #include <linux/atomic.h>
 #include <linux/bits.h>
+#include <linux/device.h>
 #include <linux/irq_work.h>
 #include <linux/rculist.h>
 #include <linux/rcuwait.h>
@@ -323,6 +324,8 @@ struct nbcon_write_context {
  * @dropped:		Number of unreported dropped ringbuffer records
  * @data:		Driver private data
  * @node:		hlist node for the console list
+ * @classdev:		sysfs class device for this console, used to expose
+ *			per-console controls in /sys/class/console/<name>/
  *
  * @nbcon_state:	State for nbcon consoles
  * @nbcon_seq:		Sequence number of the next record for nbcon to print
@@ -352,6 +355,7 @@ struct console {
 	unsigned long		dropped;
 	void			*data;
 	struct hlist_node	node;
+	struct device		*classdev;
 
 	/* nbcon console specific members */
 
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index f8004ac3983d..19e4919a13a7 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-y	= printk.o
-obj-$(CONFIG_PRINTK)	+= printk_safe.o nbcon.o
+obj-$(CONFIG_PRINTK)	+= sysfs.o printk_safe.o nbcon.o
 obj-$(CONFIG_A11Y_BRAILLE_CONSOLE)	+= braille.o
 obj-$(CONFIG_PRINTK_INDEX)	+= index.o
 
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 41e37b44778f..3b3a3c982412 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -48,6 +48,9 @@ int console_effective_loglevel(int con_level);
 
 #ifdef CONFIG_PRINTK
 
+void console_register_device(struct console *new);
+void console_setup_class(void);
+
 #ifdef CONFIG_PRINTK_CALLER
 #define PRINTK_PREFIX_MAX	48
 #else
@@ -219,6 +222,9 @@ static inline void nbcon_kthreads_wake(void) { }
 static inline bool console_is_usable(struct console *con, short flags,
 				     bool use_atomic) { return false; }
 
+static inline void console_register_device(struct console *new) { }
+static inline void console_setup_class(void) { }
+
 #endif /* CONFIG_PRINTK */
 
 extern bool have_boot_console;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 80204cbb7bc8..62114aa61999 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -4010,6 +4010,9 @@ static void try_enable_default_console(struct console *newcon)
 	if (newcon->index < 0)
 		newcon->index = 0;
 
+	newcon->level = LOGLEVEL_DEFAULT;
+	newcon->classdev = NULL;
+
 	if (console_call_setup(newcon, NULL) != 0)
 		return;
 
@@ -4264,6 +4267,7 @@ void register_console(struct console *newcon)
 	if (use_device_lock)
 		newcon->device_unlock(newcon, flags);
 
+	console_register_device(newcon);
 	console_sysfs_notify();
 
 	/*
@@ -4379,6 +4383,13 @@ static int unregister_console_locked(struct console *console)
 	if (console->flags & CON_NBCON)
 		nbcon_free(console);
 
+	if (console->classdev) {
+		struct device *dev = console->classdev;
+		console->classdev = NULL;
+		device_unregister(dev);
+		put_device(dev);
+	}
+
 	console_sysfs_notify();
 
 	if (console->exit)
@@ -4528,6 +4539,9 @@ static int __init printk_late_init(void)
 					console_cpu_notify, NULL);
 	WARN_ON(ret < 0);
 	printk_sysctl_init();
+
+	console_setup_class();
+
 	return 0;
 }
 late_initcall(printk_late_init);
diff --git a/kernel/printk/sysfs.c b/kernel/printk/sysfs.c
new file mode 100644
index 000000000000..5252e6e04908
--- /dev/null
+++ b/kernel/printk/sysfs.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/console.h>
+#include <linux/device.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+static const char *
+console_effective_loglevel_source_str(const struct console *con)
+{
+	enum loglevel_source source;
+	const char *str;
+	int con_level;
+	int cookie;
+
+	cookie = console_srcu_read_lock();
+	con_level = console_srcu_read_loglevel(con);
+	console_srcu_read_unlock(cookie);
+	source = console_effective_loglevel_source(con_level);
+
+	switch (source) {
+	case LLS_IGNORE_LOGLEVEL:
+		str = "ignore_loglevel";
+		break;
+	case LLS_LOCAL:
+		str = "local";
+		break;
+	case LLS_GLOBAL:
+		str = "global";
+		break;
+	default:
+		str = "unknown";
+		break;
+	}
+
+	return str;
+}
+
+static ssize_t effective_loglevel_source_show(struct device *dev,
+					      struct device_attribute *attr,
+					      char *buf)
+{
+	struct console *con = dev_get_drvdata(dev);
+	const char *str;
+
+	str = console_effective_loglevel_source_str(con);
+	return sysfs_emit(buf, "%s\n", str);
+}
+
+static DEVICE_ATTR_RO(effective_loglevel_source);
+
+static ssize_t effective_loglevel_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct console *con = dev_get_drvdata(dev);
+	int con_level;
+	int cookie;
+
+	cookie = console_srcu_read_lock();
+	con_level = console_srcu_read_loglevel(con);
+	console_srcu_read_unlock(cookie);
+	return sysfs_emit(buf, "%d\n", console_effective_loglevel(con_level));
+}
+
+static DEVICE_ATTR_RO(effective_loglevel);
+
+static ssize_t loglevel_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct console *con = dev_get_drvdata(dev);
+	int con_level;
+	int cookie;
+
+	cookie = console_srcu_read_lock();
+	con_level = console_srcu_read_loglevel(con);
+	console_srcu_read_unlock(cookie);
+	return sysfs_emit(buf, "%d\n", con_level);
+}
+
+static ssize_t loglevel_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	struct console *con = dev_get_drvdata(dev);
+	ssize_t ret;
+	int level;
+	int cookie;
+
+	ret = kstrtoint(buf, 10, &level);
+	if (ret < 0)
+		return ret;
+
+	/* -1 means "use global loglevel" */
+	if (level == -1)
+		goto out;
+
+	/*
+	 * Reject level 0 (KERN_EMERG) - per-console loglevel must be > 0.
+	 * Emergency messages should go to all consoles, so they cannot be
+	 * filtered per-console.
+	 */
+	if (level == 0)
+		return -ERANGE;
+
+	if (console_clamp_loglevel(level) != level)
+		return -ERANGE;
+
+	/*
+	 * If the system has a minimum console loglevel set (via sysctl or
+	 * kernel parameter), enforce it. This prevents setting per-console
+	 * loglevels below the system minimum.
+	 */
+	if (minimum_console_loglevel > CONSOLE_LOGLEVEL_MIN &&
+	    level < minimum_console_loglevel)
+		return -ERANGE;
+
+out:
+	cookie = console_srcu_read_lock();
+	WRITE_ONCE(con->level, level);
+	console_srcu_read_unlock(cookie);
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(loglevel);
+
+static struct attribute *console_sysfs_attrs[] = {
+	&dev_attr_loglevel.attr,
+	&dev_attr_effective_loglevel_source.attr,
+	&dev_attr_effective_loglevel.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(console_sysfs);
+
+static const struct class console_class = {
+	.name = "console",
+	.dev_groups = console_sysfs_groups,
+};
+static bool console_class_registered;
+
+static void console_classdev_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+void console_register_device(struct console *con)
+{
+	/*
+	 * We might be called from register_console() before the class is
+	 * registered. If that happens, we'll take care of it in
+	 * printk_late_init.
+	 */
+	if (!console_class_registered)
+		return;
+
+	if (WARN_ON(con->classdev))
+		return;
+
+	con->classdev = kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!con->classdev)
+		return;
+
+	device_initialize(con->classdev);
+	dev_set_name(con->classdev, "%s%d", con->name, con->index);
+	dev_set_drvdata(con->classdev, con);
+	con->classdev->release = console_classdev_release;
+	con->classdev->class = &console_class;
+
+	/*
+	 * This class device exists solely to expose attributes (like loglevel)
+	 * and does not control physical power states. Power is managed by the
+	 * underlying hardware device. Disable PM entirely to prevent the
+	 * creation of confusing and unused power sysfs attributes.
+	 */
+	device_set_pm_not_required(con->classdev);
+
+	if (device_add(con->classdev)) {
+		put_device(con->classdev);
+		con->classdev = NULL;
+	}
+}
+
+void console_setup_class(void)
+{
+	struct console *con;
+	int cookie;
+	int err;
+
+	/*
+	 * printk exists for the lifetime of the kernel, it cannot be unloaded,
+	 * so we should never end up back in here.
+	 */
+	if (WARN_ON(console_class_registered))
+		return;
+
+	err = class_register(&console_class);
+	if (err)
+		return;
+
+	/*
+	 * Take console_list_lock() before exposing the class globally.
+	 * This ensures register_console() (which holds the lock) cannot
+	 * see the class until it's fully initialized with dev_groups.
+	 */
+	console_list_lock();
+	console_class_registered = true;
+	cookie = console_srcu_read_lock();
+	for_each_console_srcu(con)
+		console_register_device(con);
+	console_srcu_read_unlock(cookie);
+	console_list_unlock();
+}
-- 
2.51.2
Re: [PATCH v7 07/13] printk: console: Introduce sysfs interface for per-console loglevels
Posted by Petr Mladek 1 week, 3 days ago
On Wed 2025-11-19 03:07:27, Chris Down wrote:
> A sysfs interface under /sys/class/console/ is created that permits
> viewing and configuring per-console attributes. This is the main
> interface with which we expect users to interact with and configure
> per-console loglevels.

I made the following test:

1. Enable some debugging (CONFIG_DEBUG_KOBJECT=y, CONFIG_DEBUG_DRIVER=y)
2. Compile ttynull driver as a module (CONFIG_NULL_TTY=m)
3. Add "console=null" on the command line
4. Load the module "modprobe ttynull"
5. Remove the module "rmmod ttynull"

And I got the following warning when the module was removed:

[  382.299081] printk: legacy console [ttynull0] disabled
[  382.301829] device: 'ttynull0': device_unregister
[  382.302057] kobject: 'ttynull0' (000000006a6b229d): kobject_uevent_env
[  382.302088] kobject: 'ttynull0' (000000006a6b229d): fill_kobj_path: path = '/devices/virtual/console/ttynull0'
[  382.302250] kobject: 'ttynull0' (000000006a6b229d): kobject_cleanup, parent 0000000000000000
[  382.302261] kobject: 'ttynull0' (000000006a6b229d): calling ktype release
[  382.302272] kobject: 'ttynull0': free name
[  382.302281] ------------[ cut here ]------------
[  382.302289] refcount_t: underflow; use-after-free.
[  382.302319] WARNING: CPU: 4 PID: 1857 at lib/refcount.c:28 refcount_warn_saturate+0xbe/0x110
[  382.302335] Modules linked in: ttynull(E-)
[  382.302366] CPU: 4 UID: 0 PID: 1857 Comm: rmmod Tainted: G            E       6.18.0-rc6-default+ #448 PREEMPT(full)  b65a5eeebb0a78c479429a4b06dbf6320bbcd33d
[  382.302379] Tainted: [E]=UNSIGNED_MODULE
[  382.302388] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014
[  382.302396] RIP: 0010:refcount_warn_saturate+0xbe/0x110
[  382.302407] Code: 01 01 e8 05 11 60 ff 0f 0b e9 29 2f 3b ff 80 3d cb 17 b0 01 00 75 85 48 c7 c7 28 d8 00 9d c6 05 bb 17 b0 01 01 e8 e2 10 60 ff <0f> 0b e9 06 2f 3b ff 80 3d a9 17 b0 01 00 0f 85 5e ff ff ff 48 c7
[  382.302417] RSP: 0018:ffffcbd70189fe60 EFLAGS: 00010282
[  382.302433] RAX: 0000000000000000 RBX: ffffffffc0459040 RCX: 0000000000000002
[  382.302441] RDX: ffff8b32f57dff48 RSI: 0000000000000001 RDI: 00000000ffffffff
[  382.302450] RBP: ffff8b328b4f7000 R08: 00000000fff7ffff R09: ffff8b32fe3fdfa8
[  382.302458] R10: 0000000000000004 R11: 00000000fff80000 R12: ffffcbd70189ff58
[  382.302467] R13: 00000000000000b0 R14: 0000000000000000 R15: 0000000000000000
[  382.302479] FS:  00007f2e97442740(0000) GS:ffff8b3357178000(0000) knlGS:0000000000000000
[  382.302488] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  382.302497] CR2: 00007fa3088176f8 CR3: 0000000105373000 CR4: 0000000000750ef0
[  382.302524] PKRU: 55555554
[  382.302532] Call Trace:
[  382.302542]  <TASK>
[  382.302551]  unregister_console_locked.cold+0x28e/0x2cd
[  382.302582]  unregister_console+0x1f/0x40
[  382.302599]  ttynull_exit+0x10/0xed0 [ttynull 4b641f8e4981ec0e180c1229c097d2bddb0fbe2a]
[  382.302613]  __do_sys_delete_module.isra.0+0x19b/0x270
[  382.302648]  ? srso_alias_return_thunk+0x5/0xfbef5
[  382.302665]  do_syscall_64+0x7a/0x360
[  382.302692]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  382.302703] RIP: 0033:0x7f2e95d18437
[  382.302715] Code: 73 01 c3 48 8b 0d 59 0a 2d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 29 0a 2d 00 f7 d8 64 89 01 48
[  382.302725] RSP: 002b:00007fff591d8468 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
[  382.302740] RAX: ffffffffffffffda RBX: 00007fff591d84c8 RCX: 00007f2e95d18437
[  382.302748] RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055fe1453b5e8
[  382.302757] RBP: 000055fe1453b580 R08: 00007fff591d73e1 R09: 0000000000000000
[  382.302765] R10: 00007f2e95d9d660 R11: 0000000000000206 R12: 00007fff591d86a0
[  382.302774] R13: 00007fff591da738 R14: 000055fe1453a3c0 R15: 000055fe1453b580
[  382.302834]  </TASK>
[  382.302843] irq event stamp: 2523
[  382.302851] hardirqs last  enabled at (2529): [<ffffffff9b40932e>] __up_console_sem+0x5e/0x70
[  382.302863] hardirqs last disabled at (2534): [<ffffffff9b409313>] __up_console_sem+0x43/0x70
[  382.302873] softirqs last  enabled at (1622): [<ffffffff9b35cb2c>] handle_softirqs+0x32c/0x400
[  382.302885] softirqs last disabled at (1617): [<ffffffff9b35cce9>] __irq_exit_rcu+0xd9/0x150
[  382.302896] ---[ end trace 0000000000000000 ]---
[  382.303050] device: 'ttynull': device_unregister
[  382.303445] PM: Removing info for No Bus:ttynull
[  382.303454] kobject: 'ttynull' (0000000012b1dfce): kobject_uevent_env
[  382.303459] kobject: 'ttynull' (0000000012b1dfce): fill_kobj_path: path = '/devices/virtual/tty/ttynull'
[  382.303494] kobject: 'ttynull' (0000000012b1dfce): kobject_cleanup, parent 0000000000000000
[  382.303498] kobject: 'ttynull' (0000000012b1dfce): calling ktype release
[  382.303502] kobject: 'ttynull': free name
[  382.303506] kobject: '(null)' (00000000eec627ea): kobject_cleanup, parent 0000000000000000
[  382.303516] kobject: '(null)' (00000000eec627ea): calling ktype release
[  382.325859] kobject: 'holders' (0000000087b71c70): kobject_cleanup, parent 0000000040844ced
[  382.325880] kobject: 'holders' (0000000087b71c70): auto cleanup kobject_del
[  382.325918] kobject: 'holders' (0000000087b71c70): calling ktype release
[  382.325931] kobject: (0000000087b71c70): dynamic_kobj_release
[  382.325943] kobject: 'holders': free name
[  382.326895] kobject: 'ttynull' (0000000040844ced): kobject_cleanup, parent 00000000546c84c0
[  382.326915] kobject: 'ttynull' (0000000040844ced): auto cleanup kobject_del
[  382.326928] kobject: 'ttynull' (0000000040844ced): auto cleanup 'remove' event
[  382.326941] kobject: 'ttynull' (0000000040844ced): kobject_uevent_env
[  382.326959] kobject: 'ttynull' (0000000040844ced): fill_kobj_path: path = '/module/ttynull'
[  382.327049] kobject: 'ttynull' (0000000040844ced): calling ktype release
[  382.327148] kobject: 'ttynull': free name

> The lifecycle of this classdev looks like this on registration:
> 
>     register_console(con)/printk_late_init()
>       console_register_device(con)
>         device_initialize(con->classdev) # kref_init: refcount = 1
>         device_add(con->classdev)        # get_device: refcount++ (to 2)

I think that the problem is here. It is true that device_add() gets
extra refcount at the beginning but it is decremented on success,
see:

int device_add(struct device *dev)
{
[...]
	dev = get_device(dev);		<----  refcount++ (to 2)
[...]
done:
	put_device(dev);		<----- refcount-- (back to 1)
	return error;

> At stable state, the refcount is two.
> 
> Console unregistration looks like this:
> 
>     unregister_console_locked(con)
>       struct device *dev = console->classdev;
>       console->classdev = NULL;
>       device_unregister(dev)
>         device_del(dev)
>           device_remove_class_symlinks(dev)
>             sysfs_delete_link()
>               kernfs_remove_by_name_ns()
>                 __kernfs_remove()
>                   kernfs_drain()
>                     kernfs_drain_open_files() # wait for close()
>           kobject_del(&dev->kobj) # removes from sysfs, does NOT change refcount
>         put_device(dev) # kref_put: refcount-- (from 2 to 1)
>       put_device(dev) # kref_put: refcount-- (from 1 to 0)

This extra put_device() is superfluous and must not be called,
see below.

>         kobject_release()
>           kobject_cleanup()
>             device_release()
>               console_classdev_release(dev)
>                 kfree(dev)

> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -4379,6 +4383,13 @@ static int unregister_console_locked(struct console *console)
>  	if (console->flags & CON_NBCON)
>  		nbcon_free(console);
>  
> +	if (console->classdev) {
> +		struct device *dev = console->classdev;
> +		console->classdev = NULL;
> +		device_unregister(dev);
> +		put_device(dev);

The WARNING has gone after I removed this extra put_device().
And it seems to work well. The sysfs interface gets removed...

Just for the record: I tried to load/remove the "ttynull" module
several times and it worked as well.

> +	}
> +
>  	console_sysfs_notify();
>  
>  	if (console->exit)

Best Regards,
Petr
Re: [PATCH v7 07/13] printk: console: Introduce sysfs interface for per-console loglevels
Posted by Petr Mladek 1 week, 4 days ago
On Wed 2025-11-19 03:07:27, Chris Down wrote:
> A sysfs interface under /sys/class/console/ is created that permits
> viewing and configuring per-console attributes. This is the main
> interface with which we expect users to interact with and configure
> per-console loglevels.
> 
> --- a/Documentation/admin-guide/per-console-loglevel.rst
> +++ b/Documentation/admin-guide/per-console-loglevel.rst
> @@ -69,3 +69,41 @@ The default value for ``kernel.console_loglevel`` comes from
>  ``CONFIG_CONSOLE_LOGLEVEL_DEFAULT``, or ``CONFIG_CONSOLE_LOGLEVEL_QUIET`` if
>  ``quiet`` is passed on the kernel command line.
>  
> +Console attributes
> +~~~~~~~~~~~~~~~~~~
> +
> +Registered consoles are exposed at ``/sys/class/console``. For example, if you
> +are using ``ttyS0``, the console backing it can be viewed at
> +``/sys/class/console/ttyS0/``. The following files are available:
> +
> +* ``effective_loglevel`` (r): The effective loglevel after considering all
> +  loglevel authorities. For example, it shows the value of the console-specific
> +  loglevel when a console-specific loglevel is defined, and shows the global
> +  console loglevel value when the console-specific one is not defined.
> +
> +* ``effective_loglevel_source`` (r): The loglevel authority which resulted in
> +  the effective loglevel being set. The following values can be present:
> +
> +    * ``local``: The console-specific loglevel is in effect.
> +
> +    * ``global``: The global loglevel (``kernel.console_loglevel``) is in
> +      effect. Set a console-specific loglevel to override it.
> +
> +    * ``ignore_loglevel``: ``ignore_loglevel`` was specified on the kernel
> +      command line or at ``/sys/module/printk/parameters/ignore_loglevel``.
> +      Disable it to use level controls.
> +
> +* ``loglevel`` (rw): The local, console-specific loglevel for this console.
> +  This will be in effect if no other global control overrides it. Look at
> +  ``effective_loglevel`` and ``effective_loglevel_source`` to verify that.
> +
> +Deprecated
> +~~~~~~~~~~
> +
> +* ``kernel.printk`` sysctl: this takes four values, setting
> +  ``kernel.console_loglevel``, ``kernel.default_message_loglevel``, the minimum
> +  console loglevel, and a fourth unused value. The interface is generally
> +  considered to quite confusing, doesn't perform checks on the values given,

This seems to be a typo. I would use either of

   + "considered as quite confusing"
   + "considered to be quite confusing"

> +  and is unaware of per-console loglevel semantics.
> +
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -4010,6 +4010,9 @@ static void try_enable_default_console(struct console *newcon)
>  	if (newcon->index < 0)
>  		newcon->index = 0;
>  
> +	newcon->level = LOGLEVEL_DEFAULT;

This does not fit in this patch. A better place would be the 3rd patch
which added .level into struct console.

But I am not sure if we need it at all. The LOGLEVEL_DEFAULT will
be set by register_console() as a fallback anyway. It is the same
reason why I suggested removing similar code from
try_enable_preferred_console().

> +	newcon->classdev = NULL;

This should not be needed. struct console is defined statically
and this field should always be NULL before the console
is registered.

> +
>  	if (console_call_setup(newcon, NULL) != 0)
>  		return;
>  
> diff --git a/kernel/printk/sysfs.c b/kernel/printk/sysfs.c
> new file mode 100644
> index 000000000000..5252e6e04908
> --- /dev/null
> +++ b/kernel/printk/sysfs.c
> @@ -0,0 +1,213 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/kernel.h>
> +#include <linux/console.h>
> +#include <linux/device.h>
> +#include <linux/printk.h>
> +#include <linux/slab.h>
> +#include "internal.h"
> +
> +static const char *
> +console_effective_loglevel_source_str(const struct console *con)
> +{
> +	enum loglevel_source source;
> +	const char *str;
> +	int con_level;
> +	int cookie;
> +
> +	cookie = console_srcu_read_lock();
> +	con_level = console_srcu_read_loglevel(con);
> +	console_srcu_read_unlock(cookie);

I think that this was discussed in v6. But I guess that we did not
reach a consensus.

The SRCU read lock is not needed here. In fact, it is wrong because
unregister_console_locked() calls device_unregister() after
synchronize_srcu(&console_srcu).

The sysfs API makes sure that struct console cannot get removed
while the interface is being accessed. I suggest creating custom
wrappers for the sysfs interface. Something like:

/**
 * console_sysfs_read_loglevel - Locklessly read the console specific loglevel
 *				when accessing the related sysfs interface
 * @con:	struct console pointer of console to read loglevel from
 *
 * Reads @con->level with READ_ONCE() and without taking any lock. The access
 * is wrapped in data_race() to mark the lockless read as intentional.
 *
 * Only use this function to read the loglevel via the related sysfs
 * interface. The sysfs API makes sure that the structure cannot disappear
 * while the interface is used.
 *
 * Context: Sysfs interface for the given console.
 * Return: The current value of the @con->level field.
 */
static inline
int console_sysfs_read_loglevel(const struct console *con)
{
	/*
	 * The READ_ONCE() matches the WRITE_ONCE() when @level is modified
	 * for registered consoles.
	 */
	return data_race(READ_ONCE(con->level));
}

/**
 * console_sysfs_write_loglevel - Write the console specific loglevel via
 *	sysfs interface.
 * @con:	struct console pointer of console to write the loglevel to
 * @con_level:	new loglevel value to write
 *
 * Only use this function to write the loglevel via the related sysfs
 * interface. The sysfs API makes sure that the structure could not
 * disappear while the interface is used.
 *
 * Context: Any context.
 */
static inline
void console_sysfs_write_loglevel(struct console *con, int con_level)
{
	/* This matches the READ_ONCE() in console_sysfs_read_loglevel(). */
	WRITE_ONCE(con->level, con_level);
}

> +	source = console_effective_loglevel_source(con_level);
> +
> +	switch (source) {
> +	case LLS_IGNORE_LOGLEVEL:
> +		str = "ignore_loglevel";
> +		break;
> +	case LLS_LOCAL:
> +		str = "local";
> +		break;
> +	case LLS_GLOBAL:
> +		str = "global";
> +		break;
> +	default:
> +		str = "unknown";
> +		break;
> +	}
> +
> +	return str;
> +}
> +
> +static ssize_t effective_loglevel_source_show(struct device *dev,
> +					      struct device_attribute *attr,
> +					      char *buf)
> +{
> +	struct console *con = dev_get_drvdata(dev);
> +	const char *str;
> +
> +	str = console_effective_loglevel_source_str(con);
> +	return sysfs_emit(buf, "%s\n", str);
> +}
> +
> +static DEVICE_ATTR_RO(effective_loglevel_source);
> +
> +static ssize_t effective_loglevel_show(struct device *dev,
> +				       struct device_attribute *attr, char *buf)
> +{
> +	struct console *con = dev_get_drvdata(dev);
> +	int con_level;
> +	int cookie;
> +
> +	cookie = console_srcu_read_lock();
> +	con_level = console_srcu_read_loglevel(con);
> +	console_srcu_read_unlock(cookie);

Same here.

> +	return sysfs_emit(buf, "%d\n", console_effective_loglevel(con_level));
> +}
> +
> +static DEVICE_ATTR_RO(effective_loglevel);
> +
> +static ssize_t loglevel_show(struct device *dev, struct device_attribute *attr,
> +			     char *buf)
> +{
> +	struct console *con = dev_get_drvdata(dev);
> +	int con_level;
> +	int cookie;
> +
> +	cookie = console_srcu_read_lock();
> +	con_level = console_srcu_read_loglevel(con);
> +	console_srcu_read_unlock(cookie);

and here.

> +	return sysfs_emit(buf, "%d\n", con_level);
> +}
> +
> +static ssize_t loglevel_store(struct device *dev, struct device_attribute *attr,
> +			      const char *buf, size_t size)
> +{
> +	struct console *con = dev_get_drvdata(dev);
> +	ssize_t ret;
> +	int level;
> +	int cookie;
> +
> +	ret = kstrtoint(buf, 10, &level);
> +	if (ret < 0)
> +		return ret;
> +
> +	/* -1 means "use global loglevel" */
> +	if (level == -1)
> +		goto out;
> +
> +	/*
> +	 * Reject level 0 (KERN_EMERG) - per-console loglevel must be > 0.
> +	 * Emergency messages should go to all consoles, so they cannot be
> +	 * filtered per-console.
> +	 */
> +	if (level == 0)
> +		return -ERANGE;
> +
> +	if (console_clamp_loglevel(level) != level)
> +		return -ERANGE;
> +
> +	/*
> +	 * If the system has a minimum console loglevel set (via sysctl or
> +	 * kernel parameter), enforce it. This prevents setting per-console
> +	 * loglevels below the system minimum.
> +	 */
> +	if (minimum_console_loglevel > CONSOLE_LOGLEVEL_MIN &&
> +	    level < minimum_console_loglevel)
> +		return -ERANGE;
> +
> +out:
> +	cookie = console_srcu_read_lock();
> +	WRITE_ONCE(con->level, level);
> +	console_srcu_read_unlock(cookie);

and here. Note that we need to add the data_race() as discussed
in v6.

> +
> +	return size;
> +}
> +


I propose to squash the following changes into this patch:

diff --git a/Documentation/admin-guide/per-console-loglevel.rst b/Documentation/admin-guide/per-console-loglevel.rst
index 69eede12e20f..f621a6785f81 100644
--- a/Documentation/admin-guide/per-console-loglevel.rst
+++ b/Documentation/admin-guide/per-console-loglevel.rst
@@ -255,7 +255,7 @@ Deprecated
 * ``kernel.printk`` sysctl: this takes four values, setting
   ``kernel.console_loglevel``, ``kernel.default_message_loglevel``, the minimum
   console loglevel, and a fourth unused value. The interface is generally
-  considered to quite confusing, doesn't perform checks on the values given,
+  considered to be quite confusing, doesn't perform checks on the values given,
   and is unaware of per-console loglevel semantics.
 
 Chris Down <chris@chrisdown.name>, 18-November-2025
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 170fa8e14ea9..e73276880d51 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -4118,9 +4118,6 @@ static void try_enable_default_console(struct console *newcon)
 	if (newcon->index < 0)
 		newcon->index = 0;
 
-	newcon->level = LOGLEVEL_DEFAULT;
-	newcon->classdev = NULL;
-
 	if (console_call_setup(newcon, NULL) != 0)
 		return;
 
diff --git a/kernel/printk/sysfs.c b/kernel/printk/sysfs.c
index 5252e6e04908..fc029b425809 100644
--- a/kernel/printk/sysfs.c
+++ b/kernel/printk/sysfs.c
@@ -6,17 +6,57 @@
 #include <linux/slab.h>
 #include "internal.h"
 
+/**
+ * console_sysfs_read_loglevel - Locklessly read the console specific loglevel
+ *				when accessing the related sysfs interface
+ * @con:	struct console pointer of console to read loglevel from
+ *
+ * Locklessly reads @con->level. The sysfs writer stores the value with
+ * WRITE_ONCE(), and data_race() marks this unsynchronized read as
+ * intentional.
+ *
+ * Only use this function to read the loglevel via the related sysfs
+ * interface. The sysfs API makes sure that the structure could not
+ * disappear while the interface is used.
+ *
+ * Context: Sysfs interface for the given console.
+ * Return: The current value of the @con->level field.
+ */
+static inline
+int console_sysfs_read_loglevel(const struct console *con)
+{
+	/*
+	 * The READ_ONCE() matches the WRITE_ONCE() when @level is modified
+	 * for registered consoles.
+	 */
+	return data_race(READ_ONCE(con->level));
+}
+
+/**
+ * console_sysfs_write_loglevel - Write the console specific loglevel via
+ *	sysfs interface.
+ * @con:	struct console pointer of console to write the loglevel to
+ * @con_level:	new loglevel value to write
+ *
+ * Only use this function to write the loglevel via the related sysfs
+ * interface. The sysfs API makes sure that the structure could not
+ * disappear while the interface is used.
+ *
+ * Context: Any context.
+ */
+static inline
+void console_sysfs_write_loglevel(struct console *con, int con_level)
+{
+	/* This matches the READ_ONCE() in console_sysfs_read_loglevel(). */
+	WRITE_ONCE(con->level, con_level);
+}
+
 static const char *
-console_effective_loglevel_source_str(const struct console *con)
+console_effective_loglevel_source_str(int con_level)
 {
 	enum loglevel_source source;
 	const char *str;
-	int con_level;
-	int cookie;
 
-	cookie = console_srcu_read_lock();
-	con_level = console_srcu_read_loglevel(con);
-	console_srcu_read_unlock(cookie);
 	source = console_effective_loglevel_source(con_level);
 
 	switch (source) {
@@ -43,8 +83,10 @@ static ssize_t effective_loglevel_source_show(struct device *dev,
 {
 	struct console *con = dev_get_drvdata(dev);
 	const char *str;
+	int con_level;
 
-	str = console_effective_loglevel_source_str(con);
+	con_level = console_sysfs_read_loglevel(con);
+	str = console_effective_loglevel_source_str(con_level);
 	return sysfs_emit(buf, "%s\n", str);
 }
 
@@ -55,11 +97,8 @@ static ssize_t effective_loglevel_show(struct device *dev,
 {
 	struct console *con = dev_get_drvdata(dev);
 	int con_level;
-	int cookie;
 
-	cookie = console_srcu_read_lock();
-	con_level = console_srcu_read_loglevel(con);
-	console_srcu_read_unlock(cookie);
+	con_level = console_sysfs_read_loglevel(con);
 	return sysfs_emit(buf, "%d\n", console_effective_loglevel(con_level));
 }
 
@@ -70,11 +109,8 @@ static ssize_t loglevel_show(struct device *dev, struct device_attribute *attr,
 {
 	struct console *con = dev_get_drvdata(dev);
 	int con_level;
-	int cookie;
 
-	cookie = console_srcu_read_lock();
-	con_level = console_srcu_read_loglevel(con);
-	console_srcu_read_unlock(cookie);
+	con_level = console_sysfs_read_loglevel(con);
 	return sysfs_emit(buf, "%d\n", con_level);
 }
 
@@ -84,7 +120,6 @@ static ssize_t loglevel_store(struct device *dev, struct device_attribute *attr,
 	struct console *con = dev_get_drvdata(dev);
 	ssize_t ret;
 	int level;
-	int cookie;
 
 	ret = kstrtoint(buf, 10, &level);
 	if (ret < 0)
@@ -115,9 +150,7 @@ static ssize_t loglevel_store(struct device *dev, struct device_attribute *attr,
 		return -ERANGE;
 
 out:
-	cookie = console_srcu_read_lock();
-	WRITE_ONCE(con->level, level);
-	console_srcu_read_unlock(cookie);
+	console_sysfs_write_loglevel(con, level);
 
 	return size;
 }

Best Regards,
Petr