Currently, there is no convenient way to see the info that the
ref_tracking infrastructure collects. Attempt to create a file in
debugfs when called from ref_tracker_dir_init().
The file is given the name "class@%px", as having the unmodified address
is helpful for debugging. This should be safe since this directory is only
accessible by root
While ref_tracker_dir_init() is generally called from a context where
sleeping is OK, ref_tracker_dir_exit() can be called from anywhere.
Thus, dentry cleanup must be handled asynchronously.
Add a new global xarray that has entries with the ref_tracker_dir
pointer as the index and the corresponding debugfs dentry pointer as the
value. Instead of removing the debugfs dentry, have
ref_tracker_dir_exit() set a mark on the xarray entry and schedule a
workqueue job. The workqueue job then walks the xarray looking for
marked entries, and removes their xarray entries and the debugfs
dentries.
Because of this, the debugfs dentry can outlive the corresponding
ref_tracker_dir. Have ref_tracker_debugfs_show() take extra care to
ensure that it's safe to dereference the dir pointer before using it.
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
include/linux/ref_tracker.h | 17 +++++
lib/ref_tracker.c | 152 ++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 164 insertions(+), 5 deletions(-)
diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
index 3968f993db81e95c0d58c81454311841c1b9cd35..28bbf436a8f4646cfac181d618195a9460bda196 100644
--- a/include/linux/ref_tracker.h
+++ b/include/linux/ref_tracker.h
@@ -26,6 +26,18 @@ struct ref_tracker_dir {
#ifdef CONFIG_REF_TRACKER
+#ifdef CONFIG_DEBUG_FS
+
+void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir);
+
+#else /* CONFIG_DEBUG_FS */
+
+static inline void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
unsigned int quarantine_count,
const char *class,
@@ -40,6 +52,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
refcount_set(&dir->no_tracker, 1);
dir->class = class;
strscpy(dir->name, name, sizeof(dir->name));
+ ref_tracker_dir_debugfs(dir);
stack_depot_init();
}
@@ -68,6 +81,10 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
{
}
+static inline void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir)
+{
+}
+
static inline void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
{
}
diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
index 73b606570cce9e551d13e65365a87dc4ce748b13..c938ef56954b2169458e9b23a3db2441bcf91aa6 100644
--- a/lib/ref_tracker.c
+++ b/lib/ref_tracker.c
@@ -29,6 +29,40 @@ struct ref_tracker_dir_stats {
} stacks[];
};
+#ifdef CONFIG_DEBUG_FS
+#include <linux/xarray.h>
+
+/*
+ * ref_tracker_dir_init() is usually called in allocation-safe contexts, but
+ * the same is not true of ref_tracker_dir_exit() which can be called from
+ * anywhere an object is freed. Removing debugfs dentries is a blocking
+ * operation, so we defer that work to the debugfs_reap_worker.
+ *
+ * Each dentry is tracked in the appropriate xarray. When
+ * ref_tracker_dir_exit() is called, its entries in the xarrays are marked and
+ * the workqueue job is scheduled. The worker then runs and deletes any marked
+ * dentries asynchronously.
+ */
+static struct xarray debugfs_dentries;
+static struct work_struct debugfs_reap_worker;
+
+#define REF_TRACKER_DIR_DEAD XA_MARK_0
+static inline void ref_tracker_debugfs_mark(struct ref_tracker_dir *dir)
+{
+ unsigned long flags;
+
+ xa_lock_irqsave(&debugfs_dentries, flags);
+ __xa_set_mark(&debugfs_dentries, (unsigned long)dir, REF_TRACKER_DIR_DEAD);
+ xa_unlock_irqrestore(&debugfs_dentries, flags);
+
+ schedule_work(&debugfs_reap_worker);
+}
+#else
+static inline void ref_tracker_debugfs_mark(struct ref_tracker_dir *dir)
+{
+}
+#endif
+
static struct ref_tracker_dir_stats *
ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit)
{
@@ -185,6 +219,11 @@ void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
bool leak = false;
dir->dead = true;
+ /*
+ * The xarray entries must be marked before the dir->lock is taken to
+ * protect simultaneous debugfs readers.
+ */
+ ref_tracker_debugfs_mark(dir);
spin_lock_irqsave(&dir->lock, flags);
list_for_each_entry_safe(tracker, n, &dir->quarantine, head) {
list_del(&tracker->head);
@@ -312,23 +351,126 @@ static void __ostream_printf pr_ostream_seq(struct ostream *stream, char *fmt, .
va_end(args);
}
-static __maybe_unused int
-ref_tracker_dir_seq_print(struct ref_tracker_dir *dir, struct seq_file *seq)
+static int ref_tracker_dir_seq_print(struct ref_tracker_dir *dir, struct seq_file *seq)
{
struct ostream os = { .func = pr_ostream_seq,
.prefix = "",
.seq = seq };
- unsigned long flags;
- spin_lock_irqsave(&dir->lock, flags);
__ref_tracker_dir_pr_ostream(dir, 16, &os);
- spin_unlock_irqrestore(&dir->lock, flags);
return os.used;
}
+static int ref_tracker_debugfs_show(struct seq_file *f, void *v)
+{
+ struct ref_tracker_dir *dir = f->private;
+ unsigned long index = (unsigned long)dir;
+ unsigned long flags;
+ int ret;
+
+ /*
+ * "dir" may not exist at this point if ref_tracker_dir_exit() has
+ * already been called. Take care not to dereference it until its
+ * legitimacy is established.
+ *
+ * The xa_lock is necessary to ensure that "dir" doesn't disappear
+ * before its lock can be taken. If it's in the hash and not marked
+ * dead, then it's safe to take dir->lock which prevents
+ * ref_tracker_dir_exit() from completing. Once the dir->lock is
+ * acquired, the xa_lock can be released. All of this must be IRQ-safe.
+ */
+ xa_lock_irqsave(&debugfs_dentries, flags);
+ if (!xa_load(&debugfs_dentries, index) ||
+ xa_get_mark(&debugfs_dentries, index, REF_TRACKER_DIR_DEAD)) {
+ xa_unlock_irqrestore(&debugfs_dentries, flags);
+ return -ENODATA;
+ }
+
+ spin_lock(&dir->lock);
+ xa_unlock(&debugfs_dentries);
+ ret = ref_tracker_dir_seq_print(dir, f);
+ spin_unlock_irqrestore(&dir->lock, flags);
+ return ret;
+}
+
+static int ref_tracker_debugfs_open(struct inode *inode, struct file *filp)
+{
+ struct ref_tracker_dir *dir = inode->i_private;
+
+ return single_open(filp, ref_tracker_debugfs_show, dir);
+}
+
+static const struct file_operations ref_tracker_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = ref_tracker_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/**
+ * ref_tracker_dir_debugfs - create debugfs file for ref_tracker_dir
+ * @dir: ref_tracker_dir to be associated with debugfs file
+ *
+ * In most cases, a debugfs file will be created automatically for every
+ * ref_tracker_dir. If the object was created before debugfs is brought up
+ * then that may fail. In those cases, it is safe to call this at a later
+ * time to create the file.
+ */
+void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir)
+{
+ char name[NAME_MAX + 1];
+ struct dentry *dentry;
+ int ret;
+
+ /* No-op if already created */
+ dentry = xa_load(&debugfs_dentries, (unsigned long)dir);
+ if (dentry && !xa_is_err(dentry))
+ return;
+
+ ret = snprintf(name, sizeof(name), "%s@%px", dir->class, dir);
+ name[sizeof(name) - 1] = '\0';
+
+ if (ret < sizeof(name)) {
+ dentry = debugfs_create_file(name, S_IFREG | 0400,
+ ref_tracker_debug_dir, dir,
+ &ref_tracker_debugfs_fops);
+ if (!IS_ERR(dentry)) {
+ void *old;
+
+ old = xa_store_irq(&debugfs_dentries, (unsigned long)dir,
+ dentry, GFP_KERNEL);
+
+ if (xa_is_err(old))
+ debugfs_remove(dentry);
+ else
+ WARN_ON_ONCE(old);
+ }
+ }
+}
+EXPORT_SYMBOL(ref_tracker_dir_debugfs);
+
+static void debugfs_reap_work(struct work_struct *work)
+{
+ struct dentry *dentry;
+ unsigned long index;
+ bool reaped;
+
+ do {
+ reaped = false;
+ xa_for_each_marked(&debugfs_dentries, index, dentry, REF_TRACKER_DIR_DEAD) {
+ xa_erase_irq(&debugfs_dentries, index);
+ debugfs_remove(dentry);
+ reaped = true;
+ }
+ } while (reaped);
+}
+
static int __init ref_tracker_debugfs_init(void)
{
+ INIT_WORK(&debugfs_reap_worker, debugfs_reap_work);
+ xa_init_flags(&debugfs_dentries, XA_FLAGS_LOCK_IRQ);
ref_tracker_debug_dir = debugfs_create_dir("ref_tracker", NULL);
return 0;
}
--
2.49.0
On Wed, Jun 18, 2025 at 10:24:19AM -0400, Jeff Layton wrote:
> [...]
> The file is given the name "class@%px", as having the unmodified address
> is helpful for debugging. This should be safe since this directory is only
> accessible by root
> [...]
> +void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir)
> +{
> + char name[NAME_MAX + 1];
> + struct dentry *dentry;
> + int ret;
> +
> + /* No-op if already created */
> + dentry = xa_load(&debugfs_dentries, (unsigned long)dir);
> + if (dentry && !xa_is_err(dentry))
> + return;
> +
> + ret = snprintf(name, sizeof(name), "%s@%px", dir->class, dir);
> + name[sizeof(name) - 1] = '\0';
Yikes! Never use %px, and especially don't use it for a stable
identifier nor expose it to userspace like this. If you absolutely must,
use %p, but never %px. This is a kernel address leak:
https://docs.kernel.org/process/deprecated.html#p-format-specifier
"helpful for debugging" is not a sufficiently good reason; and "only
accessible by root" has nothing to do with kernel address integrity.
Those kinds of things are (roughly) managed by various capabilities,
not DAC uid==0.
--
Kees Cook
On Wed, 2025-07-30 at 16:07 -0700, Kees Cook wrote:
> On Wed, Jun 18, 2025 at 10:24:19AM -0400, Jeff Layton wrote:
> > [...]
> > The file is given the name "class@%px", as having the unmodified address
> > is helpful for debugging. This should be safe since this directory is only
> > accessible by root
> > [...]
> > +void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir)
> > +{
> > + char name[NAME_MAX + 1];
> > + struct dentry *dentry;
> > + int ret;
> > +
> > + /* No-op if already created */
> > + dentry = xa_load(&debugfs_dentries, (unsigned long)dir);
> > + if (dentry && !xa_is_err(dentry))
> > + return;
> > +
> > + ret = snprintf(name, sizeof(name), "%s@%px", dir->class, dir);
> > + name[sizeof(name) - 1] = '\0';
>
> Yikes! Never use %px, and especially don't use it for a stable
> identifier nor expose it to userspace like this. If you absolutely must,
> use %p, but never %px. This is a kernel address leak:
> https://docs.kernel.org/process/deprecated.html#p-format-specifier
>
> "helpful for debugging" is not a sufficiently good reason; and "only
> accessible by root" has nothing to do with kernel address integrity.
> Those kinds of things are (roughly) managed by various capabilities,
> not DAC uid==0.
From the link above:
"If you think you can justify it (in comments and commit log) well
enough to stand up to Linus’s scrutiny, maybe you can use “%px”, along
with making sure you have sensible permissions."
Is making it only accessible by root not sensible enough? What are
"sensible permissions" in this instance?
Those questions asked, I'm not dead-set on using %px here. I just
figured it would be more convenient to have the actual address if you
needed to go poke around with drgn. We can change it to %p (or
something else) if it's really a problem.
--
Jeff Layton <jlayton@kernel.org>
On Thu, Jul 31, 2025 at 06:29:00AM -0400, Jeff Layton wrote: > "If you think you can justify it (in comments and commit log) well > enough to stand up to Linus’s scrutiny, maybe you can use “%px”, along > with making sure you have sensible permissions." > > Is making it only accessible by root not sensible enough? What are > "sensible permissions" in this instance? Yes, I should have been more clear (or probably should update the document), but root (uid==0) isn't a sufficient permission check, as address exposure is supposed to be bounded by capabilities. Putting a filename into the tree exposes the address to anything that can get a file listing, and DAC access control isn't granular enough. (Thank you again for the fix patch I saw in the other thread!) -- Kees Cook
© 2016 - 2026 Red Hat, Inc.