[PATCH] vfs: transitive upgrade restrictions for fds

Jori Koolstra posted 1 patch 1 week, 3 days ago
There is a newer version of this series
fs/file_table.c                  |  2 ++
fs/internal.h                    |  1 +
fs/namei.c                       | 38 ++++++++++++++++++++++++++++----
fs/open.c                        |  9 ++++++++
fs/proc/base.c                   | 24 ++++++++++++++------
fs/proc/fd.c                     |  6 ++++-
fs/proc/internal.h               |  4 +++-
include/linux/fcntl.h            |  6 ++++-
include/linux/fs.h               |  1 +
include/linux/namei.h            | 15 ++++++++++++-
include/uapi/asm-generic/fcntl.h |  4 ++++
include/uapi/linux/openat2.h     |  1 +
12 files changed, 96 insertions(+), 15 deletions(-)
[PATCH] vfs: transitive upgrade restrictions for fds
Posted by Jori Koolstra 1 week, 3 days ago
Add upgrade restrictions to openat2(). Extend struct open_how to allow
setting transitive restrictions on using file descriptors to open other
files. A use case for this feature is to block services or containers
from re-opening/upgrading an O_PATH file descriptor through e.g.
/proc/<pid>/fd/<nr> as O_WRONLY.

The idea for this feature comes from the UAPI group kernel feature idea
list [1].

[1] https://github.com/uapi-group/kernel-features?tab=readme-ov-file#upgrade-masks-in-openat2

Signed-off-by: Jori Koolstra <jkoolstra@xs4all.nl>
---
 fs/file_table.c                  |  2 ++
 fs/internal.h                    |  1 +
 fs/namei.c                       | 38 ++++++++++++++++++++++++++++----
 fs/open.c                        |  9 ++++++++
 fs/proc/base.c                   | 24 ++++++++++++++------
 fs/proc/fd.c                     |  6 ++++-
 fs/proc/internal.h               |  4 +++-
 include/linux/fcntl.h            |  6 ++++-
 include/linux/fs.h               |  1 +
 include/linux/namei.h            | 15 ++++++++++++-
 include/uapi/asm-generic/fcntl.h |  4 ++++
 include/uapi/linux/openat2.h     |  1 +
 12 files changed, 96 insertions(+), 15 deletions(-)

diff --git a/fs/file_table.c b/fs/file_table.c
index aaa5faaace1e..b98038009fd2 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -196,6 +196,8 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
 	f->f_wb_err	= 0;
 	f->f_sb_err	= 0;
 
+	f->f_allowed_upgrades = VALID_UPGRADE_FLAGS;
+
 	/*
 	 * We're SLAB_TYPESAFE_BY_RCU so initialize f_ref last. While
 	 * fget-rcu pattern users need to be able to handle spurious
diff --git a/fs/internal.h b/fs/internal.h
index cbc384a1aa09..0a37bb208184 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -189,6 +189,7 @@ struct open_flags {
 	int acc_mode;
 	int intent;
 	int lookup_flags;
+	unsigned int allowed_upgrades;
 };
 extern struct file *do_file_open(int dfd, struct filename *pathname,
 		const struct open_flags *op);
diff --git a/fs/namei.c b/fs/namei.c
index 58f715f7657e..3982908ff995 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -743,6 +743,7 @@ struct nameidata {
 	int		dfd;
 	vfsuid_t	dir_vfsuid;
 	umode_t		dir_mode;
+	unsigned int	allowed_upgrades;
 } __randomize_layout;
 
 #define ND_ROOT_PRESET 1
@@ -760,6 +761,7 @@ static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name)
 	p->path.mnt = NULL;
 	p->path.dentry = NULL;
 	p->total_link_count = old ? old->total_link_count : 0;
+	p->allowed_upgrades = VALID_UPGRADE_FLAGS;
 	p->saved = old;
 	current->nameidata = p;
 }
@@ -1155,12 +1157,11 @@ static int nd_jump_root(struct nameidata *nd)
 	nd->state |= ND_JUMPED;
 	return 0;
 }
-
 /*
  * Helper to directly jump to a known parsed path from ->get_link,
  * caller must have taken a reference to path beforehand.
  */
-int nd_jump_link(const struct path *path)
+int nd_jump_link_how(const struct path *path, const struct jump_how how)
 {
 	int error = -ELOOP;
 	struct nameidata *nd = current->nameidata;
@@ -1181,6 +1182,7 @@ int nd_jump_link(const struct path *path)
 	nd->path = *path;
 	nd->inode = nd->path.dentry->d_inode;
 	nd->state |= ND_JUMPED;
+	nd->allowed_upgrades &= how.allowed_upgrades;
 	return 0;
 
 err:
@@ -2738,6 +2740,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 		if (fd_empty(f))
 			return ERR_PTR(-EBADF);
 
+		nd->allowed_upgrades = fd_file(f)->f_allowed_upgrades;
+
 		if (flags & LOOKUP_LINKAT_EMPTY) {
 			if (fd_file(f)->f_cred != current_cred() &&
 			    !ns_capable(fd_file(f)->f_cred->user_ns, CAP_DAC_READ_SEARCH))
@@ -4266,6 +4270,28 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path,
 	return 0;
 }
 
+static bool may_upgrade(const int flag, const unsigned int allowed_upgrades)
+{
+	int mode = flag & O_ACCMODE;
+	unsigned int allowed = allowed_upgrades & ~DENY_UPGRADES;
+
+	if (mode != O_WRONLY && !(allowed & READ_UPGRADABLE))
+		return false;
+	if (mode != O_RDONLY && !(allowed & WRITE_UPGRADABLE))
+		return false;
+	return true;
+}
+
+static int may_open_upgrade(struct mnt_idmap *idmap, const struct path *path,
+			    int acc_mode, int flag,
+			    const unsigned int allowed_upgrades)
+{
+	if (!may_upgrade(flag, allowed_upgrades))
+		return -EACCES;
+
+	return may_open(idmap, path, acc_mode, flag);
+}
+
 static int handle_truncate(struct mnt_idmap *idmap, struct file *filp)
 {
 	const struct path *path = &filp->f_path;
@@ -4666,7 +4692,8 @@ static int do_open(struct nameidata *nd,
 			return error;
 		do_truncate = true;
 	}
-	error = may_open(idmap, &nd->path, acc_mode, open_flag);
+	error = may_open_upgrade(idmap, &nd->path, acc_mode, open_flag,
+				 nd->allowed_upgrades);
 	if (!error && !(file->f_mode & FMODE_OPENED))
 		error = vfs_open(&nd->path, file);
 	if (!error)
@@ -4831,8 +4858,11 @@ static struct file *path_openat(struct nameidata *nd,
 		terminate_walk(nd);
 	}
 	if (likely(!error)) {
-		if (likely(file->f_mode & FMODE_OPENED))
+		if (likely(file->f_mode & FMODE_OPENED)) {
+			file->f_allowed_upgrades =
+				op->allowed_upgrades & nd->allowed_upgrades;
 			return file;
+		}
 		WARN_ON(1);
 		error = -EINVAL;
 	}
diff --git a/fs/open.c b/fs/open.c
index 91f1139591ab..212a1d260947 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1167,6 +1167,7 @@ inline struct open_how build_open_how(int flags, umode_t mode)
 	struct open_how how = {
 		.flags = flags & VALID_OPEN_FLAGS,
 		.mode = mode & S_IALLUGO,
+		.allowed_upgrades = VALID_UPGRADE_FLAGS
 	};
 
 	/* O_PATH beats everything else. */
@@ -1300,6 +1301,14 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
 	}
 
 	op->lookup_flags = lookup_flags;
+
+	if (how->allowed_upgrades == 0)
+		op->allowed_upgrades = VALID_UPGRADE_FLAGS;
+	else if (how->allowed_upgrades & ~VALID_UPGRADE_FLAGS)
+		return -EINVAL;
+	else
+		op->allowed_upgrades = how->allowed_upgrades;
+
 	return 0;
 }
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4c863d17dfb4..84c54f9dffd9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -218,7 +218,8 @@ static int get_task_root(struct task_struct *task, struct path *root)
 	return result;
 }
 
-static int proc_cwd_link(struct dentry *dentry, struct path *path)
+static int proc_cwd_link(struct dentry *dentry, struct path *path,
+			 struct jump_how *jump_how)
 {
 	struct task_struct *task = get_proc_task(d_inode(dentry));
 	int result = -ENOENT;
@@ -227,6 +228,7 @@ static int proc_cwd_link(struct dentry *dentry, struct path *path)
 		task_lock(task);
 		if (task->fs) {
 			get_fs_pwd(task->fs, path);
+			*jump_how = JUMP_HOW_UNRESTRICTED;
 			result = 0;
 		}
 		task_unlock(task);
@@ -235,7 +237,8 @@ static int proc_cwd_link(struct dentry *dentry, struct path *path)
 	return result;
 }
 
-static int proc_root_link(struct dentry *dentry, struct path *path)
+static int proc_root_link(struct dentry *dentry, struct path *path,
+			  struct jump_how *jump_how)
 {
 	struct task_struct *task = get_proc_task(d_inode(dentry));
 	int result = -ENOENT;
@@ -243,6 +246,7 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
 	if (task) {
 		result = get_task_root(task, path);
 		put_task_struct(task);
+		*jump_how = JUMP_HOW_UNRESTRICTED;
 	}
 	return result;
 }
@@ -1777,7 +1781,8 @@ static const struct file_operations proc_pid_set_comm_operations = {
 	.release	= single_release,
 };
 
-static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
+static int proc_exe_link(struct dentry *dentry, struct path *exe_path,
+			 struct jump_how *jump_how)
 {
 	struct task_struct *task;
 	struct file *exe_file;
@@ -1789,6 +1794,7 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
 	put_task_struct(task);
 	if (exe_file) {
 		*exe_path = exe_file->f_path;
+		*jump_how = JUMP_HOW_UNRESTRICTED;
 		path_get(&exe_file->f_path);
 		fput(exe_file);
 		return 0;
@@ -1801,6 +1807,7 @@ static const char *proc_pid_get_link(struct dentry *dentry,
 				     struct delayed_call *done)
 {
 	struct path path;
+	struct jump_how jump_how;
 	int error = -EACCES;
 
 	if (!dentry)
@@ -1810,11 +1817,11 @@ static const char *proc_pid_get_link(struct dentry *dentry,
 	if (!proc_fd_access_allowed(inode))
 		goto out;
 
-	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
+	error = PROC_I(inode)->op.proc_get_link(dentry, &path, &jump_how);
 	if (error)
 		goto out;
 
-	error = nd_jump_link(&path);
+	error = nd_jump_link_how(&path, jump_how);
 out:
 	return ERR_PTR(error);
 }
@@ -1848,12 +1855,13 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
 	int error = -EACCES;
 	struct inode *inode = d_inode(dentry);
 	struct path path;
+	struct jump_how jump_how;
 
 	/* Are we allowed to snoop on the tasks file descriptors? */
 	if (!proc_fd_access_allowed(inode))
 		goto out;
 
-	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
+	error = PROC_I(inode)->op.proc_get_link(dentry, &path, &jump_how);
 	if (error)
 		goto out;
 
@@ -2250,7 +2258,8 @@ static const struct dentry_operations tid_map_files_dentry_operations = {
 	.d_delete	= pid_delete_dentry,
 };
 
-static int map_files_get_link(struct dentry *dentry, struct path *path)
+static int map_files_get_link(struct dentry *dentry, struct path *path,
+			      struct jump_how *jump_how)
 {
 	unsigned long vm_start, vm_end;
 	struct vm_area_struct *vma;
@@ -2279,6 +2288,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
 	rc = -ENOENT;
 	vma = find_exact_vma(mm, vm_start, vm_end);
 	if (vma && vma->vm_file) {
+		*jump_how = JUMP_HOW_UNRESTRICTED;
 		*path = *file_user_path(vma->vm_file);
 		path_get(path);
 		rc = 0;
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 9eeccff49b2a..344485e8cb6f 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -171,7 +171,8 @@ static const struct dentry_operations tid_fd_dentry_operations = {
 	.d_delete	= pid_delete_dentry,
 };
 
-static int proc_fd_link(struct dentry *dentry, struct path *path)
+static int proc_fd_link(struct dentry *dentry, struct path *path,
+			struct jump_how *jump_how)
 {
 	struct task_struct *task;
 	int ret = -ENOENT;
@@ -183,6 +184,9 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
 
 		fd_file = fget_task(task, fd);
 		if (fd_file) {
+			*jump_how = (struct jump_how) {
+				.allowed_upgrades = fd_file->f_allowed_upgrades
+			};
 			*path = fd_file->f_path;
 			path_get(&fd_file->f_path);
 			ret = 0;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c1e8eb984da8..42f668059a30 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -14,6 +14,7 @@
 #include <linux/sched/coredump.h>
 #include <linux/sched/task.h>
 #include <linux/mm.h>
+#include <linux/namei.h>
 
 struct ctl_table_header;
 struct mempolicy;
@@ -107,7 +108,8 @@ extern struct kmem_cache *proc_dir_entry_cache;
 void pde_free(struct proc_dir_entry *pde);
 
 union proc_op {
-	int (*proc_get_link)(struct dentry *, struct path *);
+	int (*proc_get_link)(struct dentry *, struct path *,
+		struct jump_how *);
 	int (*proc_show)(struct seq_file *m,
 		struct pid_namespace *ns, struct pid *pid,
 		struct task_struct *task);
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index a332e79b3207..6b15b488d542 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -12,6 +12,9 @@
 	 FASYNC	| O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \
 	 O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE)
 
+#define VALID_UPGRADE_FLAGS \
+	(DENY_UPGRADES | READ_UPGRADABLE | WRITE_UPGRADABLE)
+
 /* List of all valid flags for the how->resolve argument: */
 #define VALID_RESOLVE_FLAGS \
 	(RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS | \
@@ -19,7 +22,8 @@
 
 /* List of all open_how "versions". */
 #define OPEN_HOW_SIZE_VER0	24 /* sizeof first published struct */
-#define OPEN_HOW_SIZE_LATEST	OPEN_HOW_SIZE_VER0
+#define OPEN_HOW_SIZE_VER1	32 /* added allowed_upgrades */
+#define OPEN_HOW_SIZE_LATEST	OPEN_HOW_SIZE_VER1
 
 #ifndef force_o_largefile
 #define force_o_largefile() (!IS_ENABLED(CONFIG_ARCH_32BIT_OFF_T))
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b3dd145b25e..697d2fc6322b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1296,6 +1296,7 @@ struct file {
 	};
 	file_ref_t			f_ref;
 	/* --- cacheline 3 boundary (192 bytes) --- */
+	unsigned int			f_allowed_upgrades;
 } __randomize_layout
   __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 58600cf234bc..b827df5b59d9 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -203,7 +203,20 @@ static inline umode_t __must_check mode_strip_umask(const struct inode *dir, umo
 	return mode;
 }
 
-extern int __must_check nd_jump_link(const struct path *path);
+struct jump_how {
+	unsigned int allowed_upgrades;
+};
+
+#define JUMP_HOW_UNRESTRICTED \
+	((const struct jump_how){ .allowed_upgrades = VALID_UPGRADE_FLAGS })
+
+extern int __must_check nd_jump_link_how(const struct path *path,
+					 const struct jump_how how);
+
+static inline int nd_jump_link(const struct path *path)
+{
+	return nd_jump_link_how(path, JUMP_HOW_UNRESTRICTED);
+}
 
 static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 {
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 613475285643..a3e36d86af1d 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -95,6 +95,10 @@
 #define O_NDELAY	O_NONBLOCK
 #endif
 
+#define DENY_UPGRADES		0x01
+#define READ_UPGRADABLE		(0x02 | DENY_UPGRADES)
+#define WRITE_UPGRADABLE	(0x04 | DENY_UPGRADES)
+
 #define F_DUPFD		0	/* dup */
 #define F_GETFD		1	/* get close_on_exec */
 #define F_SETFD		2	/* set/clear close_on_exec */
diff --git a/include/uapi/linux/openat2.h b/include/uapi/linux/openat2.h
index a5feb7604948..32c302758e72 100644
--- a/include/uapi/linux/openat2.h
+++ b/include/uapi/linux/openat2.h
@@ -20,6 +20,7 @@ struct open_how {
 	__u64 flags;
 	__u64 mode;
 	__u64 resolve;
+	__u64 allowed_upgrades;
 };
 
 /* how->resolve flags for openat2(2). */
-- 
2.53.0
Re: [PATCH] vfs: transitive upgrade restrictions for fds
Posted by Greg KH 1 week, 3 days ago
On Mon, Mar 23, 2026 at 11:00:22PM +0100, Jori Koolstra wrote:
> Add upgrade restrictions to openat2(). Extend struct open_how to allow
> setting transitive restrictions on using file descriptors to open other
> files. A use case for this feature is to block services or containers
> from re-opening/upgrading an O_PATH file descriptor through e.g.
> /proc/<pid>/fd/<nr as O_WRONLY.
> 
> The idea for this features comes form the UAPI group kernel feature idea
> list [1].
> 
> [1] https://github.com/uapi-group/kernel-features?tab=readme-ov-file#upgrade-masks-in-openat2
> 
> Signed-off-by: Jori Koolstra <jkoolstra@xs4all.nl>
> ---
>  fs/file_table.c                  |  2 ++
>  fs/internal.h                    |  1 +
>  fs/namei.c                       | 38 ++++++++++++++++++++++++++++----
>  fs/open.c                        |  9 ++++++++
>  fs/proc/base.c                   | 24 ++++++++++++++------
>  fs/proc/fd.c                     |  6 ++++-
>  fs/proc/internal.h               |  4 +++-
>  include/linux/fcntl.h            |  6 ++++-
>  include/linux/fs.h               |  1 +
>  include/linux/namei.h            | 15 ++++++++++++-
>  include/uapi/asm-generic/fcntl.h |  4 ++++
>  include/uapi/linux/openat2.h     |  1 +
>  12 files changed, 96 insertions(+), 15 deletions(-)

Any chance to get a test for this as well to know if this keeps working
(or works at all)?

>  /*
>   * Helper to directly jump to a known parsed path from ->get_link,
>   * caller must have taken a reference to path beforehand.
>   */
> -int nd_jump_link(const struct path *path)
> +int nd_jump_link_how(const struct path *path, const struct jump_how how)

Shouldn't that be "const struct jump_how *how"?

Or do you really want to pass this structure on the stack?

thanks,

greg k-h
Re: [PATCH] vfs: transitive upgrade restrictions for fds
Posted by Jori Koolstra 1 week, 1 day ago
> Op 24-03-2026 15:37 CET schreef Greg KH <gregkh@linuxfoundation.org>:
> 
> Any chance to get a test for this as well to know if this keeps working
> (or works at all)?
> 

I do have some tests, can shape them up a bit so that they may go in selftest
(if this fd stuff goes through). How should I do this? Send all as a v2, or
inline it as a response here?

> >  /*
> >   * Helper to directly jump to a known parsed path from ->get_link,
> >   * caller must have taken a reference to path beforehand.
> >   */
> > -int nd_jump_link(const struct path *path)
> > +int nd_jump_link_how(const struct path *path, const struct jump_how how)
> 
> Shouldn't that be "const struct jump_how *how"?
> 
> Or do you really want to pass this structure on the stack?
> 

I save a pointer indirection, and jump_how is right now just an int. That was
at least my reasoning here.

> thanks,
> 
> greg k-h

Thanks,
Jori.
Re: [PATCH] vfs: transitive upgrade restrictions for fds
Posted by Greg KH 1 week, 1 day ago
On Thu, Mar 26, 2026 at 12:09:08PM +0100, Jori Koolstra wrote:
> 
> > Op 24-03-2026 15:37 CET schreef Greg KH <gregkh@linuxfoundation.org>:
> > 
> > Any chance to get a test for this as well to know if this keeps working
> > (or works at all)?
> > 
> 
> I do have some tests, can shape them up a bit so that they may go in selftest
> (if this fd stuff goes through). How should I do this? Send all as a v2, or
> inline it as a response here?

v2 when you send it.

> > >  /*
> > >   * Helper to directly jump to a known parsed path from ->get_link,
> > >   * caller must have taken a reference to path beforehand.
> > >   */
> > > -int nd_jump_link(const struct path *path)
> > > +int nd_jump_link_how(const struct path *path, const struct jump_how how)
> > 
> > Shouldn't that be "const struct jump_how *how"?
> > 
> > Or do you really want to pass this structure on the stack?
> > 
> 
> I save a pointer indirection, and jump_how is right now just an int. That was
> at least my reasoning here.

It looks "odd" as it's a struct, and we don't know the size, and it's
the only place in the patch that does it this way, so it "stood out".

thanks,

greg k-h
Re: [PATCH] vfs: transitive upgrade restrictions for fds
Posted by Christian Brauner 1 week, 3 days ago
On Mon, Mar 23, 2026 at 11:00:22PM +0100, Jori Koolstra wrote:
> Add upgrade restrictions to openat2(). Extend struct open_how to allow
> setting transitive restrictions on using file descriptors to open other
> files. A use case for this feature is to block services or containers
> from re-opening/upgrading an O_PATH file descriptor through e.g.
> /proc/<pid>/fd/<nr as O_WRONLY.
> 
> The idea for this features comes form the UAPI group kernel feature idea
> list [1].
> 
> [1] https://github.com/uapi-group/kernel-features?tab=readme-ov-file#upgrade-masks-in-openat2
> 
> Signed-off-by: Jori Koolstra <jkoolstra@xs4all.nl>
> ---

Aleksa has thought long about this feature so I'll let him do the first
pass review here. Historically this was a bit of a can of worms...