For pidfs, there is no reason to restrict file handle decoding by
CAP_DAC_READ_SEARCH. Introduce an export_ops flag that can indicate
this
Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu>
---
fs/fhandle.c | 36 +++++++++++++++++++++---------------
include/linux/exportfs.h | 3 +++
2 files changed, 24 insertions(+), 15 deletions(-)
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 82df28d45cd70a7df525f50bbb398d646110cd99..056116e58f43983bc7bb86da170fb554c7a2fac7 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -235,26 +235,32 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path,
return 0;
}
-/*
- * Allow relaxed permissions of file handles if the caller has the
- * ability to mount the filesystem or create a bind-mount of the
- * provided @mountdirfd.
- *
- * In both cases the caller may be able to get an unobstructed way to
- * the encoded file handle. If the caller is only able to create a
- * bind-mount we need to verify that there are no locked mounts on top
- * of it that could prevent us from getting to the encoded file.
- *
- * In principle, locked mounts can prevent the caller from mounting the
- * filesystem but that only applies to procfs and sysfs neither of which
- * support decoding file handles.
- */
static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
unsigned int o_flags)
{
struct path *root = &ctx->root;
+ struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
+
+ if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN)
+ return true;
+
+ if (capable(CAP_DAC_READ_SEARCH))
+ return true;
/*
+ * Allow relaxed permissions of file handles if the caller has the
+ * ability to mount the filesystem or create a bind-mount of the
+ * provided @mountdirfd.
+ *
+ * In both cases the caller may be able to get an unobstructed way to
+ * the encoded file handle. If the caller is only able to create a
+ * bind-mount we need to verify that there are no locked mounts on top
+ * of it that could prevent us from getting to the encoded file.
+ *
+ * In principle, locked mounts can prevent the caller from mounting the
+ * filesystem but that only applies to procfs and sysfs neither of which
+ * support decoding file handles.
+ *
* Restrict to O_DIRECTORY to provide a deterministic API that avoids a
* confusing api in the face of disconnected non-dir dentries.
*
@@ -293,7 +299,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
if (retval)
goto out_err;
- if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
+ if (!may_decode_fh(&ctx, o_flags)) {
retval = -EPERM;
goto out_path;
}
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 893a1d21dc1c4abc7e52325d7a4cf0adb407f039..459508b53e77ed0597cee217ffe3d82cc7cc11a4 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -247,6 +247,9 @@ struct export_operations {
*/
#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */
#define EXPORT_OP_ASYNC_LOCK (0x40) /* fs can do async lock request */
+#define EXPORT_OP_UNRESTRICTED_OPEN (0x80) /* FS allows open_by_handle_at
+ without CAP_DAC_READ_SEARCH
+ */
unsigned long flags;
};
--
2.46.1
On Wed, Nov 13, 2024 at 8:11 PM Erin Shepherd <erin.shepherd@e43.eu> wrote: > > For pidfs, there is no reason to restrict file handle decoding by > CAP_DAC_READ_SEARCH. Introduce an export_ops flag that can indicate > this > > Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu> > --- > fs/fhandle.c | 36 +++++++++++++++++++++--------------- > include/linux/exportfs.h | 3 +++ > 2 files changed, 24 insertions(+), 15 deletions(-) > > diff --git a/fs/fhandle.c b/fs/fhandle.c > index 82df28d45cd70a7df525f50bbb398d646110cd99..056116e58f43983bc7bb86da170fb554c7a2fac7 100644 > --- a/fs/fhandle.c > +++ b/fs/fhandle.c > @@ -235,26 +235,32 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path, > return 0; > } > > -/* > - * Allow relaxed permissions of file handles if the caller has the > - * ability to mount the filesystem or create a bind-mount of the > - * provided @mountdirfd. > - * > - * In both cases the caller may be able to get an unobstructed way to > - * the encoded file handle. If the caller is only able to create a > - * bind-mount we need to verify that there are no locked mounts on top > - * of it that could prevent us from getting to the encoded file. > - * > - * In principle, locked mounts can prevent the caller from mounting the > - * filesystem but that only applies to procfs and sysfs neither of which > - * support decoding file handles. > - */ > static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, > unsigned int o_flags) > { > struct path *root = &ctx->root; > + struct export_operations *nop = root->mnt->mnt_sb->s_export_op; > + > + if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN) > + return true; > + > + if (capable(CAP_DAC_READ_SEARCH)) > + return true; > > /* > + * Allow relaxed permissions of file handles if the caller has the > + * ability to mount the filesystem or create a bind-mount of the > + * provided @mountdirfd. > + * > + * In both cases the caller may be able to get an unobstructed way to > + * the encoded file handle. If the caller is only able to create a > + * bind-mount we need to verify that there are no locked mounts on top > + * of it that could prevent us from getting to the encoded file. > + * > + * In principle, locked mounts can prevent the caller from mounting the > + * filesystem but that only applies to procfs and sysfs neither of which > + * support decoding file handles. > + * > * Restrict to O_DIRECTORY to provide a deterministic API that avoids a > * confusing api in the face of disconnected non-dir dentries. > * > @@ -293,7 +299,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, > if (retval) > goto out_err; > > - if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) { > + if (!may_decode_fh(&ctx, o_flags)) { > retval = -EPERM; > goto out_path; > } > diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h > index 893a1d21dc1c4abc7e52325d7a4cf0adb407f039..459508b53e77ed0597cee217ffe3d82cc7cc11a4 100644 > --- a/include/linux/exportfs.h > +++ b/include/linux/exportfs.h > @@ -247,6 +247,9 @@ struct export_operations { > */ > #define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ > #define EXPORT_OP_ASYNC_LOCK (0x40) /* fs can do async lock request */ > +#define EXPORT_OP_UNRESTRICTED_OPEN (0x80) /* FS allows open_by_handle_at > + without CAP_DAC_READ_SEARCH > + */ Don't love the name, but I wonder, isn't SB_NOUSER already a good enough indication that CAP_DAC_READ_SEARCH is irrelevant? Essentially, mnt_fd is the user's proof that they can access the mount and CAP_DAC_READ_SEARCH is the legacy "proof" that the user can reach from mount the inode by path lookup. Which reminds me, what is the mnt_fd expected for opening a pidfd file by handle? Thanks, Amir.
On Thu, Nov 14, 2024 at 07:37:19AM +0100, Amir Goldstein wrote: > On Wed, Nov 13, 2024 at 8:11 PM Erin Shepherd <erin.shepherd@e43.eu> wrote: > > > > For pidfs, there is no reason to restrict file handle decoding by > > CAP_DAC_READ_SEARCH. Introduce an export_ops flag that can indicate > > this > > > > Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu> > > --- > > fs/fhandle.c | 36 +++++++++++++++++++++--------------- > > include/linux/exportfs.h | 3 +++ > > 2 files changed, 24 insertions(+), 15 deletions(-) > > > > diff --git a/fs/fhandle.c b/fs/fhandle.c > > index 82df28d45cd70a7df525f50bbb398d646110cd99..056116e58f43983bc7bb86da170fb554c7a2fac7 100644 > > --- a/fs/fhandle.c > > +++ b/fs/fhandle.c > > @@ -235,26 +235,32 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path, > > return 0; > > } > > > > -/* > > - * Allow relaxed permissions of file handles if the caller has the > > - * ability to mount the filesystem or create a bind-mount of the > > - * provided @mountdirfd. > > - * > > - * In both cases the caller may be able to get an unobstructed way to > > - * the encoded file handle. If the caller is only able to create a > > - * bind-mount we need to verify that there are no locked mounts on top > > - * of it that could prevent us from getting to the encoded file. > > - * > > - * In principle, locked mounts can prevent the caller from mounting the > > - * filesystem but that only applies to procfs and sysfs neither of which > > - * support decoding file handles. > > - */ > > static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, > > unsigned int o_flags) > > { > > struct path *root = &ctx->root; > > + struct export_operations *nop = root->mnt->mnt_sb->s_export_op; > > + > > + if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN) > > + return true; > > + > > + if (capable(CAP_DAC_READ_SEARCH)) > > + return true; > > > > /* > > + * Allow relaxed permissions of file handles if the caller has the > > + * ability to mount the filesystem or create a bind-mount of the > > + * provided @mountdirfd. > > + * > > + * In both cases the caller may be able to get an unobstructed way to > > + * the encoded file handle. If the caller is only able to create a > > + * bind-mount we need to verify that there are no locked mounts on top > > + * of it that could prevent us from getting to the encoded file. > > + * > > + * In principle, locked mounts can prevent the caller from mounting the > > + * filesystem but that only applies to procfs and sysfs neither of which > > + * support decoding file handles. > > + * > > * Restrict to O_DIRECTORY to provide a deterministic API that avoids a > > * confusing api in the face of disconnected non-dir dentries. > > * > > @@ -293,7 +299,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, > > if (retval) > > goto out_err; > > > > - if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) { > > + if (!may_decode_fh(&ctx, o_flags)) { > > retval = -EPERM; > > goto out_path; > > } > > diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h > > index 893a1d21dc1c4abc7e52325d7a4cf0adb407f039..459508b53e77ed0597cee217ffe3d82cc7cc11a4 100644 > > --- a/include/linux/exportfs.h > > +++ b/include/linux/exportfs.h > > @@ -247,6 +247,9 @@ struct export_operations { > > */ > > #define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ > > #define EXPORT_OP_ASYNC_LOCK (0x40) /* fs can do async lock request */ > > +#define EXPORT_OP_UNRESTRICTED_OPEN (0x80) /* FS allows open_by_handle_at > > + without CAP_DAC_READ_SEARCH > > + */ > > Don't love the name, but I wonder, isn't SB_NOUSER already a good > enough indication that CAP_DAC_READ_SEARCH is irrelevant? > > Essentially, mnt_fd is the user's proof that they can access the mount > and CAP_DAC_READ_SEARCH is the legacy "proof" that the user can > reach from mount the inode by path lookup. > > Which reminds me, what is the mnt_fd expected for opening a pidfd > file by handle? int pidfd_self = pidfd_open(getpid(), 0); open_by_handle_at(pidfd_self, ...); is sufficient.
On Wed, Nov 13, 2024 at 05:55:24PM +0000, Erin Shepherd wrote: > For pidfs, there is no reason to restrict file handle decoding by > CAP_DAC_READ_SEARCH. Why is there no reason, i.e. why do you think it is safe. >Introduce an export_ops flag that can indicate > this Also why is is desirable? To be this looks more than sketchy with the actual exporting hat on, but I guess that's now how the cool kids use open by handle these days.
On 14/11/2024 05:37, Christoph Hellwig wrote: > On Wed, Nov 13, 2024 at 05:55:24PM +0000, Erin Shepherd wrote: >> For pidfs, there is no reason to restrict file handle decoding by >> CAP_DAC_READ_SEARCH. > Why is there no reason, i.e. why do you think it is safe. A process can use both open_by_handle_at to open the exact same set of pidfds as they can by pidfd_open. i.e. there is no reason to additionally restrict access to the former API. >> Introduce an export_ops flag that can indicate >> this > Also why is is desirable? > > To be this looks more than sketchy with the actual exporting hat on, > but I guess that's now how the cool kids use open by handle these days. Right - we have a bunch of API file systems where userspace wants stable non-reused file references for the same reasons network filesystems do. The first example of this was cgroupfs, but the same rationale exists for pidfs and process tracking.
Hi Erin, kernel test robot noticed the following build warnings: [auto build test WARNING on 14b6320953a3f856a3f93bf9a0e423395baa593d] url: https://github.com/intel-lab-lkp/linux/commits/Erin-Shepherd/pseudofs-add-support-for-export_ops/20241114-020539 base: 14b6320953a3f856a3f93bf9a0e423395baa593d patch link: https://lore.kernel.org/r/20241113-pidfs_fh-v2-2-9a4d28155a37%40e43.eu patch subject: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check config: openrisc-defconfig (https://download.01.org/0day-ci/archive/20241114/202411140905.a0ntnQQG-lkp@intel.com/config) compiler: or1k-linux-gcc (GCC) 14.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241114/202411140905.a0ntnQQG-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202411140905.a0ntnQQG-lkp@intel.com/ All warnings (new ones prefixed by >>): fs/fhandle.c: In function 'may_decode_fh': >> fs/fhandle.c:242:41: warning: initialization discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] 242 | struct export_operations *nop = root->mnt->mnt_sb->s_export_op; | ^~~~ vim +/const +242 fs/fhandle.c 237 238 static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, 239 unsigned int o_flags) 240 { 241 struct path *root = &ctx->root; > 242 struct export_operations *nop = root->mnt->mnt_sb->s_export_op; 243 244 if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN) 245 return true; 246 247 if (capable(CAP_DAC_READ_SEARCH)) 248 return true; 249 250 /* 251 * Allow relaxed permissions of file handles if the caller has the 252 * ability to mount the filesystem or create a bind-mount of the 253 * provided @mountdirfd. 254 * 255 * In both cases the caller may be able to get an unobstructed way to 256 * the encoded file handle. If the caller is only able to create a 257 * bind-mount we need to verify that there are no locked mounts on top 258 * of it that could prevent us from getting to the encoded file. 259 * 260 * In principle, locked mounts can prevent the caller from mounting the 261 * filesystem but that only applies to procfs and sysfs neither of which 262 * support decoding file handles. 263 * 264 * Restrict to O_DIRECTORY to provide a deterministic API that avoids a 265 * confusing api in the face of disconnected non-dir dentries. 266 * 267 * There's only one dentry for each directory inode (VFS rule)... 268 */ 269 if (!(o_flags & O_DIRECTORY)) 270 return false; 271 272 if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) 273 ctx->flags = HANDLE_CHECK_PERMS; 274 else if (is_mounted(root->mnt) && 275 ns_capable(real_mount(root->mnt)->mnt_ns->user_ns, 276 CAP_SYS_ADMIN) && 277 !has_locked_children(real_mount(root->mnt), root->dentry)) 278 ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE; 279 else 280 return false; 281 282 /* Are we able to override DAC permissions? */ 283 if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH)) 284 return false; 285 286 ctx->fh_flags = EXPORT_FH_DIR_ONLY; 287 return true; 288 } 289 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Hi Erin, kernel test robot noticed the following build errors: [auto build test ERROR on 14b6320953a3f856a3f93bf9a0e423395baa593d] url: https://github.com/intel-lab-lkp/linux/commits/Erin-Shepherd/pseudofs-add-support-for-export_ops/20241114-020539 base: 14b6320953a3f856a3f93bf9a0e423395baa593d patch link: https://lore.kernel.org/r/20241113-pidfs_fh-v2-2-9a4d28155a37%40e43.eu patch subject: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check config: x86_64-allnoconfig (https://download.01.org/0day-ci/archive/20241114/202411140603.E03vXsdz-lkp@intel.com/config) compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241114/202411140603.E03vXsdz-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202411140603.E03vXsdz-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from fs/fhandle.c:2: In file included from include/linux/syscalls.h:93: In file included from include/trace/syscall.h:7: In file included from include/linux/trace_events.h:6: In file included from include/linux/ring_buffer.h:5: In file included from include/linux/mm.h:2213: include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion] 518 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" | ~~~~~~~~~~~ ^ ~~~ >> fs/fhandle.c:242:28: error: initializing 'struct export_operations *' with an expression of type 'const struct export_operations *' discards qualifiers [-Werror,-Wincompatible-pointer-types-discards-qualifiers] 242 | struct export_operations *nop = root->mnt->mnt_sb->s_export_op; | ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 warning and 1 error generated. vim +242 fs/fhandle.c 237 238 static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, 239 unsigned int o_flags) 240 { 241 struct path *root = &ctx->root; > 242 struct export_operations *nop = root->mnt->mnt_sb->s_export_op; 243 244 if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN) 245 return true; 246 247 if (capable(CAP_DAC_READ_SEARCH)) 248 return true; 249 250 /* 251 * Allow relaxed permissions of file handles if the caller has the 252 * ability to mount the filesystem or create a bind-mount of the 253 * provided @mountdirfd. 254 * 255 * In both cases the caller may be able to get an unobstructed way to 256 * the encoded file handle. If the caller is only able to create a 257 * bind-mount we need to verify that there are no locked mounts on top 258 * of it that could prevent us from getting to the encoded file. 259 * 260 * In principle, locked mounts can prevent the caller from mounting the 261 * filesystem but that only applies to procfs and sysfs neither of which 262 * support decoding file handles. 263 * 264 * Restrict to O_DIRECTORY to provide a deterministic API that avoids a 265 * confusing api in the face of disconnected non-dir dentries. 266 * 267 * There's only one dentry for each directory inode (VFS rule)... 268 */ 269 if (!(o_flags & O_DIRECTORY)) 270 return false; 271 272 if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) 273 ctx->flags = HANDLE_CHECK_PERMS; 274 else if (is_mounted(root->mnt) && 275 ns_capable(real_mount(root->mnt)->mnt_ns->user_ns, 276 CAP_SYS_ADMIN) && 277 !has_locked_children(real_mount(root->mnt), root->dentry)) 278 ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE; 279 else 280 return false; 281 282 /* Are we able to override DAC permissions? */ 283 if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH)) 284 return false; 285 286 ctx->fh_flags = EXPORT_FH_DIR_ONLY; 287 return true; 288 } 289 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
© 2016 - 2024 Red Hat, Inc.