[PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check

Erin Shepherd posted 3 patches 1 week, 2 days ago
[PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check
Posted by Erin Shepherd 1 week, 2 days ago
For pidfs, there is no reason to restrict file handle decoding by
CAP_DAC_READ_SEARCH. Introduce an export_ops flag that can indicate
this

Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu>
---
 fs/fhandle.c             | 36 +++++++++++++++++++++---------------
 include/linux/exportfs.h |  3 +++
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/fs/fhandle.c b/fs/fhandle.c
index 82df28d45cd70a7df525f50bbb398d646110cd99..056116e58f43983bc7bb86da170fb554c7a2fac7 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -235,26 +235,32 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path,
 	return 0;
 }
 
-/*
- * Allow relaxed permissions of file handles if the caller has the
- * ability to mount the filesystem or create a bind-mount of the
- * provided @mountdirfd.
- *
- * In both cases the caller may be able to get an unobstructed way to
- * the encoded file handle. If the caller is only able to create a
- * bind-mount we need to verify that there are no locked mounts on top
- * of it that could prevent us from getting to the encoded file.
- *
- * In principle, locked mounts can prevent the caller from mounting the
- * filesystem but that only applies to procfs and sysfs neither of which
- * support decoding file handles.
- */
 static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
 				 unsigned int o_flags)
 {
 	struct path *root = &ctx->root;
+	struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
+
+	if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN)
+		return true;
+
+	if (capable(CAP_DAC_READ_SEARCH))
+		return true;
 
 	/*
+	 * Allow relaxed permissions of file handles if the caller has the
+	 * ability to mount the filesystem or create a bind-mount of the
+	 * provided @mountdirfd.
+	 *
+	 * In both cases the caller may be able to get an unobstructed way to
+	 * the encoded file handle. If the caller is only able to create a
+	 * bind-mount we need to verify that there are no locked mounts on top
+	 * of it that could prevent us from getting to the encoded file.
+	 *
+	 * In principle, locked mounts can prevent the caller from mounting the
+	 * filesystem but that only applies to procfs and sysfs neither of which
+	 * support decoding file handles.
+	 *
 	 * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
 	 * confusing api in the face of disconnected non-dir dentries.
 	 *
@@ -293,7 +299,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
 	if (retval)
 		goto out_err;
 
-	if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
+	if (!may_decode_fh(&ctx, o_flags)) {
 		retval = -EPERM;
 		goto out_path;
 	}
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 893a1d21dc1c4abc7e52325d7a4cf0adb407f039..459508b53e77ed0597cee217ffe3d82cc7cc11a4 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -247,6 +247,9 @@ struct export_operations {
 						*/
 #define EXPORT_OP_FLUSH_ON_CLOSE	(0x20) /* fs flushes file data on close */
 #define EXPORT_OP_ASYNC_LOCK		(0x40) /* fs can do async lock request */
+#define EXPORT_OP_UNRESTRICTED_OPEN	(0x80) /* FS allows open_by_handle_at
+						  without CAP_DAC_READ_SEARCH
+						*/
 	unsigned long	flags;
 };
 

-- 
2.46.1
Re: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check
Posted by Amir Goldstein 1 week, 2 days ago
On Wed, Nov 13, 2024 at 8:11 PM Erin Shepherd <erin.shepherd@e43.eu> wrote:
>
> For pidfs, there is no reason to restrict file handle decoding by
> CAP_DAC_READ_SEARCH. Introduce an export_ops flag that can indicate
> this
>
> Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu>
> ---
>  fs/fhandle.c             | 36 +++++++++++++++++++++---------------
>  include/linux/exportfs.h |  3 +++
>  2 files changed, 24 insertions(+), 15 deletions(-)
>
> diff --git a/fs/fhandle.c b/fs/fhandle.c
> index 82df28d45cd70a7df525f50bbb398d646110cd99..056116e58f43983bc7bb86da170fb554c7a2fac7 100644
> --- a/fs/fhandle.c
> +++ b/fs/fhandle.c
> @@ -235,26 +235,32 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path,
>         return 0;
>  }
>
> -/*
> - * Allow relaxed permissions of file handles if the caller has the
> - * ability to mount the filesystem or create a bind-mount of the
> - * provided @mountdirfd.
> - *
> - * In both cases the caller may be able to get an unobstructed way to
> - * the encoded file handle. If the caller is only able to create a
> - * bind-mount we need to verify that there are no locked mounts on top
> - * of it that could prevent us from getting to the encoded file.
> - *
> - * In principle, locked mounts can prevent the caller from mounting the
> - * filesystem but that only applies to procfs and sysfs neither of which
> - * support decoding file handles.
> - */
>  static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
>                                  unsigned int o_flags)
>  {
>         struct path *root = &ctx->root;
> +       struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
> +
> +       if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN)
> +               return true;
> +
> +       if (capable(CAP_DAC_READ_SEARCH))
> +               return true;
>
>         /*
> +        * Allow relaxed permissions of file handles if the caller has the
> +        * ability to mount the filesystem or create a bind-mount of the
> +        * provided @mountdirfd.
> +        *
> +        * In both cases the caller may be able to get an unobstructed way to
> +        * the encoded file handle. If the caller is only able to create a
> +        * bind-mount we need to verify that there are no locked mounts on top
> +        * of it that could prevent us from getting to the encoded file.
> +        *
> +        * In principle, locked mounts can prevent the caller from mounting the
> +        * filesystem but that only applies to procfs and sysfs neither of which
> +        * support decoding file handles.
> +        *
>          * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
>          * confusing api in the face of disconnected non-dir dentries.
>          *
> @@ -293,7 +299,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
>         if (retval)
>                 goto out_err;
>
> -       if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
> +       if (!may_decode_fh(&ctx, o_flags)) {
>                 retval = -EPERM;
>                 goto out_path;
>         }
> diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
> index 893a1d21dc1c4abc7e52325d7a4cf0adb407f039..459508b53e77ed0597cee217ffe3d82cc7cc11a4 100644
> --- a/include/linux/exportfs.h
> +++ b/include/linux/exportfs.h
> @@ -247,6 +247,9 @@ struct export_operations {
>                                                 */
>  #define EXPORT_OP_FLUSH_ON_CLOSE       (0x20) /* fs flushes file data on close */
>  #define EXPORT_OP_ASYNC_LOCK           (0x40) /* fs can do async lock request */
> +#define EXPORT_OP_UNRESTRICTED_OPEN    (0x80) /* FS allows open_by_handle_at
> +                                                 without CAP_DAC_READ_SEARCH
> +                                               */

Don't love the name, but I wonder, isn't SB_NOUSER already a good
enough indication that CAP_DAC_READ_SEARCH is irrelevant?

Essentially, mnt_fd is the user's proof that they can access the mount
and CAP_DAC_READ_SEARCH is the legacy "proof" that the user can
reach from mount the inode by path lookup.

Which reminds me, what is the mnt_fd expected for opening a pidfd
file by handle?

Thanks,
Amir.
Re: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check
Posted by Christian Brauner 1 week, 1 day ago
On Thu, Nov 14, 2024 at 07:37:19AM +0100, Amir Goldstein wrote:
> On Wed, Nov 13, 2024 at 8:11 PM Erin Shepherd <erin.shepherd@e43.eu> wrote:
> >
> > For pidfs, there is no reason to restrict file handle decoding by
> > CAP_DAC_READ_SEARCH. Introduce an export_ops flag that can indicate
> > this
> >
> > Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu>
> > ---
> >  fs/fhandle.c             | 36 +++++++++++++++++++++---------------
> >  include/linux/exportfs.h |  3 +++
> >  2 files changed, 24 insertions(+), 15 deletions(-)
> >
> > diff --git a/fs/fhandle.c b/fs/fhandle.c
> > index 82df28d45cd70a7df525f50bbb398d646110cd99..056116e58f43983bc7bb86da170fb554c7a2fac7 100644
> > --- a/fs/fhandle.c
> > +++ b/fs/fhandle.c
> > @@ -235,26 +235,32 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path,
> >         return 0;
> >  }
> >
> > -/*
> > - * Allow relaxed permissions of file handles if the caller has the
> > - * ability to mount the filesystem or create a bind-mount of the
> > - * provided @mountdirfd.
> > - *
> > - * In both cases the caller may be able to get an unobstructed way to
> > - * the encoded file handle. If the caller is only able to create a
> > - * bind-mount we need to verify that there are no locked mounts on top
> > - * of it that could prevent us from getting to the encoded file.
> > - *
> > - * In principle, locked mounts can prevent the caller from mounting the
> > - * filesystem but that only applies to procfs and sysfs neither of which
> > - * support decoding file handles.
> > - */
> >  static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
> >                                  unsigned int o_flags)
> >  {
> >         struct path *root = &ctx->root;
> > +       struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
> > +
> > +       if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN)
> > +               return true;
> > +
> > +       if (capable(CAP_DAC_READ_SEARCH))
> > +               return true;
> >
> >         /*
> > +        * Allow relaxed permissions of file handles if the caller has the
> > +        * ability to mount the filesystem or create a bind-mount of the
> > +        * provided @mountdirfd.
> > +        *
> > +        * In both cases the caller may be able to get an unobstructed way to
> > +        * the encoded file handle. If the caller is only able to create a
> > +        * bind-mount we need to verify that there are no locked mounts on top
> > +        * of it that could prevent us from getting to the encoded file.
> > +        *
> > +        * In principle, locked mounts can prevent the caller from mounting the
> > +        * filesystem but that only applies to procfs and sysfs neither of which
> > +        * support decoding file handles.
> > +        *
> >          * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
> >          * confusing api in the face of disconnected non-dir dentries.
> >          *
> > @@ -293,7 +299,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
> >         if (retval)
> >                 goto out_err;
> >
> > -       if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
> > +       if (!may_decode_fh(&ctx, o_flags)) {
> >                 retval = -EPERM;
> >                 goto out_path;
> >         }
> > diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
> > index 893a1d21dc1c4abc7e52325d7a4cf0adb407f039..459508b53e77ed0597cee217ffe3d82cc7cc11a4 100644
> > --- a/include/linux/exportfs.h
> > +++ b/include/linux/exportfs.h
> > @@ -247,6 +247,9 @@ struct export_operations {
> >                                                 */
> >  #define EXPORT_OP_FLUSH_ON_CLOSE       (0x20) /* fs flushes file data on close */
> >  #define EXPORT_OP_ASYNC_LOCK           (0x40) /* fs can do async lock request */
> > +#define EXPORT_OP_UNRESTRICTED_OPEN    (0x80) /* FS allows open_by_handle_at
> > +                                                 without CAP_DAC_READ_SEARCH
> > +                                               */
> 
> Don't love the name, but I wonder, isn't SB_NOUSER already a good
> enough indication that CAP_DAC_READ_SEARCH is irrelevant?
> 
> Essentially, mnt_fd is the user's proof that they can access the mount
> and CAP_DAC_READ_SEARCH is the legacy "proof" that the user can
> reach from mount the inode by path lookup.
> 
> Which reminds me, what is the mnt_fd expected for opening a pidfd
> file by handle?

int pidfd_self = pidfd_open(getpid(), 0);
open_by_handle_at(pidfd_self, ...);

is sufficient.
Re: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check
Posted by Christoph Hellwig 1 week, 2 days ago
On Wed, Nov 13, 2024 at 05:55:24PM +0000, Erin Shepherd wrote:
> For pidfs, there is no reason to restrict file handle decoding by
> CAP_DAC_READ_SEARCH.

Why is there no reason, i.e. why do you think it is safe.

>Introduce an export_ops flag that can indicate
> this

Also why is is desirable?

To be this looks more than sketchy with the actual exporting hat on,
but I guess that's now how the cool kids use open by handle these days.
Re: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check
Posted by Erin Shepherd 1 week, 1 day ago
On 14/11/2024 05:37, Christoph Hellwig wrote:
> On Wed, Nov 13, 2024 at 05:55:24PM +0000, Erin Shepherd wrote:
>> For pidfs, there is no reason to restrict file handle decoding by
>> CAP_DAC_READ_SEARCH.
> Why is there no reason, i.e. why do you think it is safe.

A process can use both open_by_handle_at to open the exact same set of
pidfds as they can by pidfd_open. i.e. there is no reason to additionally
restrict access to the former API.

>> Introduce an export_ops flag that can indicate
>> this
> Also why is is desirable?
>
> To be this looks more than sketchy with the actual exporting hat on,
> but I guess that's now how the cool kids use open by handle these days.
Right - we have a bunch of API file systems where userspace wants stable
non-reused file references for the same reasons network filesystems do.
The first example of this was cgroupfs, but the same rationale exists for
pidfs and process tracking.
Re: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check
Posted by kernel test robot 1 week, 2 days ago
Hi Erin,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 14b6320953a3f856a3f93bf9a0e423395baa593d]

url:    https://github.com/intel-lab-lkp/linux/commits/Erin-Shepherd/pseudofs-add-support-for-export_ops/20241114-020539
base:   14b6320953a3f856a3f93bf9a0e423395baa593d
patch link:    https://lore.kernel.org/r/20241113-pidfs_fh-v2-2-9a4d28155a37%40e43.eu
patch subject: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check
config: openrisc-defconfig (https://download.01.org/0day-ci/archive/20241114/202411140905.a0ntnQQG-lkp@intel.com/config)
compiler: or1k-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241114/202411140905.a0ntnQQG-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202411140905.a0ntnQQG-lkp@intel.com/

All warnings (new ones prefixed by >>):

   fs/fhandle.c: In function 'may_decode_fh':
>> fs/fhandle.c:242:41: warning: initialization discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
     242 |         struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
         |                                         ^~~~


vim +/const +242 fs/fhandle.c

   237	
   238	static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
   239					 unsigned int o_flags)
   240	{
   241		struct path *root = &ctx->root;
 > 242		struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
   243	
   244		if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN)
   245			return true;
   246	
   247		if (capable(CAP_DAC_READ_SEARCH))
   248			return true;
   249	
   250		/*
   251		 * Allow relaxed permissions of file handles if the caller has the
   252		 * ability to mount the filesystem or create a bind-mount of the
   253		 * provided @mountdirfd.
   254		 *
   255		 * In both cases the caller may be able to get an unobstructed way to
   256		 * the encoded file handle. If the caller is only able to create a
   257		 * bind-mount we need to verify that there are no locked mounts on top
   258		 * of it that could prevent us from getting to the encoded file.
   259		 *
   260		 * In principle, locked mounts can prevent the caller from mounting the
   261		 * filesystem but that only applies to procfs and sysfs neither of which
   262		 * support decoding file handles.
   263		 *
   264		 * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
   265		 * confusing api in the face of disconnected non-dir dentries.
   266		 *
   267		 * There's only one dentry for each directory inode (VFS rule)...
   268		 */
   269		if (!(o_flags & O_DIRECTORY))
   270			return false;
   271	
   272		if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
   273			ctx->flags = HANDLE_CHECK_PERMS;
   274		else if (is_mounted(root->mnt) &&
   275			 ns_capable(real_mount(root->mnt)->mnt_ns->user_ns,
   276				    CAP_SYS_ADMIN) &&
   277			 !has_locked_children(real_mount(root->mnt), root->dentry))
   278			ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
   279		else
   280			return false;
   281	
   282		/* Are we able to override DAC permissions? */
   283		if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH))
   284			return false;
   285	
   286		ctx->fh_flags = EXPORT_FH_DIR_ONLY;
   287		return true;
   288	}
   289	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check
Posted by kernel test robot 1 week, 2 days ago
Hi Erin,

kernel test robot noticed the following build errors:

[auto build test ERROR on 14b6320953a3f856a3f93bf9a0e423395baa593d]

url:    https://github.com/intel-lab-lkp/linux/commits/Erin-Shepherd/pseudofs-add-support-for-export_ops/20241114-020539
base:   14b6320953a3f856a3f93bf9a0e423395baa593d
patch link:    https://lore.kernel.org/r/20241113-pidfs_fh-v2-2-9a4d28155a37%40e43.eu
patch subject: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check
config: x86_64-allnoconfig (https://download.01.org/0day-ci/archive/20241114/202411140603.E03vXsdz-lkp@intel.com/config)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241114/202411140603.E03vXsdz-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202411140603.E03vXsdz-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from fs/fhandle.c:2:
   In file included from include/linux/syscalls.h:93:
   In file included from include/trace/syscall.h:7:
   In file included from include/linux/trace_events.h:6:
   In file included from include/linux/ring_buffer.h:5:
   In file included from include/linux/mm.h:2213:
   include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
>> fs/fhandle.c:242:28: error: initializing 'struct export_operations *' with an expression of type 'const struct export_operations *' discards qualifiers [-Werror,-Wincompatible-pointer-types-discards-qualifiers]
     242 |         struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
         |                                   ^     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1 warning and 1 error generated.


vim +242 fs/fhandle.c

   237	
   238	static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
   239					 unsigned int o_flags)
   240	{
   241		struct path *root = &ctx->root;
 > 242		struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
   243	
   244		if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN)
   245			return true;
   246	
   247		if (capable(CAP_DAC_READ_SEARCH))
   248			return true;
   249	
   250		/*
   251		 * Allow relaxed permissions of file handles if the caller has the
   252		 * ability to mount the filesystem or create a bind-mount of the
   253		 * provided @mountdirfd.
   254		 *
   255		 * In both cases the caller may be able to get an unobstructed way to
   256		 * the encoded file handle. If the caller is only able to create a
   257		 * bind-mount we need to verify that there are no locked mounts on top
   258		 * of it that could prevent us from getting to the encoded file.
   259		 *
   260		 * In principle, locked mounts can prevent the caller from mounting the
   261		 * filesystem but that only applies to procfs and sysfs neither of which
   262		 * support decoding file handles.
   263		 *
   264		 * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
   265		 * confusing api in the face of disconnected non-dir dentries.
   266		 *
   267		 * There's only one dentry for each directory inode (VFS rule)...
   268		 */
   269		if (!(o_flags & O_DIRECTORY))
   270			return false;
   271	
   272		if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
   273			ctx->flags = HANDLE_CHECK_PERMS;
   274		else if (is_mounted(root->mnt) &&
   275			 ns_capable(real_mount(root->mnt)->mnt_ns->user_ns,
   276				    CAP_SYS_ADMIN) &&
   277			 !has_locked_children(real_mount(root->mnt), root->dentry))
   278			ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
   279		else
   280			return false;
   281	
   282		/* Are we able to override DAC permissions? */
   283		if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH))
   284			return false;
   285	
   286		ctx->fh_flags = EXPORT_FH_DIR_ONLY;
   287		return true;
   288	}
   289	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki