This extends the current PR_SET_MDWE prctl arg with a bit to indicate
that the process doesn't want MDWE protection to propagate to children.
To implement this no-inherit mode, the tag in current->mm->flags must be
absent from MMF_INIT_MASK. This means that the encoding for "MDWE but
without inherit" is different in the prctl than in the mm flags. This
leads to a bit of bit-mangling in the prctl implementation.
Signed-off-by: Florent Revest <revest@chromium.org>
---
include/linux/sched/coredump.h | 10 ++++++++++
include/uapi/linux/prctl.h | 1 +
kernel/fork.c | 2 +-
kernel/sys.c | 32 ++++++++++++++++++++++++++------
tools/include/uapi/linux/prctl.h | 1 +
5 files changed, 39 insertions(+), 7 deletions(-)
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 0ee96ea7a0e9..1b37fa8fc723 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -91,4 +91,14 @@ static inline int get_dumpable(struct mm_struct *mm)
MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK)
#define MMF_VM_MERGE_ANY 29
+#define MMF_HAS_MDWE_NO_INHERIT 30
+
+static inline unsigned long mmf_init_flags(unsigned long flags)
+{
+ if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT))
+ flags &= ~((1UL << MMF_HAS_MDWE) |
+ (1UL << MMF_HAS_MDWE_NO_INHERIT));
+ return flags & MMF_INIT_MASK;
+}
+
#endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 6e9af6cbc950..dacbe824e7c3 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -284,6 +284,7 @@ struct prctl_mm_map {
/* Memory deny write / execute */
#define PR_SET_MDWE 65
# define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0)
+# define PR_MDWE_NO_INHERIT (1UL << 1)
#define PR_GET_MDWE 66
diff --git a/kernel/fork.c b/kernel/fork.c
index d17995934eb4..bc3c762d378f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1284,7 +1284,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
hugetlb_count_init(mm);
if (current->mm) {
- mm->flags = current->mm->flags & MMF_INIT_MASK;
+ mm->flags = mmf_init_flags(current->mm->flags);
mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
} else {
mm->flags = default_dump_filter;
diff --git a/kernel/sys.c b/kernel/sys.c
index 339fee3eff6a..1a2dc3da43ea 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2362,19 +2362,41 @@ static int prctl_set_vma(unsigned long opt, unsigned long start,
}
#endif /* CONFIG_ANON_VMA_NAME */
+static inline unsigned long get_current_mdwe(void)
+{
+ unsigned long ret = 0;
+
+ if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags))
+ ret |= PR_MDWE_REFUSE_EXEC_GAIN;
+ if (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags))
+ ret |= PR_MDWE_NO_INHERIT;
+
+ return ret;
+}
+
static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
unsigned long arg4, unsigned long arg5)
{
+ unsigned long current_bits;
+
if (arg3 || arg4 || arg5)
return -EINVAL;
- if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN))
+ if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT))
+ return -EINVAL;
+
+ /* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */
+ if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
return -EINVAL;
+ current_bits = get_current_mdwe();
+ if (current_bits && current_bits != bits)
+ return -EPERM; /* Cannot unset the flags */
+
+ if (bits & PR_MDWE_NO_INHERIT)
+ set_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags);
if (bits & PR_MDWE_REFUSE_EXEC_GAIN)
set_bit(MMF_HAS_MDWE, ¤t->mm->flags);
- else if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags))
- return -EPERM; /* Cannot unset the flag */
return 0;
}
@@ -2384,9 +2406,7 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
{
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
-
- return test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ?
- PR_MDWE_REFUSE_EXEC_GAIN : 0;
+ return (int)get_current_mdwe();
}
static int prctl_get_auxv(void __user *addr, unsigned long len)
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 6e9af6cbc950..dacbe824e7c3 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -284,6 +284,7 @@ struct prctl_mm_map {
/* Memory deny write / execute */
#define PR_SET_MDWE 65
# define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0)
+# define PR_MDWE_NO_INHERIT (1UL << 1)
#define PR_GET_MDWE 66
--
2.41.0.255.g8b1d071c50-goog
On Tue, Jul 04, 2023 at 05:36:28PM +0200, Florent Revest wrote:
> static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
> unsigned long arg4, unsigned long arg5)
> {
> + unsigned long current_bits;
> +
> if (arg3 || arg4 || arg5)
> return -EINVAL;
>
> - if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN))
> + if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT))
> + return -EINVAL;
> +
> + /* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */
> + if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
> return -EINVAL;
>
> + current_bits = get_current_mdwe();
> + if (current_bits && current_bits != bits)
> + return -EPERM; /* Cannot unset the flags */
> +
> + if (bits & PR_MDWE_NO_INHERIT)
> + set_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags);
> if (bits & PR_MDWE_REFUSE_EXEC_GAIN)
> set_bit(MMF_HAS_MDWE, ¤t->mm->flags);
> - else if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags))
> - return -EPERM; /* Cannot unset the flag */
>
> return 0;
> }
This works for me, it's easier to read. I don't expect a use-case where
the app starts with MDWE no-inherit, forks some new processes and wants
to turn inherit on for further forking.
> @@ -2384,9 +2406,7 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
> {
> if (arg2 || arg3 || arg4 || arg5)
> return -EINVAL;
> -
> - return test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ?
> - PR_MDWE_REFUSE_EXEC_GAIN : 0;
> + return (int)get_current_mdwe();
Nitpick: the type conversion should be handled by the compiler as
prctl_get_mdwe() returns an int already.
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
On Sun, Aug 27, 2023 at 3:10 PM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> On Tue, Jul 04, 2023 at 05:36:28PM +0200, Florent Revest wrote:
> > @@ -2384,9 +2406,7 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
> > {
> > if (arg2 || arg3 || arg4 || arg5)
> > return -EINVAL;
> > -
> > - return test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ?
> > - PR_MDWE_REFUSE_EXEC_GAIN : 0;
> > + return (int)get_current_mdwe();
>
> Nitpick: the type conversion should be handled by the compiler as
> prctl_get_mdwe() returns an int already.
Ah yes. Not sure why I added this one... :) thank you!
On Tue, Jul 04, 2023 at 05:36:28PM +0200, Florent Revest wrote:
> This extends the current PR_SET_MDWE prctl arg with a bit to indicate
> that the process doesn't want MDWE protection to propagate to children.
>
> To implement this no-inherit mode, the tag in current->mm->flags must be
> absent from MMF_INIT_MASK. This means that the encoding for "MDWE but
> without inherit" is different in the prctl than in the mm flags. This
> leads to a bit of bit-mangling in the prctl implementation.
>
> Signed-off-by: Florent Revest <revest@chromium.org>
> ---
> include/linux/sched/coredump.h | 10 ++++++++++
> include/uapi/linux/prctl.h | 1 +
> kernel/fork.c | 2 +-
> kernel/sys.c | 32 ++++++++++++++++++++++++++------
> tools/include/uapi/linux/prctl.h | 1 +
> 5 files changed, 39 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
> index 0ee96ea7a0e9..1b37fa8fc723 100644
> --- a/include/linux/sched/coredump.h
> +++ b/include/linux/sched/coredump.h
> @@ -91,4 +91,14 @@ static inline int get_dumpable(struct mm_struct *mm)
> MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK)
>
> #define MMF_VM_MERGE_ANY 29
> +#define MMF_HAS_MDWE_NO_INHERIT 30
> +
> +static inline unsigned long mmf_init_flags(unsigned long flags)
> +{
> + if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT))
> + flags &= ~((1UL << MMF_HAS_MDWE) |
> + (1UL << MMF_HAS_MDWE_NO_INHERIT));
> + return flags & MMF_INIT_MASK;
> +}
> +
> #endif /* _LINUX_SCHED_COREDUMP_H */
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index 6e9af6cbc950..dacbe824e7c3 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -284,6 +284,7 @@ struct prctl_mm_map {
> /* Memory deny write / execute */
> #define PR_SET_MDWE 65
> # define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0)
> +# define PR_MDWE_NO_INHERIT (1UL << 1)
>
> #define PR_GET_MDWE 66
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index d17995934eb4..bc3c762d378f 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1284,7 +1284,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
> hugetlb_count_init(mm);
>
> if (current->mm) {
> - mm->flags = current->mm->flags & MMF_INIT_MASK;
> + mm->flags = mmf_init_flags(current->mm->flags);
> mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
> } else {
> mm->flags = default_dump_filter;
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 339fee3eff6a..1a2dc3da43ea 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -2362,19 +2362,41 @@ static int prctl_set_vma(unsigned long opt, unsigned long start,
> }
> #endif /* CONFIG_ANON_VMA_NAME */
>
> +static inline unsigned long get_current_mdwe(void)
> +{
> + unsigned long ret = 0;
> +
> + if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags))
> + ret |= PR_MDWE_REFUSE_EXEC_GAIN;
> + if (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags))
> + ret |= PR_MDWE_NO_INHERIT;
> +
> + return ret;
> +}
> +
> static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
> unsigned long arg4, unsigned long arg5)
> {
> + unsigned long current_bits;
> +
> if (arg3 || arg4 || arg5)
> return -EINVAL;
>
> - if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN))
> + if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT))
> + return -EINVAL;
> +
> + /* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */
> + if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
> return -EINVAL;
>
> + current_bits = get_current_mdwe();
> + if (current_bits && current_bits != bits)
> + return -EPERM; /* Cannot unset the flags */
I was pondering why PR_MDWE_NO_INHERIT can't be unset, but I guess it
doesn't matter. Anything forked with have it off, and any process
wanting to launch stuff before locking down can just skip running the
prctl() until later.
> +
> + if (bits & PR_MDWE_NO_INHERIT)
> + set_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags);
> if (bits & PR_MDWE_REFUSE_EXEC_GAIN)
> set_bit(MMF_HAS_MDWE, ¤t->mm->flags);
> - else if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags))
> - return -EPERM; /* Cannot unset the flag */
>
> return 0;
> }
> @@ -2384,9 +2406,7 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
> {
> if (arg2 || arg3 || arg4 || arg5)
> return -EINVAL;
> -
> - return test_bit(MMF_HAS_MDWE, ¤t->mm->flags) ?
> - PR_MDWE_REFUSE_EXEC_GAIN : 0;
> + return (int)get_current_mdwe();
> }
>
> static int prctl_get_auxv(void __user *addr, unsigned long len)
> diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
> index 6e9af6cbc950..dacbe824e7c3 100644
> --- a/tools/include/uapi/linux/prctl.h
> +++ b/tools/include/uapi/linux/prctl.h
> @@ -284,6 +284,7 @@ struct prctl_mm_map {
> /* Memory deny write / execute */
> #define PR_SET_MDWE 65
> # define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0)
> +# define PR_MDWE_NO_INHERIT (1UL << 1)
>
> #define PR_GET_MDWE 66
>
> --
> 2.41.0.255.g8b1d071c50-goog
>
Reviewed-by: Kees Cook <keescook@chromium.org>
--
Kees Cook
On Fri, Aug 25, 2023 at 03:38:36PM -0700, Kees Cook wrote:
> On Tue, Jul 04, 2023 at 05:36:28PM +0200, Florent Revest wrote:
> > static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
> > unsigned long arg4, unsigned long arg5)
> > {
> > + unsigned long current_bits;
> > +
> > if (arg3 || arg4 || arg5)
> > return -EINVAL;
> >
> > - if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN))
> > + if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT))
> > + return -EINVAL;
> > +
> > + /* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */
> > + if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
> > return -EINVAL;
> >
> > + current_bits = get_current_mdwe();
> > + if (current_bits && current_bits != bits)
> > + return -EPERM; /* Cannot unset the flags */
>
> I was pondering why PR_MDWE_NO_INHERIT can't be unset, but I guess it
> doesn't matter. Anything forked with have it off, and any process
> wanting to launch stuff before locking down can just skip running the
> prctl() until later.
I had a similar doubt initially but then realised that the no-inherit
mode won't be inherited and concluded it's ok.
--
Catalin
On Sun, Aug 27, 2023 at 3:09 PM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> On Fri, Aug 25, 2023 at 03:38:36PM -0700, Kees Cook wrote:
> > On Tue, Jul 04, 2023 at 05:36:28PM +0200, Florent Revest wrote:
> > > static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
> > > unsigned long arg4, unsigned long arg5)
> > > {
> > > + unsigned long current_bits;
> > > +
> > > if (arg3 || arg4 || arg5)
> > > return -EINVAL;
> > >
> > > - if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN))
> > > + if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT))
> > > + return -EINVAL;
> > > +
> > > + /* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */
> > > + if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
> > > return -EINVAL;
> > >
> > > + current_bits = get_current_mdwe();
> > > + if (current_bits && current_bits != bits)
> > > + return -EPERM; /* Cannot unset the flags */
> >
> > I was pondering why PR_MDWE_NO_INHERIT can't be unset, but I guess it
> > doesn't matter. Anything forked with have it off, and any process
> > wanting to launch stuff before locking down can just skip running the
> > prctl() until later.
>
> I had a similar doubt initially but then realised that the no-inherit
> mode won't be inherited and concluded it's ok.
Indeed. We previously discussed that in
https://lore.kernel.org/all/CABRcYmLt2KsCoD8WzyCTxuY=6ppuWEqyLSDRXSsmXSxPLHtEzQ@mail.gmail.com/
and I agreed this doesn't matter for our use case and this keeps the
code a lot more maintainable :)
© 2016 - 2026 Red Hat, Inc.