The mm->saved_auxv array stores the auxiliary vector, which can be
modified via prctl(PR_SET_MM_AUXV) or prctl(PR_SET_MM_MAP). Previously,
accesses to saved_auxv were not synchronized. This was a intentional
trade-off, as the vector was only used to provide information to
userspace via /proc/PID/auxv or prctl(PR_GET_AUXV), and consistency
between the auxv values left to userspace.
With the introduction of hardware capability (HWCAP) inheritance during
execve, the kernel now relies on the contents of saved_auxv to configure
the execution environment of new processes. An unsynchronized read
during execve could result in a new process inheriting an inconsistent
set of capabilities if the parent process updates its auxiliary vector
concurrently.
While it is still not strictly required to guarantee the consistency of
auxv values on the kernel side, doing so is relatively straightforward.
This change implements synchronization using arg_lock.
Signed-off-by: Andrei Vagin <avagin@google.com>
---
fs/exec.c | 8 ++++++--
fs/proc/base.c | 12 +++++++++---
kernel/fork.c | 7 ++++++-
kernel/sys.c | 29 ++++++++++++++---------------
4 files changed, 35 insertions(+), 21 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index 7401efbe4ba0..d7e3ad8c8051 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1793,6 +1793,7 @@ static int bprm_execve(struct linux_binprm *bprm)
static void inherit_hwcap(struct linux_binprm *bprm)
{
+ struct mm_struct *mm = current->mm;
int i, n;
#ifdef ELF_HWCAP4
@@ -1805,10 +1806,12 @@ static void inherit_hwcap(struct linux_binprm *bprm)
n = 1;
#endif
+ spin_lock(&mm->arg_lock);
for (i = 0; n && i < AT_VECTOR_SIZE; i += 2) {
- long val = current->mm->saved_auxv[i + 1];
+ unsigned long type = mm->saved_auxv[i];
+ unsigned long val = mm->saved_auxv[i + 1];
- switch (current->mm->saved_auxv[i]) {
+ switch (type) {
case AT_NULL:
goto done;
case AT_HWCAP:
@@ -1835,6 +1838,7 @@ static void inherit_hwcap(struct linux_binprm *bprm)
n--;
}
done:
+ spin_unlock(&mm->arg_lock);
mm_flags_set(MMF_USER_HWCAP, bprm->mm);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4eec684baca9..09d887741268 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1083,14 +1083,20 @@ static ssize_t auxv_read(struct file *file, char __user *buf,
{
struct mm_struct *mm = file->private_data;
unsigned int nwords = 0;
+ unsigned long saved_auxv[AT_VECTOR_SIZE];
if (!mm)
return 0;
+
+ spin_lock(&mm->arg_lock);
+ memcpy(saved_auxv, mm->saved_auxv, sizeof(saved_auxv));
+ spin_unlock(&mm->arg_lock);
+
do {
nwords += 2;
- } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
- return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
- nwords * sizeof(mm->saved_auxv[0]));
+ } while (saved_auxv[nwords - 2] != 0); /* AT_NULL */
+ return simple_read_from_buffer(buf, count, ppos, saved_auxv,
+ nwords * sizeof(saved_auxv[0]));
}
static const struct file_operations proc_auxv_operations = {
diff --git a/kernel/fork.c b/kernel/fork.c
index 0091315643de..c0a3dd94df22 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1104,8 +1104,13 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
__mm_flags_overwrite_word(mm, mmf_init_legacy_flags(flags));
mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
- if (mm_flags_test(MMF_USER_HWCAP, current->mm))
+ if (mm_flags_test(MMF_USER_HWCAP, current->mm)) {
+ spin_lock(¤t->mm->arg_lock);
mm_flags_set(MMF_USER_HWCAP, mm);
+ memcpy(mm->saved_auxv, current->mm->saved_auxv,
+ sizeof(mm->saved_auxv));
+ spin_unlock(¤t->mm->arg_lock);
+ }
} else {
__mm_flags_overwrite_word(mm, default_dump_filter);
mm->def_flags = 0;
diff --git a/kernel/sys.c b/kernel/sys.c
index 6fbd7be21a5f..eafb5f75cb5c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2147,20 +2147,11 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
mm->arg_end = prctl_map.arg_end;
mm->env_start = prctl_map.env_start;
mm->env_end = prctl_map.env_end;
- spin_unlock(&mm->arg_lock);
-
- /*
- * Note this update of @saved_auxv is lockless thus
- * if someone reads this member in procfs while we're
- * updating -- it may get partly updated results. It's
- * known and acceptable trade off: we leave it as is to
- * not introduce additional locks here making the kernel
- * more complex.
- */
if (prctl_map.auxv_size) {
- memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
mm_flags_set(MMF_USER_HWCAP, current->mm);
+ memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
}
+ spin_unlock(&mm->arg_lock);
mmap_read_unlock(mm);
return 0;
@@ -2190,10 +2181,10 @@ static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
- task_lock(current);
- memcpy(mm->saved_auxv, user_auxv, len);
+ spin_lock(&mm->arg_lock);
mm_flags_set(MMF_USER_HWCAP, current->mm);
- task_unlock(current);
+ memcpy(mm->saved_auxv, user_auxv, len);
+ spin_unlock(&mm->arg_lock);
return 0;
}
@@ -2466,9 +2457,17 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
static int prctl_get_auxv(void __user *addr, unsigned long len)
{
struct mm_struct *mm = current->mm;
+ unsigned long auxv[AT_VECTOR_SIZE];
unsigned long size = min_t(unsigned long, sizeof(mm->saved_auxv), len);
- if (size && copy_to_user(addr, mm->saved_auxv, size))
+ if (!size)
+ return sizeof(mm->saved_auxv);
+
+ spin_lock(&mm->arg_lock);
+ memcpy(auxv, mm->saved_auxv, size);
+ spin_unlock(&mm->arg_lock);
+
+ if (copy_to_user(addr, auxv, size))
return -EFAULT;
return sizeof(mm->saved_auxv);
}
--
2.53.0.239.g8d8fc8a987-goog
© 2016 - 2026 Red Hat, Inc.