Create a new set_robust_list2() syscall. The current set_robust_list()
syscall can't be extended to cover the following use cases, so a new
one is needed. This new syscall allows users to set multiple robust
lists per process and to have either 32bit or 64bit pointers in each
list.
* Interface
This is the proposed interface:
long set_robust_list2(void *head, int index, unsigned int flags)
`head` is the head of the userspace struct robust_list_head, just as in
the old set_robust_list(). It needs to be a void pointer since it can
point either to a normal robust_list_head or to a
compat_robust_list_head.
`flags` is used to define the list type:
enum robust_list2_type {
ROBUST_LIST_32BIT,
ROBUST_LIST_64BIT,
};
`index` is the index in the kernel's internal linked list of robust
lists (the naming starts to get confusing, I reckon). If `index == -1`,
the user wants to set a new robust list, and the kernel will append it
at the end of the list, assign a new index and return this index to the
user. If `index >= 0`, the user wants to re-set the `*head` of an
already existing list (similar to what happens when set_robust_list()
is called twice with different `*head`).

If `index` is out of range, or points to a non-existing robust list, or
if the internal list is full, an error is returned.

Users cannot remove lists.
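
For illustration, a minimal userspace sketch of the intended usage
(assumptions: the raw syscall number 463 proposed by this patch and the
ROBUST_LIST_64BIT value from the patched <linux/futex.h>, since no libc
wrapper exists):

  #include <linux/futex.h>
  #include <stdio.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  #ifndef __NR_set_robust_list2
  #define __NR_set_robust_list2 463	/* number proposed by this patch */
  #endif
  #ifndef ROBUST_LIST_64BIT
  #define ROBUST_LIST_64BIT 1		/* from the patched <linux/futex.h> */
  #endif

  /* An empty robust list head points to itself; normally the threading
   * library sets this up. */
  static struct robust_list_head head64 = {
  	.list = { .next = &head64.list },
  };

  int main(void)
  {
  	/* index == -1: register a new list; the kernel returns its index */
  	long idx = syscall(__NR_set_robust_list2, &head64, -1,
  			   ROBUST_LIST_64BIT);
  	if (idx < 0) {
  		perror("set_robust_list2");
  		return 1;
  	}

  	/* index >= 0: re-set the head of the already registered list */
  	if (syscall(__NR_set_robust_list2, &head64, idx,
  		    ROBUST_LIST_64BIT) < 0)
  		perror("set_robust_list2 re-set");

  	return 0;
  }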
Signed-off-by: André Almeida <andrealmeid@igalia.com>
---
include/linux/futex.h | 1 +
include/linux/sched.h | 1 +
include/uapi/asm-generic/unistd.h | 5 +-
include/uapi/linux/futex.h | 24 +++++++++
init/init_task.c | 3 ++
kernel/futex/core.c | 85 ++++++++++++++++++++++++++++---
kernel/futex/futex.h | 3 ++
kernel/futex/syscalls.c | 36 +++++++++++++
8 files changed, 149 insertions(+), 9 deletions(-)
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 8217b5ebdd9c..997fe0013bc0 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -76,6 +76,7 @@ static inline void futex_init_task(struct task_struct *tsk)
#ifdef CONFIG_COMPAT
tsk->compat_robust_list = NULL;
#endif
+ INIT_LIST_HEAD(&tsk->robust_list2);
INIT_LIST_HEAD(&tsk->pi_state_list);
tsk->pi_state_cache = NULL;
tsk->futex_state = FUTEX_STATE_OK;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8f20b703557d..4a2455f1b07c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1284,6 +1284,7 @@ struct task_struct {
#ifdef CONFIG_COMPAT
struct robust_list_head32 __user *compat_robust_list;
#endif
+ struct list_head robust_list2;
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
struct mutex futex_exit_mutex;
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 5bf6148cac2b..c1f5c9635c07 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -841,8 +841,11 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
#define __NR_mseal 462
__SYSCALL(__NR_mseal, sys_mseal)
+#define __NR_set_robust_list2 463
+__SYSCALL(__NR_set_robust_list2, sys_set_robust_list2)
+
#undef __NR_syscalls
-#define __NR_syscalls 463
+#define __NR_syscalls 464
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index d2ee625ea189..13903a278b71 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -146,6 +146,30 @@ struct robust_list_head {
struct robust_list __user *list_op_pending;
};
+#define ROBUST_LISTS_PER_TASK 10
+
+enum robust_list2_type {
+ ROBUST_LIST_32BIT,
+ ROBUST_LIST_64BIT,
+};
+
+#define ROBUST_LIST_TYPE_MASK (ROBUST_LIST_32BIT | ROBUST_LIST_64BIT)
+
+/*
+ * This is an entry of a linked list of robust lists.
+ *
+ * @head: can point to a 64bit list or a 32bit list
+ * @list_type: determines the size of the futex pointers in the list
+ * @index: the index of this entry in the list
+ * @list: linked list element
+ */
+struct robust_list2_entry {
+ void __user *head;
+ enum robust_list2_type list_type;
+ unsigned int index;
+ struct list_head list;
+};
+
/*
* Are there any waiters for this robust futex:
*/
diff --git a/init/init_task.c b/init/init_task.c
index 136a8231355a..1b08e745c47d 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -219,6 +219,9 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
#ifdef CONFIG_SECCOMP_FILTER
.seccomp = { .filter_count = ATOMIC_INIT(0) },
#endif
+#ifdef CONFIG_FUTEX
+ .robust_list2 = LIST_HEAD_INIT(init_task.robust_list2),
+#endif
};
EXPORT_SYMBOL(init_task);
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index bcd0e2a7ba65..f74476d0bcc1 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -797,9 +797,9 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
*
* We silently return on any sign of list-walking problem.
*/
-static void exit_robust_list64(struct task_struct *curr)
+static void exit_robust_list64(struct task_struct *curr,
+ struct robust_list_head __user *head)
{
- struct robust_list_head __user *head = curr->robust_list;
struct robust_list __user *entry, *next_entry, *pending;
unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
unsigned int next_pi;
@@ -859,7 +859,8 @@ static void exit_robust_list64(struct task_struct *curr)
}
}
#else
-static void exit_robust_list64(struct task_struct *curr)
+static void exit_robust_list64(struct task_struct *curr,
+ struct robust_list_head __user *head)
{
pr_warn("32bit kernel should not allow ROBUST_LIST_64BIT");
return;
@@ -897,9 +898,9 @@ fetch_robust_entry32(u32 *uentry, struct robust_list __user **entry,
*
* We silently return on any sign of list-walking problem.
*/
-static void exit_robust_list32(struct task_struct *curr)
+static void exit_robust_list32(struct task_struct *curr,
+ struct robust_list_head32 __user *head)
{
- struct robust_list_head32 __user *head = curr->compat_robust_list;
struct robust_list __user *entry, *next_entry, *pending;
unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
unsigned int next_pi;
@@ -965,6 +966,54 @@ static void exit_robust_list32(struct task_struct *curr)
}
}
+long do_set_robust_list2(struct robust_list_head __user *head,
+ int index, unsigned int type)
+{
+ struct list_head *list2 = &current->robust_list2;
+ struct robust_list2_entry *prev, *new = NULL;
+
+ if (index == -1) {
+ if (list_empty(list2)) {
+ index = 0;
+ } else {
+ prev = list_last_entry(list2, struct robust_list2_entry, list);
+ index = prev->index + 1;
+ }
+
+ if (index >= ROBUST_LISTS_PER_TASK)
+ return -EINVAL;
+
+ new = kmalloc(sizeof(struct robust_list2_entry), GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+
+ list_add_tail(&new->list, list2);
+ new->index = index;
+
+ } else if (index >= 0) {
+ struct robust_list2_entry *curr;
+
+ if (list_empty(list2))
+ return -ENOENT;
+
+ list_for_each_entry(curr, list2, list) {
+ if (index == curr->index) {
+ new = curr;
+ break;
+ }
+ }
+
+ if (!new)
+ return -ENOENT;
+ }
+
+ BUG_ON(!new);
+ new->head = head;
+ new->list_type = type;
+
+ return index;
+}
+
#ifdef CONFIG_FUTEX_PI
/*
@@ -1046,24 +1095,44 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
static void futex_cleanup(struct task_struct *tsk)
{
+ struct robust_list2_entry *curr, *n;
+ struct list_head *list2 = &tsk->robust_list2;
+
#ifdef CONFIG_64BIT
if (unlikely(tsk->robust_list)) {
- exit_robust_list64(tsk);
+ exit_robust_list64(tsk, tsk->robust_list);
tsk->robust_list = NULL;
}
#else
if (unlikely(tsk->robust_list)) {
- exit_robust_list32(tsk);
+ exit_robust_list32(tsk, (struct robust_list_head32 *) tsk->robust_list);
tsk->robust_list = NULL;
}
#endif
#ifdef CONFIG_COMPAT
if (unlikely(tsk->compat_robust_list)) {
- exit_robust_list32(tsk);
+ exit_robust_list32(tsk, tsk->compat_robust_list);
tsk->compat_robust_list = NULL;
}
#endif
+ /*
+ * Walk through the linked list, parsing robust lists and freeing the
+ * allocated lists
+ */
+ if (unlikely(!list_empty(list2))) {
+ list_for_each_entry_safe(curr, n, list2, list) {
+ if (curr->head != NULL) {
+ if (curr->list_type == ROBUST_LIST_64BIT)
+ exit_robust_list64(tsk, curr->head);
+ else if (curr->list_type == ROBUST_LIST_32BIT)
+ exit_robust_list32(tsk, curr->head);
+ curr->head = NULL;
+ }
+ list_del_init(&curr->list);
+ kfree(curr);
+ }
+ }
if (unlikely(!list_empty(&tsk->pi_state_list)))
exit_pi_state_list(tsk);
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 8b195d06f4e8..7247d5c583d5 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -349,6 +349,9 @@ extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
ktime_t *abs_time, u32 bitset);
+extern long do_set_robust_list2(struct robust_list_head __user *head,
+ int index, unsigned int type);
+
/**
* struct futex_vector - Auxiliary struct for futex_waitv()
* @w: Userspace provided data
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
index dba193dfd216..ff61570bb9c8 100644
--- a/kernel/futex/syscalls.c
+++ b/kernel/futex/syscalls.c
@@ -39,6 +39,42 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
return 0;
}
+#define ROBUST_LIST_FLAGS ROBUST_LIST_TYPE_MASK
+
+/*
+ * sys_set_robust_list2()
+ *
+ * When index == -1, create a new list for the user. When index >= 0, try to
+ * find the corresponding list and re-set the head there.
+ *
+ * Return values:
+ * >= 0: success, index of the robust list
+ * -EINVAL: invalid flags, invalid index, or too many allocated
+ *          lists for this task
+ * -ENOENT: requested index not found
+ * -ENOMEM: error allocating new list
+ */
+SYSCALL_DEFINE3(set_robust_list2, struct robust_list_head __user *, head,
+ int, index, unsigned int, flags)
+{
+ unsigned int type;
+
+ type = flags & ROBUST_LIST_TYPE_MASK;
+
+ if (index < -1 || index >= ROBUST_LISTS_PER_TASK)
+ return -EINVAL;
+
+ if ((flags & ~ROBUST_LIST_FLAGS) != 0)
+ return -EINVAL;
+
+#ifndef CONFIG_64BIT
+ if (type == ROBUST_LIST_64BIT)
+ return -EINVAL;
+#endif
+
+ return do_set_robust_list2(head, index, type);
+}
+
/**
* sys_get_robust_list() - Get the robust-futex list head of a task
* @pid: pid of the process [zero for current task]
--
2.47.0
On Fri, Nov 01, 2024 at 01:21:46PM -0300, André Almeida wrote:
> @@ -1046,24 +1095,44 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
> 
>  static void futex_cleanup(struct task_struct *tsk)
>  {
> +	struct robust_list2_entry *curr, *n;
> +	struct list_head *list2 = &tsk->robust_list2;
> +
>  #ifdef CONFIG_64BIT
>  	if (unlikely(tsk->robust_list)) {
> -		exit_robust_list64(tsk);
> +		exit_robust_list64(tsk, tsk->robust_list);
>  		tsk->robust_list = NULL;
>  	}
>  #else
>  	if (unlikely(tsk->robust_list)) {
> -		exit_robust_list32(tsk);
> +		exit_robust_list32(tsk, (struct robust_list_head32 *) tsk->robust_list);
>  		tsk->robust_list = NULL;
>  	}
>  #endif
> 
>  #ifdef CONFIG_COMPAT
>  	if (unlikely(tsk->compat_robust_list)) {
> -		exit_robust_list32(tsk);
> +		exit_robust_list32(tsk, tsk->compat_robust_list);
>  		tsk->compat_robust_list = NULL;
>  	}
>  #endif
> +	/*
> +	 * Walk through the linked list, parsing robust lists and freeing the
> +	 * allocated lists
> +	 */
> +	if (unlikely(!list_empty(list2))) {
> +		list_for_each_entry_safe(curr, n, list2, list) {
> +			if (curr->head != NULL) {
> +				if (curr->list_type == ROBUST_LIST_64BIT)
> +					exit_robust_list64(tsk, curr->head);
> +				else if (curr->list_type == ROBUST_LIST_32BIT)
> +					exit_robust_list32(tsk, curr->head);
> +				curr->head = NULL;
> +			}
> +			list_del_init(&curr->list);
> +			kfree(curr);
> +		}
> +	}
> 
>  	if (unlikely(!list_empty(&tsk->pi_state_list)))
>  		exit_pi_state_list(tsk);

I'm still digesting this, but the above seems particularly silly.

Should not the legacy lists also be on the list of lists? I mean, it
makes no sense to have two completely separate means of tracking lists.
Hi Peter,

On 04/11/2024 08:22, Peter Zijlstra wrote:
> On Fri, Nov 01, 2024 at 01:21:46PM -0300, André Almeida wrote:
>> @@ -1046,24 +1095,44 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
>> 
>>  static void futex_cleanup(struct task_struct *tsk)
>>  {
>> +	struct robust_list2_entry *curr, *n;
>> +	struct list_head *list2 = &tsk->robust_list2;
>> +
>>  #ifdef CONFIG_64BIT
>>  	if (unlikely(tsk->robust_list)) {
>> -		exit_robust_list64(tsk);
>> +		exit_robust_list64(tsk, tsk->robust_list);
>>  		tsk->robust_list = NULL;
>>  	}
>>  #else
>>  	if (unlikely(tsk->robust_list)) {
>> -		exit_robust_list32(tsk);
>> +		exit_robust_list32(tsk, (struct robust_list_head32 *) tsk->robust_list);
>>  		tsk->robust_list = NULL;
>>  	}
>>  #endif
>> 
>>  #ifdef CONFIG_COMPAT
>>  	if (unlikely(tsk->compat_robust_list)) {
>> -		exit_robust_list32(tsk);
>> +		exit_robust_list32(tsk, tsk->compat_robust_list);
>>  		tsk->compat_robust_list = NULL;
>>  	}
>>  #endif
>> +	/*
>> +	 * Walk through the linked list, parsing robust lists and freeing the
>> +	 * allocated lists
>> +	 */
>> +	if (unlikely(!list_empty(list2))) {
>> +		list_for_each_entry_safe(curr, n, list2, list) {
>> +			if (curr->head != NULL) {
>> +				if (curr->list_type == ROBUST_LIST_64BIT)
>> +					exit_robust_list64(tsk, curr->head);
>> +				else if (curr->list_type == ROBUST_LIST_32BIT)
>> +					exit_robust_list32(tsk, curr->head);
>> +				curr->head = NULL;
>> +			}
>> +			list_del_init(&curr->list);
>> +			kfree(curr);
>> +		}
>> +	}
>> 
>>  	if (unlikely(!list_empty(&tsk->pi_state_list)))
>>  		exit_pi_state_list(tsk);
> 
> I'm still digesting this, but the above seems particularly silly.
> 
> Should not the legacy lists also be on the list of lists? I mean, it
> makes no sense to have two completely separate means of tracking lists.
> 

Are you asking whether a set_robust_list() or compat_set_robust_list()
call should insert the list into &current->robust_list2, instead of
using tsk->robust_list and tsk->compat_robust_list?

I was thinking of doing that, but my current implementation has a
kmalloc() call for every insertion, and I wasn't sure if I could add
this new latency to the old set_robust_list() syscall. Assuming it is
usually called just once during thread initialization, perhaps it
shouldn't cause much harm, I guess.
On Mon, Nov 04, 2024 at 06:55:45PM -0300, André Almeida wrote:
> Hi Peter,
> 
> On 04/11/2024 08:22, Peter Zijlstra wrote:
> > On Fri, Nov 01, 2024 at 01:21:46PM -0300, André Almeida wrote:
> > > @@ -1046,24 +1095,44 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
> > >  static void futex_cleanup(struct task_struct *tsk)
> > >  {
> > > +	struct robust_list2_entry *curr, *n;
> > > +	struct list_head *list2 = &tsk->robust_list2;
> > > +
> > >  #ifdef CONFIG_64BIT
> > >  	if (unlikely(tsk->robust_list)) {
> > > -		exit_robust_list64(tsk);
> > > +		exit_robust_list64(tsk, tsk->robust_list);
> > >  		tsk->robust_list = NULL;
> > >  	}
> > >  #else
> > >  	if (unlikely(tsk->robust_list)) {
> > > -		exit_robust_list32(tsk);
> > > +		exit_robust_list32(tsk, (struct robust_list_head32 *) tsk->robust_list);
> > >  		tsk->robust_list = NULL;
> > >  	}
> > >  #endif
> > >  #ifdef CONFIG_COMPAT
> > >  	if (unlikely(tsk->compat_robust_list)) {
> > > -		exit_robust_list32(tsk);
> > > +		exit_robust_list32(tsk, tsk->compat_robust_list);
> > >  		tsk->compat_robust_list = NULL;
> > >  	}
> > >  #endif
> > > +	/*
> > > +	 * Walk through the linked list, parsing robust lists and freeing the
> > > +	 * allocated lists
> > > +	 */
> > > +	if (unlikely(!list_empty(list2))) {
> > > +		list_for_each_entry_safe(curr, n, list2, list) {
> > > +			if (curr->head != NULL) {
> > > +				if (curr->list_type == ROBUST_LIST_64BIT)
> > > +					exit_robust_list64(tsk, curr->head);
> > > +				else if (curr->list_type == ROBUST_LIST_32BIT)
> > > +					exit_robust_list32(tsk, curr->head);
> > > +				curr->head = NULL;
> > > +			}
> > > +			list_del_init(&curr->list);
> > > +			kfree(curr);
> > > +		}
> > > +	}
> > >  	if (unlikely(!list_empty(&tsk->pi_state_list)))
> > >  		exit_pi_state_list(tsk);
> > 
> > I'm still digesting this, but the above seems particularly silly.
> > 
> > Should not the legacy lists also be on the list of lists? I mean, it
> > makes no sense to have two completely separate means of tracking lists.
> > 
> 
> Are you asking whether a set_robust_list() or compat_set_robust_list()
> call should insert the list into &current->robust_list2, instead of
> using tsk->robust_list and tsk->compat_robust_list?

Yes, that.
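
A rough sketch of that unification, for reference (illustrative only,
not part of this patch: the legacy_robust_idx task field is invented
here, and the legacy ABI is assumed to require returning 0 and updating
the same entry on repeated calls):

  SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
  		size_t, len)
  {
  	long ret;

  	if (len != sizeof(*head))
  		return -EINVAL;

  	/*
  	 * current->legacy_robust_idx would start at -1, so the first
  	 * call appends a new entry (one kmalloc(), paid once) and every
  	 * later call just re-sets the head of that same entry.
  	 */
  	ret = do_set_robust_list2(head, current->legacy_robust_idx,
  				  ROBUST_LIST_64BIT);
  	if (ret < 0)
  		return ret;

  	current->legacy_robust_idx = ret;
  	return 0;	/* legacy ABI returns 0, not the index */
  }

With this, futex_cleanup() would only need to walk tsk->robust_list2; a
32bit kernel (and the compat entry point) would pass ROBUST_LIST_32BIT
instead of ROBUST_LIST_64BIT.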
Hi André,

kernel test robot noticed the following build warnings:

[auto build test WARNING on tip/locking/core]
[also build test WARNING on tip/sched/core linus/master v6.12-rc5 next-20241101]
[cannot apply to tip/x86/asm]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Andr-Almeida/futex-Use-explicit-sizes-for-compat_exit_robust_list/20241102-002419
base:   tip/locking/core
patch link:    https://lore.kernel.org/r/20241101162147.284993-3-andrealmeid%40igalia.com
patch subject: [PATCH v2 2/3] futex: Create set_robust_list2
config: i386-randconfig-062-20241102 (https://download.01.org/0day-ci/archive/20241103/202411030038.W5DgvCYP-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241103/202411030038.W5DgvCYP-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202411030038.W5DgvCYP-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
   kernel/futex/core.c:915:59: sparse: sparse: cast removes address space '__user' of expression
   kernel/futex/core.c:915:59: sparse: sparse: incorrect type in argument 3 (different address spaces) @@ expected unsigned int [noderef] [usertype] __user *head @@ got unsigned int [usertype] * @@
   kernel/futex/core.c:915:59: sparse:     expected unsigned int [noderef] [usertype] __user *head
   kernel/futex/core.c:915:59: sparse:     got unsigned int [usertype] *
   kernel/futex/core.c:1108:42: sparse: sparse: cast removes address space '__user' of expression
>> kernel/futex/core.c:1108:42: sparse: sparse: incorrect type in argument 2 (different address spaces) @@ expected struct robust_list_head32 [noderef] __user *head @@ got struct robust_list_head32 * @@
   kernel/futex/core.c:1108:42: sparse:     expected struct robust_list_head32 [noderef] __user *head
   kernel/futex/core.c:1108:42: sparse:     got struct robust_list_head32 *

kernel/futex/core.c: note: in included file (through include/linux/smp.h, include/linux/alloc_tag.h, include/linux/percpu.h, ...):
   include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true

vim +1108 kernel/futex/core.c

  1095	
  1096	static void futex_cleanup(struct task_struct *tsk)
  1097	{
  1098		struct robust_list2_entry *curr, *n;
  1099		struct list_head *list2 = &tsk->robust_list2;
  1100	
  1101	#ifdef CONFIG_64BIT
  1102		if (unlikely(tsk->robust_list)) {
  1103			exit_robust_list64(tsk, tsk->robust_list);
  1104			tsk->robust_list = NULL;
  1105		}
  1106	#else
  1107		if (unlikely(tsk->robust_list)) {
> 1108			exit_robust_list32(tsk, (struct robust_list_head32 *) tsk->robust_list);
  1109			tsk->robust_list = NULL;
  1110		}
  1111	#endif
  1112	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
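
Both sparse complaints are about casts dropping the __user address
space annotation; presumably keeping it in the cast would silence them.
An untested sketch against the hunk above:

  	if (unlikely(tsk->robust_list)) {
  		exit_robust_list32(tsk,
  			(struct robust_list_head32 __user *) tsk->robust_list);
  		tsk->robust_list = NULL;
  	}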