From nobody Sun Feb 8 21:42:14 2026 Received: from fanzine2.igalia.com (fanzine.igalia.com [178.60.130.6]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9218414830F; Tue, 25 Feb 2025 18:36:09 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=178.60.130.6 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740508572; cv=none; b=QNvJvcWngu9YPqghUuxGbBFw5BxCH2Nzzcgx7WEe4gmzKd0Yp6QAfG444Ig6aqnbqGuz0X+j2TxMfrb1z94qYSL3lXrbcf2Vgz+feQx0/vIy1Zu8EJNSIK7jHYk8EfNAEgVeBty/OflY0loWDvfy7j79GoZmLUDNSmjFb0/DAeQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740508572; c=relaxed/simple; bh=ZSo+xBZn7/Rq+fI9hZ/OOyrmf1GEeDabV54OKUmP20c=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=Tuk7mVEsZOnoiG/ARadfVFPYQrVcRKsD5BATBwtEMEhYbH0CXIgSK56/np20Nhp9+nSRZGkxvtGlbz5lwmoRnyXaZhrlbTYb9pQnPrqrtobHnVB6xqUZC8UAW41wIyd8KUVFT9QR4aVco3JGBL5XXqwBao5WvfYtLuBmX2fwxQU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=igalia.com; spf=pass smtp.mailfrom=igalia.com; dkim=pass (2048-bit key) header.d=igalia.com header.i=@igalia.com header.b=jn151IDE; arc=none smtp.client-ip=178.60.130.6 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=igalia.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=igalia.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=igalia.com header.i=@igalia.com header.b="jn151IDE" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=igalia.com; s=20170329; h=Content-Transfer-Encoding:Content-Type:MIME-Version:References: In-Reply-To:Message-ID:Date:Subject:Cc:To:From:Sender:Reply-To:Content-ID: Content-Description:Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc 
:Resent-Message-ID:List-Id:List-Help:List-Unsubscribe:List-Subscribe: List-Post:List-Owner:List-Archive; bh=h6w6N6SDGLTrTIua0M4xKjrdL+BORZGDXg6rR8oqoAQ=; b=jn151IDElYsHxUz01TYm4Q82dc lfAFSXG+948p6ZhIXzOLTypYyAocWQyTpeyGNJAOIt3RTwNjVpzrpZfc3X3HchCnxeMsSV+qWbLPb k4vA6KFAQt7SB5sA5S7PGqlyhcWDloqsd76VfMjcq47EW991RglnLFCrFXxVru8w0VHdA6jVXpKMU d6/kAjnsvT4wZVkAs/N2Oq37mdjcb2sxdemqfFd2trs8ulFs7AzCB71+lVsDwfTE4ukAhosaUfM12 TwxKUxwUgko4LaAx+iyL50/KN8kfczrG/bf2tjigQIfT8UHHRcbM1JnHTrmTpZamewQQst8taLnis xmpZPncQ==; Received: from [191.204.194.148] (helo=localhost.localdomain) by fanzine2.igalia.com with esmtpsa (Cipher TLS1.3:ECDHE_X25519__RSA_PSS_RSAE_SHA256__AES_256_GCM:256) (Exim) id 1tmzmK-000WtH-UD; Tue, 25 Feb 2025 19:35:43 +0100 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= To: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Darren Hart , Davidlohr Bueso , Arnd Bergmann , sonicadvance1@gmail.com Cc: linux-kernel@vger.kernel.org, kernel-dev@igalia.com, linux-api@vger.kernel.org, Vinicius Peixoto , Sebastian Andrzej Siewior , =?UTF-8?q?Andr=C3=A9=20Almeida?= Subject: [PATCH v4 1/5] futex: Use explicit sizes for compat_exit_robust_list Date: Tue, 25 Feb 2025 15:35:27 -0300 Message-ID: <20250225183531.682556-2-andrealmeid@igalia.com> X-Mailer: git-send-email 2.48.1 In-Reply-To: <20250225183531.682556-1-andrealmeid@igalia.com> References: <20250225183531.682556-1-andrealmeid@igalia.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable There are two functions for handling robust lists during the task exit: exit_robust_list() and compat_exit_robust_list(). The first one handles either 64bit or 32bit lists, depending if it's a 64bit or 32bit kernel. The compat_exit_robust_list() only exists in 64bit kernels that supports 32bit syscalls, and handles 32bit lists. 
For the new syscall set_robust_list2(), 64bit kernels need to be able to handle 32bit lists whether or not they support 32bit syscalls, so make compat_exit_robust_list() exist regardless of the CONFIG_COMPAT setting. Also, use explicit sizing, otherwise in a 32bit kernel both exit_robust_list() and compat_exit_robust_list() would be exactly the same function, with neither of them dealing with 64bit robust lists. Signed-off-by: Andr=C3=A9 Almeida --- include/linux/compat.h | 12 +---------- include/linux/futex.h | 11 +++++++++++ include/linux/sched.h | 2 +- kernel/futex/core.c | 44 ++++++++++++++++++++++++++--------------- kernel/futex/syscalls.c | 4 ++-- 5 files changed, 43 insertions(+), 30 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index 56cebaff0c91..968a9135ff48 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -385,16 +385,6 @@ struct compat_ifconf { compat_caddr_t ifcbuf; }; =20 -struct compat_robust_list { - compat_uptr_t next; -}; - -struct compat_robust_list_head { - struct compat_robust_list list; - compat_long_t futex_offset; - compat_uptr_t list_op_pending; -}; - #ifdef CONFIG_COMPAT_OLD_SIGACTION struct compat_old_sigaction { compat_uptr_t sa_handler; @@ -672,7 +662,7 @@ asmlinkage long compat_sys_waitid(int, compat_pid_t, struct compat_siginfo __user *, int, struct compat_rusage __user *); asmlinkage long -compat_sys_set_robust_list(struct compat_robust_list_head __user *head, +compat_sys_set_robust_list(struct robust_list_head32 __user *head, compat_size_t len); asmlinkage long compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, diff --git a/include/linux/futex.h b/include/linux/futex.h index b70df27d7e85..8217b5ebdd9c 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -53,6 +53,17 @@ union futex_key { #define FUTEX_KEY_INIT (union futex_key) { .both =3D { .ptr =3D 0ULL } } =20 #ifdef CONFIG_FUTEX + +struct robust_list32 { + u32 next; +}; + +struct robust_list_head32 { + struct 
robust_list32 list; + s32 futex_offset; + u32 list_op_pending; +}; + enum { FUTEX_STATE_OK, FUTEX_STATE_EXITING, diff --git a/include/linux/sched.h b/include/linux/sched.h index 9632e3318e0d..29e500d8d19d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1299,7 +1299,7 @@ struct task_struct { #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT - struct compat_robust_list_head __user *compat_robust_list; + struct robust_list_head32 __user *compat_robust_list; #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 3db8567f5a44..3d81a53c114c 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -769,13 +769,14 @@ static inline int fetch_robust_entry(struct robust_li= st __user **entry, return 0; } =20 +#ifdef CONFIG_64BIT /* * Walk curr->robust_list (very carefully, it's a userspace list!) * and mark any locks found there dead, and notify any waiters. * * We silently return on any sign of list-walking problem. */ -static void exit_robust_list(struct task_struct *curr) +static void exit_robust_list64(struct task_struct *curr) { struct robust_list_head __user *head =3D curr->robust_list; struct robust_list __user *entry, *next_entry, *pending; @@ -836,8 +837,13 @@ static void exit_robust_list(struct task_struct *curr) curr, pip, HANDLE_DEATH_PENDING); } } +#else +static void exit_robust_list64(struct task_struct *curr) +{ + pr_warn("32bit kernel should not allow ROBUST_LIST_64BIT"); +} +#endif =20 -#ifdef CONFIG_COMPAT static void __user *futex_uaddr(struct robust_list __user *entry, compat_long_t futex_offset) { @@ -851,13 +857,13 @@ static void __user *futex_uaddr(struct robust_list __= user *entry, * Fetch a robust-list pointer. 
Bit 0 signals PI futexes: */ static inline int -compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user= **entry, - compat_uptr_t __user *head, unsigned int *pi) +fetch_robust_entry32(u32 *uentry, struct robust_list __user **entry, + u32 __user *head, unsigned int *pi) { if (get_user(*uentry, head)) return -EFAULT; =20 - *entry =3D compat_ptr((*uentry) & ~1); + *entry =3D (void __user *)(unsigned long)((*uentry) & ~1); *pi =3D (unsigned int)(*uentry) & 1; =20 return 0; @@ -869,21 +875,21 @@ compat_fetch_robust_entry(compat_uptr_t *uentry, stru= ct robust_list __user **ent * * We silently return on any sign of list-walking problem. */ -static void compat_exit_robust_list(struct task_struct *curr) +static void exit_robust_list32(struct task_struct *curr) { - struct compat_robust_list_head __user *head =3D curr->compat_robust_list; + struct robust_list_head32 __user *head =3D curr->compat_robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit =3D ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; - compat_uptr_t uentry, next_uentry, upending; - compat_long_t futex_offset; + u32 uentry, next_uentry, upending; + s32 futex_offset; int rc; =20 /* * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): */ - if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) + if (fetch_robust_entry32((u32 *)&uentry, &entry, (u32 *)&head->list.next,= &pi)) return; /* * Fetch the relative futex offset: @@ -894,7 +900,7 @@ static void compat_exit_robust_list(struct task_struct = *curr) * Fetch any possibly pending lock-add first, and handle it * if it exists: */ - if (compat_fetch_robust_entry(&upending, &pending, + if (fetch_robust_entry32(&upending, &pending, &head->list_op_pending, &pip)) return; =20 @@ -904,8 +910,8 @@ static void compat_exit_robust_list(struct task_struct = *curr) * Fetch the next entry in the list before calling * handle_futex_death: */ - rc =3D 
compat_fetch_robust_entry(&next_uentry, &next_entry, - (compat_uptr_t __user *)&entry->next, &next_pi); + rc =3D fetch_robust_entry32(&next_uentry, &next_entry, + (u32 __user *)&entry->next, &next_pi); /* * A pending lock might already be on the list, so * dont process it twice: @@ -936,7 +942,6 @@ static void compat_exit_robust_list(struct task_struct = *curr) handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING); } } -#endif =20 #ifdef CONFIG_FUTEX_PI =20 @@ -1019,14 +1024,21 @@ static inline void exit_pi_state_list(struct task_s= truct *curr) { } =20 static void futex_cleanup(struct task_struct *tsk) { +#ifdef CONFIG_64BIT if (unlikely(tsk->robust_list)) { - exit_robust_list(tsk); + exit_robust_list64(tsk); tsk->robust_list =3D NULL; } +#else + if (unlikely(tsk->robust_list)) { + exit_robust_list32(tsk); + tsk->robust_list =3D NULL; + } +#endif =20 #ifdef CONFIG_COMPAT if (unlikely(tsk->compat_robust_list)) { - compat_exit_robust_list(tsk); + exit_robust_list32(tsk); tsk->compat_robust_list =3D NULL; } #endif diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 4b6da9116aa6..dba193dfd216 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -440,7 +440,7 @@ SYSCALL_DEFINE4(futex_requeue, =20 #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, - struct compat_robust_list_head __user *, head, + struct robust_list_head32 __user *, head, compat_size_t, len) { if (unlikely(len !=3D sizeof(*head))) @@ -455,7 +455,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, compat_uptr_t __user *, head_ptr, compat_size_t __user *, len_ptr) { - struct compat_robust_list_head __user *head; + struct robust_list_head32 __user *head; unsigned long ret; struct task_struct *p; =20 --=20 2.48.1 From nobody Sun Feb 8 21:42:14 2026 Received: from fanzine2.igalia.com (fanzine.igalia.com [178.60.130.6]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org 
(Postfix) with ESMTPS id CEE56189F57; Tue, 25 Feb 2025 18:36:09 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=178.60.130.6 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740508573; cv=none; b=gFL71+Q9MAr1WBptP/xyr/tajqk8aWiOzx7SS8U7apbNeIJ3Vd4egLsiSS7b+9eoNgr0pS5yiGsXEDhBiDtd96pJ/QRA41jqHDFDUaBIZ/AE9YPWKRLtjXr3ComX+5a6Wjgt27wlCUvQMmmU/fXN52BfuXxsh7fTb1V20WhIgJU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740508573; c=relaxed/simple; bh=XprEHY1PKqcJl864Alch3vme7EJ4cRy0TdihnbUJR0k=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=rFO3PW4EF7jwmul83HkZAsaqkAv3Jc7w9ZdI2YIjFM7hZ5lLLvaGtg0ljX3I1dxY68vePYmcnpqRqeJyUUjdi4k9iMaLNifgRFzB6D3uphtV1DotsJ+K24pvSRiwsNPh3GYvYQmadhswcIX6adpz6PvXSXaccAy4cZNDt9JlHKk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=igalia.com; spf=pass smtp.mailfrom=igalia.com; dkim=pass (2048-bit key) header.d=igalia.com header.i=@igalia.com header.b=o9wu0x/Z; arc=none smtp.client-ip=178.60.130.6 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=igalia.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=igalia.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=igalia.com header.i=@igalia.com header.b="o9wu0x/Z" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=igalia.com; s=20170329; h=Content-Transfer-Encoding:Content-Type:MIME-Version:References: In-Reply-To:Message-ID:Date:Subject:Cc:To:From:Sender:Reply-To:Content-ID: Content-Description:Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc :Resent-Message-ID:List-Id:List-Help:List-Unsubscribe:List-Subscribe: List-Post:List-Owner:List-Archive; bh=O4EGBN0wCJHx90+QKkOJ4zGeEp3LzQZKoYRjto0MbmM=; b=o9wu0x/ZYIpoODJP0/c6aqM1Le 
VHV5kql7FS1nNFtniZAlPpfiTCsx+hSFhX89lvxsCANvB2je1qkltaxQzZZpaqUMqMf/YaZwgMX4b lFnKwIv6s8rLI6M2HFqBAgDhIwQwo6yrSSb1xheSy6n6tlfUcVFaEv5UVB2QL5+6NpTjzxZCArQUu LxobXoYmfmDz/C3BAdwjsTaX06IqRVO2ZAFrquYLPXPJFhaRDRgEZBm5wHqw01Cpn4EcZMJ6I80Ei /DTHOz5H72nUnSAmCz+u2BN0K7FiMrH78Rf/sqButL309V2DOjqoH5RPS6sK7c1vrkVIH/pTU03ns su3wbEhg==; Received: from [191.204.194.148] (helo=localhost.localdomain) by fanzine2.igalia.com with esmtpsa (Cipher TLS1.3:ECDHE_X25519__RSA_PSS_RSAE_SHA256__AES_256_GCM:256) (Exim) id 1tmzmO-000WtH-KQ; Tue, 25 Feb 2025 19:35:47 +0100 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= To: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Darren Hart , Davidlohr Bueso , Arnd Bergmann , sonicadvance1@gmail.com Cc: linux-kernel@vger.kernel.org, kernel-dev@igalia.com, linux-api@vger.kernel.org, Vinicius Peixoto , Sebastian Andrzej Siewior , =?UTF-8?q?Andr=C3=A9=20Almeida?= Subject: [PATCH v4 2/5] futex: Create set_robust_list2 Date: Tue, 25 Feb 2025 15:35:28 -0300 Message-ID: <20250225183531.682556-3-andrealmeid@igalia.com> X-Mailer: git-send-email 2.48.1 In-Reply-To: <20250225183531.682556-1-andrealmeid@igalia.com> References: <20250225183531.682556-1-andrealmeid@igalia.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Create a new robust_list() syscall. The current syscall can't be expanded to cover the following use case, so a new one is needed. This new syscall allows users to set multiple robust lists per process and to have either 32bit or 64bit pointers in the list. * Interface This is the proposed interface: long set_robust_list2(void *head, int index, unsigned int flags) `head` is the head of the userspace struct robust_list_head, just as old set_robust_list(). It needs to be a void pointer since it can point to a normal robust_list_head or a compat_robust_list_head. 
`flags` can be used for defining the list type: enum robust_list2_type { ROBUST_LIST_32BIT, ROBUST_LIST_64BIT, }; `index` is the index in the internal robust_list's linked list (the naming starts to get confusing, I reckon). If `index =3D=3D -1`, that means that the user wants to set a new robust_list, and the kernel will append it at the end of the list, assign a new index and return this index to the user. If `index >=3D 0`, that means that the user wants to re-set `*head` of an already existing list (similarly to what happens when you call set_robust_list() twice with different `*head`). If `index` is out of range, or it points to a non-existing robust_list, or if the internal list is full, an error is returned. Unaligned `head` addresses are refused by the kernel with -EINVAL. Users cannot remove lists. * Implementation The old syscalls set/get_robust_list() are converted to use the linked list as well. When using only the old syscalls, users shouldn't notice any difference, as the internal code will handle the linked list insertion as usual. When mixing old and new interfaces, users should be aware that one of the elements of the list was created by another syscall and they should take special care when handling this element's index. On exit, the linked list is walked and all robust lists are handled, regardless of which interface was used to create them. 
Signed-off-by: Andr=C3=A9 Almeida --- include/linux/futex.h | 5 +- include/linux/sched.h | 5 +- include/uapi/asm-generic/unistd.h | 4 +- include/uapi/linux/futex.h | 24 +++++++ kernel/futex/core.c | 111 ++++++++++++++++++++++++------ kernel/futex/futex.h | 5 ++ kernel/futex/syscalls.c | 81 ++++++++++++++++++++-- 7 files changed, 205 insertions(+), 30 deletions(-) diff --git a/include/linux/futex.h b/include/linux/futex.h index 8217b5ebdd9c..39335f21aea6 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -72,10 +72,11 @@ enum { =20 static inline void futex_init_task(struct task_struct *tsk) { - tsk->robust_list =3D NULL; + tsk->robust_list_index =3D -1; #ifdef CONFIG_COMPAT - tsk->compat_robust_list =3D NULL; + tsk->compat_robust_list_index =3D -1; #endif + INIT_LIST_HEAD(&tsk->robust_list2); INIT_LIST_HEAD(&tsk->pi_state_list); tsk->pi_state_cache =3D NULL; tsk->futex_state =3D FUTEX_STATE_OK; diff --git a/include/linux/sched.h b/include/linux/sched.h index 29e500d8d19d..903c1aedbe07 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1297,10 +1297,11 @@ struct task_struct { u32 rmid; #endif #ifdef CONFIG_FUTEX - struct robust_list_head __user *robust_list; + int robust_list_index; #ifdef CONFIG_COMPAT - struct robust_list_head32 __user *compat_robust_list; + int compat_robust_list_index; #endif + struct list_head robust_list2; struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; struct mutex futex_exit_mutex; diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/u= nistd.h index 88dc393c2bca..477cce02ed72 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -850,8 +850,10 @@ __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) =20 +#define __NR_set_robust_list2 467 + #undef __NR_syscalls -#define __NR_syscalls 467 +#define __NR_syscalls 468 =20 /* * 32 bit systems traditionally used 
different diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index d2ee625ea189..13903a278b71 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -146,6 +146,30 @@ struct robust_list_head { struct robust_list __user *list_op_pending; }; =20 +#define ROBUST_LISTS_PER_TASK 10 + +enum robust_list2_type { + ROBUST_LIST_32BIT, + ROBUST_LIST_64BIT, +}; + +#define ROBUST_LIST_TYPE_MASK (ROBUST_LIST_32BIT | ROBUST_LIST_64BIT) + +/* + * This is an entry of a linked list of robust lists. + * + * @head: can point to a 64bit list or a 32bit list + * @list_type: determine the size of the futex pointers in the list + * @index: the index of this entry in the list + * @list: linked list element + */ +struct robust_list2_entry { + void __user *head; + enum robust_list2_type list_type; + unsigned int index; + struct list_head list; +}; + /* * Are there any waiters for this robust futex: */ diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 3d81a53c114c..07a7e5e9bc8d 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -776,9 +776,9 @@ static inline int fetch_robust_entry(struct robust_list= __user **entry, * * We silently return on any sign of list-walking problem. 
*/ -static void exit_robust_list64(struct task_struct *curr) +static void exit_robust_list64(struct task_struct *curr, + struct robust_list_head __user *head) { - struct robust_list_head __user *head =3D curr->robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit =3D ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; @@ -838,7 +838,8 @@ static void exit_robust_list64(struct task_struct *curr) } } #else -static void exit_robust_list64(struct task_struct *curr) +static void exit_robust_list64(struct task_struct *curr, + struct robust_list_head __user *head) { pr_warn("32bit kernel should not allow ROBUST_LIST_64BIT"); } @@ -875,9 +876,9 @@ fetch_robust_entry32(u32 *uentry, struct robust_list __= user **entry, * * We silently return on any sign of list-walking problem. */ -static void exit_robust_list32(struct task_struct *curr) +static void exit_robust_list32(struct task_struct *curr, + struct robust_list_head32 __user *head) { - struct robust_list_head32 __user *head =3D curr->compat_robust_list; struct robust_list __user *entry, *next_entry, *pending; unsigned int limit =3D ROBUST_LIST_LIMIT, pi, pip; unsigned int next_pi; @@ -943,6 +944,70 @@ static void exit_robust_list32(struct task_struct *cur= r) } } =20 +long do_set_robust_list2(struct robust_list_head __user *head, + int index, unsigned int type) +{ + struct list_head *list2 =3D ¤t->robust_list2; + struct robust_list2_entry *prev, *new =3D NULL; + + if (index =3D=3D -1) { + if (list_empty(list2)) { + index =3D 0; + } else { + prev =3D list_last_entry(list2, struct robust_list2_entry, list); + index =3D prev->index + 1; + } + + if (index >=3D ROBUST_LISTS_PER_TASK) + return -EINVAL; + + new =3D kmalloc(sizeof(struct robust_list2_entry), GFP_KERNEL); + if (!new) + return -ENOMEM; + + list_add_tail(&new->list, list2); + new->index =3D index; + + } else if (index >=3D 0) { + struct robust_list2_entry *curr; + + if (list_empty(list2)) + return -ENOENT; + + 
list_for_each_entry(curr, list2, list) { + if (index =3D=3D curr->index) { + new =3D curr; + break; + } + } + + if (!new) + return -ENOENT; + } + + BUG_ON(!new); + new->head =3D head; + new->list_type =3D type; + + return index; +} + +struct robust_list_head __user *get_robust_list2(int index, struct task_st= ruct *task) +{ + struct list_head *list2 =3D &task->robust_list2; + struct robust_list2_entry *curr; + + if (list_empty(list2) || index =3D=3D -1) + return NULL; + + list_for_each_entry(curr, list2, list) { + if (index =3D=3D curr->index) + return curr->head; + } + + return NULL; +} + #ifdef CONFIG_FUTEX_PI =20 /* @@ -1024,24 +1089,28 @@ static inline void exit_pi_state_list(struct task_s= truct *curr) { } =20 static void futex_cleanup(struct task_struct *tsk) { -#ifdef CONFIG_64BIT - if (unlikely(tsk->robust_list)) { - exit_robust_list64(tsk); - tsk->robust_list =3D NULL; - } -#else - if (unlikely(tsk->robust_list)) { - exit_robust_list32(tsk); - tsk->robust_list =3D NULL; - } -#endif + struct robust_list2_entry *curr, *n; + struct list_head *list2 =3D &tsk->robust_list2; =20 -#ifdef CONFIG_COMPAT - if (unlikely(tsk->compat_robust_list)) { - exit_robust_list32(tsk); - tsk->compat_robust_list =3D NULL; + /* + * Walk through the linked list, parsing robust lists and freeing the + * allocated lists + */ + if (unlikely(!list_empty(list2))) { + list_for_each_entry_safe(curr, n, list2, list) { + if (curr->head !=3D NULL) { + if (curr->list_type =3D=3D ROBUST_LIST_64BIT) + exit_robust_list64(tsk, curr->head); + else if (curr->list_type =3D=3D ROBUST_LIST_32BIT) + exit_robust_list32(tsk, curr->head); + curr->head =3D NULL; + } + list_del_init(&curr->list); + kfree(curr); + } } -#endif + + tsk->robust_list_index =3D -1; =20 if (unlikely(!list_empty(&tsk->pi_state_list))) exit_pi_state_list(tsk); diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 6b2f4c7eb720..b8c20deb5552 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -409,6 +409,11 @@ 
extern int __futex_wait(u32 __user *uaddr, unsigned in= t flags, u32 val, extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset); =20 +extern long do_set_robust_list2(struct robust_list_head __user *head, + int index, unsigned int type); + +extern struct robust_list_head __user *get_robust_list2(int index, struct = task_struct *task); + /** * struct futex_vector - Auxiliary struct for futex_waitv() * @w: Userspace provided data diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index dba193dfd216..56ee1123cbd8 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -20,6 +20,18 @@ * the list. There can only be one such pending lock. */ =20 +#ifdef CONFIG_64BIT +static inline int robust_list_native_type(void) +{ + return ROBUST_LIST_64BIT; +} +#else +static inline int robust_list_native_type(void) +{ + return ROBUST_LIST_32BIT; +} +#endif + /** * sys_set_robust_list() - Set the robust-futex list head of a task * @head: pointer to the list-head @@ -28,17 +40,63 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, size_t, len) { + unsigned int type =3D robust_list_native_type(); + int ret; + /* * The kernel knows only one size for now: */ if (unlikely(len !=3D sizeof(*head))) return -EINVAL; =20 - current->robust_list =3D head; + ret =3D do_set_robust_list2(head, current->robust_list_index, type); + if (ret < 0) + return ret; + + current->robust_list_index =3D ret; =20 return 0; } =20 +#define ROBUST_LIST_FLAGS ROBUST_LIST_TYPE_MASK + +/* + * sys_set_robust_list2() + * + * When index =3D=3D -1, create a new list for user. When index >=3D 0, tr= y to find + * the corresponding list and re-set the head there. 
+ * + * Return values: + * >=3D 0: success, index of the robust list + * -EINVAL: invalid flags, invalid index, unaligned head + * -ENOENT: requested index nowhere to be found + * -ENOMEM: error allocating new list + * -EINVAL: too many allocated lists + */ +SYSCALL_DEFINE3(set_robust_list2, struct robust_list_head __user *, head, + int, index, unsigned int, flags) +{ + unsigned int type; + + type =3D flags & ROBUST_LIST_TYPE_MASK; + + if (index < -1 || index >=3D ROBUST_LISTS_PER_TASK) + return -EINVAL; + + if ((flags & ~ROBUST_LIST_FLAGS) !=3D 0) + return -EINVAL; + + if (((uintptr_t) head % sizeof(u32)) !=3D 0) + return -EINVAL; + +#ifndef CONFIG_64BIT + if (type =3D=3D ROBUST_LIST_64BIT) + return -EINVAL; +#endif + + return do_set_robust_list2(head, index, type); +} + /** * sys_get_robust_list() - Get the robust-futex list head of a task * @pid: pid of the process [zero for current task] @@ -52,6 +110,7 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, struct robust_list_head __user *head; unsigned long ret; struct task_struct *p; + int index; =20 rcu_read_lock(); =20 @@ -68,9 +127,11 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock; =20 - head =3D p->robust_list; + index =3D p->robust_list_index; rcu_read_unlock(); =20 + head =3D get_robust_list2(index, p); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(head, head_ptr); @@ -443,10 +504,19 @@ COMPAT_SYSCALL_DEFINE2(set_robust_list, struct robust_list_head32 __user *, head, compat_size_t, len) { + unsigned int type =3D ROBUST_LIST_32BIT; + int ret; + if (unlikely(len !=3D sizeof(*head))) return -EINVAL; =20 - current->compat_robust_list =3D head; + ret =3D do_set_robust_list2((struct robust_list_head __user *) head, + current->robust_list_index, type); + if (ret < 0) + return ret; + + current->robust_list_index =3D ret; + =20 return 0; } @@ -458,6 +528,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, struct robust_list_head32 __user 
*head; unsigned long ret; struct task_struct *p; + int index; =20 rcu_read_lock(); =20 @@ -474,9 +545,11 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock; =20 - head =3D p->compat_robust_list; + index =3D p->compat_robust_list_index; rcu_read_unlock(); =20 + head =3D (struct robust_list_head32 __user *) get_robust_list2(index, p); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(ptr_to_compat(head), head_ptr); --=20 2.48.1 From nobody Sun Feb 8 21:42:14 2026 Received: from fanzine2.igalia.com (fanzine.igalia.com [178.60.130.6]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C7A1E1A23BA; Tue, 25 Feb 2025 18:36:12 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=178.60.130.6 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740508574; cv=none; b=UdwnVmEZKaOmrSUmlN+YVqQWz26YtdHjfyfAFUR7i/e2XQ+ojQQxPEqwUz3C4KEUjpHHjbGJdX6rrmsx3QA8fx9Hk4EGoYUdBIMsLZNKoXndaIkf4jR+Cih4dYSUdCO68RsnQGojlwLN7r2U/UyBSFukCxNX8jjzWpfE5j7+oC4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740508574; c=relaxed/simple; bh=8ie6mdcEpoxrr+6qJXWh27A/NpVno+bkcZ+GTlfHJnk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=avJyJR3EwnBGnRkO3ZKwzOEE9kU97y0EbCbF2mhM3xEe6ndrnZ6x9d8NiMX2TrX5kIzFHTZsH4BPeDWMt6u9x+dZ/G6zVxhIEV/0iXCTBgkDNuu3HUmGJeyFAQMDorPivuJAVjCs7gGMBbi/km1TlKjQFx4xHZnasvRg0guf238= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=igalia.com; spf=pass smtp.mailfrom=igalia.com; dkim=pass (2048-bit key) header.d=igalia.com header.i=@igalia.com header.b=ErsqfNHt; arc=none smtp.client-ip=178.60.130.6 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=igalia.com 
Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=igalia.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=igalia.com header.i=@igalia.com header.b="ErsqfNHt" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=igalia.com; s=20170329; h=Content-Transfer-Encoding:Content-Type:MIME-Version:References: In-Reply-To:Message-ID:Date:Subject:Cc:To:From:Sender:Reply-To:Content-ID: Content-Description:Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc :Resent-Message-ID:List-Id:List-Help:List-Unsubscribe:List-Subscribe: List-Post:List-Owner:List-Archive; bh=pLrj0EitBwAWnlaub054mvi38qvRFG/38Y7ATxZkKl0=; b=ErsqfNHtW+Adyh/vurGdK9So4C mwpdv7uVOrWXpAbKnvTd+IcGHa5LW6RPmwOVdsdKccKubj/P8F3B1kU0RJ9ECFvRMpAFqRxbiAGm4 XXYRCSwsLNwudEUa1fkfj2MHjh9gPX59NG+zsEYiWrLtNon7aBl5Y1Kefqmvyj3L0Pvl0SddJTjWF aCNsQl2CKPM41ct4DwRSYN12y4fDDbLa76OpM4k+iJp/bH+/X3Mvh/fX+QgAptL1hwYkRTMq+2FjX wiHLzjXXRAKwd0cvirZLCdVcIg4FLgS+q6UMmKLmiOoVyr37SPS+0OItiEp5U85euXl+feGNr8hTw WS1IValw==; Received: from [191.204.194.148] (helo=localhost.localdomain) by fanzine2.igalia.com with esmtpsa (Cipher TLS1.3:ECDHE_X25519__RSA_PSS_RSAE_SHA256__AES_256_GCM:256) (Exim) id 1tmzmS-000WtH-Hc; Tue, 25 Feb 2025 19:35:50 +0100 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= To: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Darren Hart , Davidlohr Bueso , Arnd Bergmann , sonicadvance1@gmail.com Cc: linux-kernel@vger.kernel.org, kernel-dev@igalia.com, linux-api@vger.kernel.org, Vinicius Peixoto , Sebastian Andrzej Siewior , =?UTF-8?q?Andr=C3=A9=20Almeida?= Subject: [PATCH v4 3/5] futex: Wire up set_robust_list2 syscall Date: Tue, 25 Feb 2025 15:35:29 -0300 Message-ID: <20250225183531.682556-4-andrealmeid@igalia.com> X-Mailer: git-send-email 2.48.1 In-Reply-To: <20250225183531.682556-1-andrealmeid@igalia.com> References: <20250225183531.682556-1-andrealmeid@igalia.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: 
List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Wire up the new set_robust_list2 syscall in all available architectures. Signed-off-by: Andr=C3=A9 Almeida --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + kernel/sys_ni.c | 1 + scripts/syscall.tbl | 1 + 17 files changed, 17 insertions(+) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/sys= calls/syscall.tbl index c59d53d6d3f3..d1193a7f948e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -506,3 +506,4 @@ 574 common getxattrat sys_getxattrat 575 common listxattrat sys_listxattrat 576 common removexattrat sys_removexattrat +577 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 49eeb2ad8dbd..269721f54a5c 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -481,3 +481,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/sysca= lls/syscall.tbl index f5ed71f1910d..75a387585b3a 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -466,3 
+466,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/= kernel/syscalls/syscall.tbl index 680f568b77f2..176f84b79c1c 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -472,3 +472,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/s= yscalls/syscall_n32.tbl index 0b9b7e25b69a..47e28d67ca8a 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -405,3 +405,4 @@ 464 n32 getxattrat sys_getxattrat 465 n32 listxattrat sys_listxattrat 466 n32 removexattrat sys_removexattrat +467 n32 set_robust_list2 sys_set_robust_list2 diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/s= yscalls/syscall_n64.tbl index c844cd5cda62..488c1bca7715 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -381,3 +381,4 @@ 464 n64 getxattrat sys_getxattrat 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat +467 n64 set_robust_list2 sys_set_robust_list2 diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/s= yscalls/syscall_o32.tbl index 349b8aad1159..f983086695a8 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -454,3 +454,4 @@ 464 o32 getxattrat sys_getxattrat 465 o32 listxattrat sys_listxattrat 466 o32 removexattrat sys_removexattrat +467 o32 set_robust_list2 sys_set_robust_list2 diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/s= yscalls/syscall.tbl index d9fc94c86965..f8735cb8046b 100644 --- 
a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -465,3 +465,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel= /syscalls/syscall.tbl index d8b4ab78bef0..1da55a6a3bb5 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -557,3 +557,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/sysca= lls/syscall.tbl index e9115b4d8b63..93bda0d6580b 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common set_robust_list2 sys_set_robust_list2 sys_set_robust_list2 diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/= syscall.tbl index c8cad33bf250..dd591da98af5 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -470,3 +470,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common set_robust_list2 sys_set_robust_list2 diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/sys= calls/syscall.tbl index 727f99d333b3..a4ee76e234a3 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -512,3 +512,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common set_robust_list2 sys_set_robust_list2 diff 
--git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscal= ls/syscall_32.tbl index 4d0fb2fba7e2..8d609abda75b 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -472,3 +472,4 @@ 464 i386 getxattrat sys_getxattrat 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat +467 i386 set_robust_list2 sys_set_robust_list2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscal= ls/syscall_64.tbl index 5eb708bff1c7..2c6461df154b 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -390,6 +390,7 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common set_robust_list2 sys_set_robust_list2 =20 # # Due to a historical design error, certain syscalls are numbered differen= tly diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/s= yscalls/syscall.tbl index 37effc1b134e..fa46635d7380 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common set_robust_list2 sys_set_robust_list2 diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index c00a86931f8c..71fbac6176c8 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -195,6 +195,7 @@ COND_SYSCALL(move_pages); COND_SYSCALL(set_mempolicy_home_node); COND_SYSCALL(cachestat); COND_SYSCALL(mseal); +COND_SYSCALL(set_robust_list2); =20 COND_SYSCALL(perf_event_open); COND_SYSCALL(accept4); diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl index ebbdb3c42e9f..615a3043c982 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -407,3 +407,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common set_robust_list2 
sys_set_robust_list2 --=20 2.48.1 From nobody Sun Feb 8 21:42:14 2026 Received: from fanzine2.igalia.com (fanzine.igalia.com [178.60.130.6]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7B49118E34A; Tue, 25 Feb 2025 18:36:10 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=178.60.130.6 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740508572; cv=none; b=mTlBfRhzXrYol7kUvdfKIyDGZ0sGHl0doYyZUrEc/tlCpaWzMk+ArYYetxTCsAXLrdIuzmF/0F2Nt8E0heWc1gMA0+RKQ3thkq6eYlAXCzEEHpSOce8krUn/lyyR4EriBo0gDhnzjIuxhrnnMRsfCddgae7sC7fZ8Y4EAbjhFX4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740508572; c=relaxed/simple; bh=FBm4oUcbXFtTEDZ7FDmVHPI2qkkxttQ4wonvfbj3Ujk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=WO4wFbxCmlmVG1HHUBdmGnc3qjAQhrZ4kZTKG8hm/aWX5jjyqq+UT1fKD57b82VATWEkVJQIqlY1ihcjhiSu8lrDMmwSF1V0TkM1eGbPU/+TwFltVPxZ16eJ4JXy7edFIRTeZDtk1WqM1drMzGbPK4qR/5SChT2/5bp22GaFQww= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=igalia.com; spf=pass smtp.mailfrom=igalia.com; dkim=pass (2048-bit key) header.d=igalia.com header.i=@igalia.com header.b=pcefXTgq; arc=none smtp.client-ip=178.60.130.6 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=igalia.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=igalia.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=igalia.com header.i=@igalia.com header.b="pcefXTgq" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=igalia.com; s=20170329; h=Content-Transfer-Encoding:Content-Type:MIME-Version:References: In-Reply-To:Message-ID:Date:Subject:Cc:To:From:Sender:Reply-To:Content-ID: 
Content-Description:Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc :Resent-Message-ID:List-Id:List-Help:List-Unsubscribe:List-Subscribe: List-Post:List-Owner:List-Archive; bh=sft+T4Fu+5ncTC4aZbVc33XxpCTJMgxMtgqsT/dZoSI=; b=pcefXTgqcJecgqrtf3kiYdmEjv 0hDns2PRtXTxOs5U7Mho/c/b58VYp7GSWDz4hVMXfUkpQTqnhG/iNh2pcPTE12V8uPMrxJbj+ISNO PQPabcnjKbPTsWQ/k5phEiClW2r2+HNqTapX+Akfz6vBuSksLckiVW7OOTMoVqPxlkGiUtjWwtg0o dg7TECgMm7lo0mzDDWw4+FffDnYjhMu5AbB2hMDiVeSyWwH/B2QAvNry2adz2dQQmuFVTHTPfJZ/X knrbgSaIeJMRNkEunPumciel9nFGjJaYyu7wsgIgiRRZXyP2awJ+xXyGd22CsScXIMVMRPI3SA9UW 9kah/TxQ==; Received: from [191.204.194.148] (helo=localhost.localdomain) by fanzine2.igalia.com with esmtpsa (Cipher TLS1.3:ECDHE_X25519__RSA_PSS_RSAE_SHA256__AES_256_GCM:256) (Exim) id 1tmzmW-000WtH-7o; Tue, 25 Feb 2025 19:35:54 +0100 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= To: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Darren Hart , Davidlohr Bueso , Arnd Bergmann , sonicadvance1@gmail.com Cc: linux-kernel@vger.kernel.org, kernel-dev@igalia.com, linux-api@vger.kernel.org, Vinicius Peixoto , Sebastian Andrzej Siewior , =?UTF-8?q?Andr=C3=A9=20Almeida?= Subject: [PATCH v4 4/5] futex: Remove the limit of elements for sys_set_robust_list2 lists Date: Tue, 25 Feb 2025 15:35:30 -0300 Message-ID: <20250225183531.682556-5-andrealmeid@igalia.com> X-Mailer: git-send-email 2.48.1 In-Reply-To: <20250225183531.682556-1-andrealmeid@igalia.com> References: <20250225183531.682556-1-andrealmeid@igalia.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Remove the limit of ROBUST_LIST_LIMIT elements that a robust list can have, for the ones created with the new interface. This is done by overwriting the list as it's processed, in a way that avoids circular lists. For the old interface, we keep the limited behavior to avoid changing the API. 
Signed-off-by: Andr=C3=A9 Almeida --- kernel/futex/core.c | 50 ++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 07a7e5e9bc8d..bfd4443208ea 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -777,7 +777,8 @@ static inline int fetch_robust_entry(struct robust_list= __user **entry, * We silently return on any sign of list-walking problem. */ static void exit_robust_list64(struct task_struct *curr, - struct robust_list_head __user *head) + struct robust_list_head __user *head, + bool destroyable) { struct robust_list __user *entry, *next_entry, *pending; unsigned int limit =3D ROBUST_LIST_LIMIT, pi, pip; @@ -821,13 +822,17 @@ static void exit_robust_list64(struct task_struct *cu= rr, } if (rc) return; - entry =3D next_entry; - pi =3D next_pi; + /* * Avoid excessively long or circular lists: */ - if (!--limit) + if (!destroyable && !--limit) break; + else + put_user(&head->list, &entry->next); + + entry =3D next_entry; + pi =3D next_pi; =20 cond_resched(); } @@ -839,7 +844,8 @@ static void exit_robust_list64(struct task_struct *curr, } #else static void exit_robust_list64(struct task_struct *curr, - struct robust_list_head __user *head) + struct robust_list_head __user *head, + bool destroyable) { pr_warn("32bit kernel should not allow ROBUST_LIST_64BIT"); } @@ -877,7 +883,8 @@ fetch_robust_entry32(u32 *uentry, struct robust_list __= user **entry, * We silently return on any sign of list-walking problem. 
*/ static void exit_robust_list32(struct task_struct *curr, - struct robust_list_head32 __user *head) + struct robust_list_head32 __user *head, + bool destroyable) { struct robust_list __user *entry, *next_entry, *pending; unsigned int limit =3D ROBUST_LIST_LIMIT, pi, pip; @@ -926,14 +933,17 @@ static void exit_robust_list32(struct task_struct *cu= rr, } if (rc) return; - uentry =3D next_uentry; - entry =3D next_entry; - pi =3D next_pi; /* * Avoid excessively long or circular lists: */ - if (!--limit) + if (!destroyable && !--limit) break; + else + put_user((struct robust_list __user *) &head->list, &entry->next); + + uentry =3D next_uentry; + entry =3D next_entry; + pi =3D next_pi; =20 cond_resched(); } @@ -1087,26 +1097,38 @@ static void exit_pi_state_list(struct task_struct *= curr) static inline void exit_pi_state_list(struct task_struct *curr) { } #endif =20 +/* + * futex_cleanup - After the task exits, process the robust lists + * + * Walk through the linked list, parsing robust lists and freeing the + * allocated lists. Lists created with the set_robust_list2 don't have a l= imit + * for sizing and can be destroyed while we walk on it to avoid circular l= ist. 
+ */ static void futex_cleanup(struct task_struct *tsk) { struct robust_list2_entry *curr, *n; struct list_head *list2 =3D &tsk->robust_list2; + bool destroyable =3D true; + int i =3D 0; =20 /* - * Walk through the linked list, parsing robust lists and freeing the - * allocated lists */ if (unlikely(!list_empty(list2))) { list_for_each_entry_safe(curr, n, list2, list) { + destroyable =3D true; + if (tsk->robust_list_index =3D=3D i) + destroyable =3D false; + if (curr->head !=3D NULL) { if (curr->list_type =3D=3D ROBUST_LIST_64BIT) - exit_robust_list64(tsk, curr->head); + exit_robust_list64(tsk, curr->head, destroyable); else if (curr->list_type =3D=3D ROBUST_LIST_32BIT) - exit_robust_list32(tsk, curr->head); + exit_robust_list32(tsk, curr->head, destroyable); curr->head =3D NULL; } list_del_init(&curr->list); kfree(curr); + i++; } } =20 --=20 2.48.1 From nobody Sun Feb 8 21:42:14 2026 Received: from fanzine2.igalia.com (fanzine.igalia.com [178.60.130.6]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CEE0F189B91; Tue, 25 Feb 2025 18:36:09 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=178.60.130.6 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740508573; cv=none; b=KstE8sm4D+UvDJIMUWoe8VEsH4TX/8/AOEUJ6MoGwUiEDB2ywgepy4Co8pIBg8YpPhrw5hO9Mpt9JtbzwoXSKv4seKZmvYkX3alPxM4X7GJ+QFQAgVb1BOB0b9jGqm5kgPT/dsmBzG30m2M7VFi/HNDljD/zVeKsxmGuz0PrvZo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740508573; c=relaxed/simple; bh=Kr1Aoxz1jc0w/MFdevMCJFBQi8H+SUq0DzMJxpDwQgY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=Hj1cf0NLtRU9+pt8S41Z/23OFHXCgAZ2duspkeH22LRJ6Stgta+kemhI9ffuhksTE0ArgZhyGN5iKgsGZIV6yEhO0eHqO5NpYRikc4a9UsyKhCwfPROsZR6e+VVxM1SrZSS374zuJu6/ct+/Hj5feqBOODwFXSos3uleQpOxZuM= ARC-Authentication-Results: i=1; 
smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=igalia.com; spf=pass smtp.mailfrom=igalia.com; dkim=pass (2048-bit key) header.d=igalia.com header.i=@igalia.com header.b=aZe0CTf1; arc=none smtp.client-ip=178.60.130.6 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=igalia.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=igalia.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=igalia.com header.i=@igalia.com header.b="aZe0CTf1" DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=igalia.com; s=20170329; h=Content-Transfer-Encoding:Content-Type:MIME-Version:References: In-Reply-To:Message-ID:Date:Subject:Cc:To:From:Sender:Reply-To:Content-ID: Content-Description:Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc :Resent-Message-ID:List-Id:List-Help:List-Unsubscribe:List-Subscribe: List-Post:List-Owner:List-Archive; bh=W4MmNdoMd9oJsfw/bLHr7MLfSZfrHsMdh/doUblxfYo=; b=aZe0CTf1bg3ejYPY6qskfHb8PF SxgHtFT4CvPcRW8RtxeFZMpCtvlkPv0JrjuXYZEf8XYT8oMAxbIe1zmcANZHcrN5AvDkJUATVF0R2 Rt0/p2Sd0F/y8qdW5BeT5fZvxaVQ4XvnHk5kq1q+9iPIIV57yzjDp08oW/CneFnp6PxS/M0YoV/S/ Cq6bKhANLw4AjN+FAaPppJ87s9qVTHTUsxuVLsg4ginzQZC1ZyMxm/wnHdQ3tvjLZp7JqRd5UqSE1 A9EmbHEjaR79nNbjX8dM1Lb18aYk8DlBlf6cE3mW0tn6NYTnpBXH4PJm2VfxxUHsqPwqaV/KHMnOh GvDN5twQ==; Received: from [191.204.194.148] (helo=localhost.localdomain) by fanzine2.igalia.com with esmtpsa (Cipher TLS1.3:ECDHE_X25519__RSA_PSS_RSAE_SHA256__AES_256_GCM:256) (Exim) id 1tmzmZ-000WtH-UG; Tue, 25 Feb 2025 19:35:58 +0100 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= To: Thomas Gleixner , Ingo Molnar , Peter Zijlstra , Darren Hart , Davidlohr Bueso , Arnd Bergmann , sonicadvance1@gmail.com Cc: linux-kernel@vger.kernel.org, kernel-dev@igalia.com, linux-api@vger.kernel.org, Vinicius Peixoto , Sebastian Andrzej Siewior , =?UTF-8?q?Andr=C3=A9=20Almeida?= Subject: [PATCH v4 5/5] selftests: futex: Expand robust list test for 
the new interface Date: Tue, 25 Feb 2025 15:35:31 -0300 Message-ID: <20250225183531.682556-6-andrealmeid@igalia.com> X-Mailer: git-send-email 2.48.1 In-Reply-To: <20250225183531.682556-1-andrealmeid@igalia.com> References: <20250225183531.682556-1-andrealmeid@igalia.com> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Expand the current robust list test for the new set_robust_list2 syscall. Create an option to make it possible to run the same tests using the new syscall, and also add two new relevant tests: one for long lists (bigger than ROBUST_LIST_LIMIT) and one for unaligned addresses. Signed-off-by: Andr=C3=A9 Almeida --- .../selftests/futex/functional/robust_list.c | 160 +++++++++++++++++- 1 file changed, 156 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/futex/functional/robust_list.c b/tools= /testing/selftests/futex/functional/robust_list.c index 42690b2440fd..201acbeeac5a 100644 --- a/tools/testing/selftests/futex/functional/robust_list.c +++ b/tools/testing/selftests/futex/functional/robust_list.c @@ -35,16 +35,45 @@ #include #include #include +#include =20 #define STACK_SIZE (1024 * 1024) =20 #define FUTEX_TIMEOUT 3 =20 +#define SYS_set_robust_list2 467 + +enum robust_list2_type { + ROBUST_LIST_32BIT, + ROBUST_LIST_64BIT, +}; + static pthread_barrier_t barrier, barrier2; =20 +bool robust2 =3D false; + int set_robust_list(struct robust_list_head *head, size_t len) { - return syscall(SYS_set_robust_list, head, len); + int ret, flags; + + if (!robust2) { + return syscall(SYS_set_robust_list, head, len); + } + + if (sizeof(head) =3D=3D 8) + flags =3D ROBUST_LIST_64BIT; + else + flags =3D ROBUST_LIST_32BIT; + + /* + * We act as we have just one list here. We try to use the first slot, + * but if it hasn't been allocated yet we allocate it. 
+ */ + ret =3D syscall(SYS_set_robust_list2, head, 0, flags); + if (ret =3D=3D -1 && errno =3D=3D ENOENT) + ret =3D syscall(SYS_set_robust_list2, head, -1, flags); + + return ret; } =20 int get_robust_list(int pid, struct robust_list_head **head, size_t *len_p= tr) @@ -246,6 +275,11 @@ static void test_set_robust_list_invalid_size(void) size_t head_size =3D sizeof(struct robust_list_head); int ret; =20 + if (robust2) { + ksft_test_result_skip("This test is only for old robust interface\n"); + return; + } + ret =3D set_robust_list(&head, head_size); ASSERT_EQ(ret, 0); =20 @@ -321,6 +355,11 @@ static void test_get_robust_list_child(void) struct robust_list_head head, *get_head; size_t len_ptr; =20 + if (robust2) { + ksft_test_result_skip("Not implemented in the new robust interface\n"); + return; + } + ret =3D pthread_barrier_init(&barrier, NULL, 2); ret =3D pthread_barrier_init(&barrier2, NULL, 2); ASSERT_EQ(ret, 0); @@ -332,7 +371,7 @@ static void test_get_robust_list_child(void) =20 ret =3D get_robust_list(tid, &get_head, &len_ptr); ASSERT_EQ(ret, 0); - ASSERT_EQ(&head, get_head); + ASSERT_EQ(get_head, &head); =20 pthread_barrier_wait(&barrier2); =20 @@ -507,11 +546,119 @@ static void test_circular_list(void) ksft_test_result_pass("%s\n", __func__); } =20 +#define ROBUST_LIST_LIMIT 2048 +#define CHILD_LIST_LIMIT (ROBUST_LIST_LIMIT + 10) + +static int child_robust_list_limit(void *arg) +{ + struct lock_struct *locks; + struct robust_list *list; + struct robust_list_head head; + int ret, i; + + locks =3D (struct lock_struct *) arg; + + ret =3D set_list(&head); + if (ret) + ksft_test_result_fail("set_list error\n"); + + /* + * Create a very long list of locks + */ + head.list.next =3D &locks[0].list; + + list =3D head.list.next; + for (i =3D 0; i < CHILD_LIST_LIMIT - 1; i++) { + list->next =3D &locks[i+1].list; + list =3D list->next; + } + list->next =3D &head.list; + + /* + * Grab the lock in the last one, and die without releasing it + */ + 
mutex_lock(&locks[CHILD_LIST_LIMIT], &head, false); + pthread_barrier_wait(&barrier); + + sleep(1); + + return 0; +} + +/* + * The old robust list used to have a limit of 2048 items from the kernel = side. + * After this limit the kernel stops walking the list and ignore the other + * futexes, causing deadlocks. + * + * For the new interface, test if we can wait for a list of more than 2048 + * elements. + */ +static void test_robust_list_limit(void) +{ + struct lock_struct locks[CHILD_LIST_LIMIT + 1]; + _Atomic(unsigned int) *futex =3D &locks[CHILD_LIST_LIMIT].futex; + struct robust_list_head head; + int ret; + + if (!robust2) { + ksft_test_result_skip("This test is only for new robust interface\n"); + return; + } + + *futex =3D 0; + + ret =3D set_list(&head); + ASSERT_EQ(ret, 0); + + ret =3D pthread_barrier_init(&barrier, NULL, 2); + ASSERT_EQ(ret, 0); + + create_child(child_robust_list_limit, locks); + + /* + * After the child thread creates the very long list of locks, wait on + * the last one. 
+ */ + pthread_barrier_wait(&barrier); + ret =3D mutex_lock(&locks[CHILD_LIST_LIMIT], &head, false); + + if (ret !=3D 0) + printf("futex wait returned %d\n", errno); + ASSERT_EQ(ret, 0); + + ASSERT_TRUE(*futex | FUTEX_OWNER_DIED); + + wait(NULL); + pthread_barrier_destroy(&barrier); + + ksft_test_result_pass("%s\n", __func__); +} + +/* + * The kernel should refuse an unaligned head pointer + */ +static void test_unaligned_address(void) +{ + struct robust_list_head head, *h; + int ret; + + if (!robust2) { + ksft_test_result_skip("This test is only for new robust interface\n"); + return; + } + + h =3D (struct robust_list_head *) ((uintptr_t) &head + 1); + ret =3D set_list(h); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); +} + void usage(char *prog) { printf("Usage: %s\n", prog); printf(" -c Use color\n"); printf(" -h Display this help message\n"); + printf(" -n Use robust2 syscall\n"); printf(" -v L Verbosity level: %d=3DQUIET %d=3DCRITICAL %d=3DINFO\n", VQUIET, VCRITICAL, VINFO); } @@ -520,7 +667,7 @@ int main(int argc, char *argv[]) { int c; =20 - while ((c =3D getopt(argc, argv, "cht:v:")) !=3D -1) { + while ((c =3D getopt(argc, argv, "chnt:v:")) !=3D -1) { switch (c) { case 'c': log_color(1); @@ -531,6 +678,9 @@ int main(int argc, char *argv[]) case 'v': log_verbosity(atoi(optarg)); break; + case 'n': + robust2 =3D true; + break; default: usage(basename(argv[0])); exit(1); @@ -538,7 +688,7 @@ int main(int argc, char *argv[]) } =20 ksft_print_header(); - ksft_set_plan(7); + ksft_set_plan(8); =20 test_robustness(); =20 @@ -548,6 +698,8 @@ int main(int argc, char *argv[]) test_set_list_op_pending(); test_robust_list_multiple_elements(); test_circular_list(); + test_robust_list_limit(); + test_unaligned_address(); =20 ksft_print_cnts(); return 0; --=20 2.48.1