From nobody Wed Dec 17 18:00:05 2025 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 74D141991AE for ; Tue, 25 Feb 2025 17:09:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503370; cv=none; b=qFRNAZVUPjhIf1UBeEWClmPoxXIc6fMbzTPyWtGjGae+pvARSP4Y0g3CNgbhDVWTUxA/5nub1Cp2dpjJlD8FOJSpoSsCZ1ND+i5jMcIwcxS57Ce7D7uG00+5YgsDM34da29cCBZHjMmu3g70KoQwJtQoj8/IepKFZpxV1yggWDA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503370; c=relaxed/simple; bh=1UFPStimk7GCMSpObszKn0RYDzCLZgqRppp75U5nMoY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=eXFsyAmnpqFcj7wQMlVHOlFMIhynwSBwOkleISpOObYapdoknnmU7VErpXE2yITspPe8ZWyzGOZxM6d+UhFfGFKabW2C27sofAwM3nbri0IaEpbCRWoS8nLz2t5OYtev+kMoTSkPwDTvNbbZJoNe2dOlKHCS3TbNa/2VKV4c1B4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=RXJUfcGn; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=ROVoj/wr; arc=none smtp.client-ip=193.142.43.55 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="RXJUfcGn"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="ROVoj/wr" From: Sebastian Andrzej Siewior DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; 
s=2020; t=1740503360; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=E6Q5cz+NF4t9mG06Fvky1LDdpbRbeDpEwN6yr1SC6h0=; b=RXJUfcGniJLr4k2j2Q18xfYYZYQFusS0Za5OWSLlvpKKdQKIxSDKt2ltWBv8a8es2FSVbG CZEWefyXrvUbnRLP2whf8MDprvJsHlLQ0YGxI/06KmuwAs8I3SZJ00zGtWUMB/EFkn1dY2 fjbY6vu6uvUi/9lN/VHcoS4NAY4byltGigfcuETDPOQ7gQXyLSjppxdXXRIRKoFyNBEwWm Fzzrl5TRwlehEIDgj/c1ATEcxPzglyEX0fCB3FvK3iIFeBTQ/+VpQc2vBqHYDG8AU7cVAA Y8pCKIr8ja4sp7EGdhWigu1Hg6XYIiKzePDiI0aon8kadThO5n1qhTp0/HPfgw== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1740503360; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=E6Q5cz+NF4t9mG06Fvky1LDdpbRbeDpEwN6yr1SC6h0=; b=ROVoj/wrSayRMjK5OpYPG5X7uErMr5mDtLouFpfs16nj7az3dBXPZePy1TcNTS8CowNM6b n551CJR+Bx4owsDQ== To: linux-kernel@vger.kernel.org Cc: =?UTF-8?q?Andr=C3=A9=20Almeida?= , Darren Hart , Davidlohr Bueso , Ingo Molnar , Juri Lelli , Peter Zijlstra , Thomas Gleixner , Valentin Schneider , Waiman Long , Sebastian Andrzej Siewior Subject: [PATCH v9 01/11] futex: fixup futex_wait_setup [fold futex: Move futex_queue() into futex_wait_setup()] Date: Tue, 25 Feb 2025 18:09:04 +0100 Message-ID: <20250225170914.289358-2-bigeasy@linutronix.de> In-Reply-To: <20250225170914.289358-1-bigeasy@linutronix.de> References: <20250225170914.289358-1-bigeasy@linutronix.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" we could also make @task a bool signaling it is either NULL or current. 
Signed-off-by: Sebastian Andrzej Siewior --- kernel/futex/waitwake.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index 7655de59ab3d6..44034dee7a48c 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -571,7 +571,8 @@ int futex_wait_multiple(struct futex_vector *vs, unsign= ed int count, * @val: the expected value * @flags: futex flags (FLAGS_SHARED, etc.) * @q: the associated futex_q - * @hb: storage for hash_bucket pointer to be returned to caller + * @key2: the second futex_key if used for requeue PI + * task: Task queueing this futex * * Setup the futex_q and locate the hash_bucket. Get the futex value and * compare it with the expected value. Handle atomic faults internally. @@ -634,7 +635,7 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsign= ed int flags, =20 if (uval !=3D val) { futex_q_unlock(hb); - ret =3D -EWOULDBLOCK; + return -EWOULDBLOCK; } =20 if (key2 && futex_match(&q->key, key2)) { @@ -648,8 +649,9 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsign= ed int flags, * futex_queue() calls spin_unlock() upon completion, both serializing * access to the hash list and forcing another memory barrier. 
*/ - set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); - futex_queue(q, hb, current); + if (task =3D=3D current) + set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); + futex_queue(q, hb, task); } =20 return ret; --=20 2.47.2 From nobody Wed Dec 17 18:00:05 2025 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 74CE11990D9 for ; Tue, 25 Feb 2025 17:09:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503371; cv=none; b=YtmEWBWVer2yDuYQrsgRFoju+r+oPSdlyh8Cj9AJnJXht15HsA3BNvwevJDO/9v1tcS4rLpBNorYnnURxaZHD1zjiKjRopAy5+8C8yItVfUvXVkYycWPD8qWZPpk9z/Cth/AzDCVBRvdjuh3P2h2zgSyV2Bn6NEiEaMNQ73vF8I= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503371; c=relaxed/simple; bh=9+u0fxT/kKUJdd+zdmmGXakO8qf81Dzlwd7I/W/yn4M=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=cyD/Au5QixiIWp0l+jYyxS5XA21/dtpd651qR4KPElBj8RoejRE7d/pb+Y2+khENgEtZw/08xvu5/bo15WZEhynLSd3ug6zkM67oaI3KDjwbkX2SMtjso46Nrgqk3u1fDhFgmTcNl13jSlpuXzC84N0VzM8HZoux7C9+s+zUnWg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=bgE4klGO; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=zbX3Os89; arc=none smtp.client-ip=193.142.43.55 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=linutronix.de 
header.i=@linutronix.de header.b="bgE4klGO"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="zbX3Os89" From: Sebastian Andrzej Siewior DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1740503360; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=ZIUFWfMZivwhlZ0/mkumDKTy431rCC/YdcJfJLYMkgs=; b=bgE4klGOrsqTdb5ztP5v0EdsDZpDOzegHzttyk/X4AlBC7PoJJh2PxgiPKOaMtdRN1fAp8 7XtTNXOnB4+8x/XIaPLzOVnOSUBi8XC7/8uWWlE1RJN/dYJ8T9Uc/I/nAvjXn1ByMLyXWK iih7z9vsI++LLPXO5Vfzmn4Iw6a2s6qC0XIq6Auif3mvlzdyvY2mF9gb4DYSSFq/+uFKV0 5A0+i9F16qdZWw4w23Ylyhu4Fk7XRjQqJk8lMWiJukuwQfTE2zk9649/kNdxXjTShkc1Ff Qj3O+ZgkkcymIOV2AdeQB54GTWDdRQlyGb2CvFLa0oE/m8t9b7BA6IYOe9mHWw== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1740503360; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=ZIUFWfMZivwhlZ0/mkumDKTy431rCC/YdcJfJLYMkgs=; b=zbX3Os89pkk8x/dg5+jnRRq4RQS+PWNidSDO/WO2G3FCgDeplycGEDwsyx5N5F/Sb9kurS jrf5Rxd0UfY3GLAg== To: linux-kernel@vger.kernel.org Cc: =?UTF-8?q?Andr=C3=A9=20Almeida?= , Darren Hart , Davidlohr Bueso , Ingo Molnar , Juri Lelli , Peter Zijlstra , Thomas Gleixner , Valentin Schneider , Waiman Long , Sebastian Andrzej Siewior Subject: [PATCH v9 02/11] futex: Create helper function to initialize a hash slot. 
Date: Tue, 25 Feb 2025 18:09:05 +0100 Message-ID: <20250225170914.289358-3-bigeasy@linutronix.de> In-Reply-To: <20250225170914.289358-1-bigeasy@linutronix.de> References: <20250225170914.289358-1-bigeasy@linutronix.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Factor out the futex_hash_bucket initialisation into a helper function. The helper function will be used in a follow up patch implementing process private hash buckets. Signed-off-by: Sebastian Andrzej Siewior --- kernel/futex/core.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index f72f4561eb94e..69424994e7d9e 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -1122,6 +1122,13 @@ void futex_exit_release(struct task_struct *tsk) futex_cleanup_end(tsk, FUTEX_STATE_DEAD); } =20 +static void futex_hash_bucket_init(struct futex_hash_bucket *fhb) +{ + atomic_set(&fhb->waiters, 0); + plist_head_init(&fhb->chain); + spin_lock_init(&fhb->lock); +} + static int __init futex_init(void) { unsigned int futex_shift; @@ -1139,11 +1146,8 @@ static int __init futex_init(void) futex_hashsize, futex_hashsize); futex_hashsize =3D 1UL << futex_shift; =20 - for (i =3D 0; i < futex_hashsize; i++) { - atomic_set(&futex_queues[i].waiters, 0); - plist_head_init(&futex_queues[i].chain); - spin_lock_init(&futex_queues[i].lock); - } + for (i =3D 0; i < futex_hashsize; i++) + futex_hash_bucket_init(&futex_queues[i]); =20 return 0; } --=20 2.47.2 From nobody Wed Dec 17 18:00:05 2025 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 74D4C1991D2 for ; Tue, 25 Feb 2025 17:09:28 +0000 (UTC) 
Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503370; cv=none; b=GdILuZSpPYhWO/fyHEPvAqu8wWDJroHxhuMFRcwibcaiDmQR3jGiyXEnw0gv6Wu76v/FTvfbGfzlfRZs8QlTC3T8s66orrsghlastPl5ECIL2hWGwJGNcpu2XRgy9RxTwg3hnmwwVXqn5l3qgZwXQVu2kutWKi0wRKDPUkkdoN8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503370; c=relaxed/simple; bh=3bhlo7w2GGaMJljRY+b9XYy6dk16hOCUVDH0fQC3uG4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=PGyB+dm2bbQwvDW+FiWlnh2s18JWaiyLDsemIa5BjSrPA/rVc7QYMZH8YI46RZwTWYC5u0x+1Mx0XWHGky2QT3bFEXA1S+bL258zyV+uFqCquRqzh2VmNStttJhLtpyPXpcb7LigvDHtlth0uY3PF8KicwttM3LHTgJpvZA/ijo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=iTWj0Rfa; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=Ck5QpbF7; arc=none smtp.client-ip=193.142.43.55 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="iTWj0Rfa"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="Ck5QpbF7" From: Sebastian Andrzej Siewior DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1740503361; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=ivcEPiYDw/xauTN5+gctjpftfG2qwnWb+OnXfwr53vg=; 
b=iTWj0Rfaq257nPcZXwes/QSLi3F0VutFC8zPoKhLVgzKAF5lff3RhMDAU+4jrDtgj+epFg KNez42YkLjkZF0X8c7zV78tORU0AhDg7iVyd0sT9aFUZD+QUdivW3sIiMYFCh4Chyax3tG a0AlGS7tS7hgu2fwybSJwS2egKu+JC7L1UFdIwyh7EKNKq+nCv7kxVmg+KiXlV9ZCMD76w OF/nlUfEDxTXAA7WQbPewY1K3PLjeX57rFMhPOMdF6eFfdLFn0JVrWtMFGPcOLDpH6UZ5b vkJiERngJL1n7VFhT6ge4WNkWMO0S/KkokGhV8+hTYkBfd+YPlO6DTaSZmcCQA== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1740503361; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=ivcEPiYDw/xauTN5+gctjpftfG2qwnWb+OnXfwr53vg=; b=Ck5QpbF7HnTuWiBAsyah/Co3Qj8BvSYCBnutjwJC1F4PBJul0kjS9bCjAgN/YINsgDRIiH Wk3IhgUCz8uZEeCw== To: linux-kernel@vger.kernel.org Cc: =?UTF-8?q?Andr=C3=A9=20Almeida?= , Darren Hart , Davidlohr Bueso , Ingo Molnar , Juri Lelli , Peter Zijlstra , Thomas Gleixner , Valentin Schneider , Waiman Long , Sebastian Andrzej Siewior Subject: [PATCH v9 03/11] futex: Add basic infrastructure for local task local hash. Date: Tue, 25 Feb 2025 18:09:06 +0100 Message-ID: <20250225170914.289358-4-bigeasy@linutronix.de> In-Reply-To: <20250225170914.289358-1-bigeasy@linutronix.de> References: <20250225170914.289358-1-bigeasy@linutronix.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The futex hashmap is system wide and shared by random tasks. Each slot is hashed based on its address and VMA. Due to randomized VMAs (and memory allocations) the same logical lock (pointer) can end up in a different hash bucket on each invocation of the application. This in turn means that different applications may share a hash bucket on the first invocation but not on the second and it is not always clear which applications will be involved. 
This can result in high latencies to acquire the futex_hash_bucket::lock especially if the lock owner is limited to a CPU and cannot be effectively PI boosted. Introduce a task local hash map. The hashmap can be allocated via prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, 0) The `0' argument allocates a default number of 16 slots, a higher number can be specified if desired. The current upper limit is 131072. The allocated hashmap is used by all threads within a process. A thread can check if the private map has been allocated via prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); which returns the current number of slots. Signed-off-by: Sebastian Andrzej Siewior --- include/linux/futex.h | 20 ++++++++ include/linux/mm_types.h | 6 ++- include/uapi/linux/prctl.h | 5 ++ kernel/fork.c | 2 + kernel/futex/core.c | 101 +++++++++++++++++++++++++++++++++++-- kernel/sys.c | 4 ++ 6 files changed, 133 insertions(+), 5 deletions(-) diff --git a/include/linux/futex.h b/include/linux/futex.h index b70df27d7e85c..943828db52234 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -77,6 +77,15 @@ void futex_exec_release(struct task_struct *tsk); =20 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); +int futex_hash_prctl(unsigned long arg2, unsigned long arg3); +int futex_hash_allocate_default(void); +void futex_hash_free(struct mm_struct *mm); + +static inline void futex_mm_init(struct mm_struct *mm) +{ + mm->futex_hash_bucket =3D NULL; +} + #else static inline void futex_init_task(struct task_struct *tsk) { } static inline void futex_exit_recursive(struct task_struct *tsk) { } @@ -88,6 +97,17 @@ static inline long do_futex(u32 __user *uaddr, int op, u= 32 val, { return -EINVAL; } +static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3) +{ + return -EINVAL; +} +static inline int futex_hash_allocate_default(void) +{ + return 0; +} +static inline void futex_hash_free(struct mm_struct *mm) { } +static 
inline void futex_mm_init(struct mm_struct *mm) { } + #endif =20 #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6b27db7f94963..c20f2310d78ca 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -30,6 +30,7 @@ #define INIT_PASID 0 =20 struct address_space; +struct futex_hash_bucket; struct mem_cgroup; =20 /* @@ -936,7 +937,10 @@ struct mm_struct { */ seqcount_t mm_lock_seq; #endif - +#ifdef CONFIG_FUTEX + unsigned int futex_hash_mask; + struct futex_hash_bucket *futex_hash_bucket; +#endif =20 unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 5c6080680cb27..55b843644c51a 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -353,4 +353,9 @@ struct prctl_mm_map { */ #define PR_LOCK_SHADOW_STACK_STATUS 76 =20 +/* FUTEX hash management */ +#define PR_FUTEX_HASH 77 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define PR_FUTEX_HASH_GET_SLOTS 2 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 735405a9c5f32..80ac156adebbf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1287,6 +1287,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm= , struct task_struct *p, RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); + futex_mm_init(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLO= CKS) mm->pmd_huge_pte =3D NULL; #endif @@ -1364,6 +1365,7 @@ static inline void __mmput(struct mm_struct *mm) if (mm->binfmt) module_put(mm->binfmt->module); lru_gen_del_mm(mm); + futex_hash_free(mm); mmdrop(mm); } =20 diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 69424994e7d9e..e64a5cf818414 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -39,6 +39,7 @@ #include #include #include +#include =20 #include "futex.h" #include 
"../locking/rtmutex_common.h" @@ -107,18 +108,40 @@ late_initcall(fail_futex_debugfs); =20 #endif /* CONFIG_FAIL_FUTEX */ =20 +static inline bool futex_key_is_private(union futex_key *key) +{ + /* + * Relies on get_futex_key() to set either bit for shared + * futexes -- see comment with union futex_key. + */ + return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED)); +} + /** - * futex_hash - Return the hash bucket in the global hash + * futex_hash - Return the hash bucket in the global or local hash * @key: Pointer to the futex key for which the hash is calculated * * We hash on the keys returned from get_futex_key (see below) and return = the - * corresponding hash bucket in the global hash. + * corresponding hash bucket in the global hash. If the FUTEX is private a= nd + * a local hash table is privated then this one is used. */ struct futex_hash_bucket *__futex_hash(union futex_key *key) { - u32 hash =3D jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, - key->both.offset); + struct futex_hash_bucket *fhb; + u32 hash; =20 + fhb =3D current->mm->futex_hash_bucket; + if (fhb && futex_key_is_private(key)) { + u32 hash_mask =3D current->mm->futex_hash_mask; + + hash =3D jhash2((u32 *)key, + offsetof(typeof(*key), both.offset) / 4, + key->both.offset); + return &fhb[hash & hash_mask]; + } + hash =3D jhash2((u32 *)key, + offsetof(typeof(*key), both.offset) / 4, + key->both.offset); return &futex_queues[hash & (futex_hashsize - 1)]; } =20 @@ -1129,6 +1152,76 @@ static void futex_hash_bucket_init(struct futex_hash= _bucket *fhb) spin_lock_init(&fhb->lock); } =20 +void futex_hash_free(struct mm_struct *mm) +{ + kvfree(mm->futex_hash_bucket); +} + +static int futex_hash_allocate(unsigned int hash_slots) +{ + struct futex_hash_bucket *fhb; + int i; + + if (current->mm->futex_hash_bucket) + return -EALREADY; + + if (!thread_group_leader(current)) + return -EINVAL; + + if (hash_slots =3D=3D 0) + hash_slots =3D 16; + if (hash_slots < 2) + hash_slots =3D 2; + 
if (hash_slots > 131072) + hash_slots =3D 131072; + if (!is_power_of_2(hash_slots)) + hash_slots =3D rounddown_pow_of_two(hash_slots); + + fhb =3D kvmalloc_array(hash_slots, sizeof(struct futex_hash_bucket), GFP_= KERNEL_ACCOUNT); + if (!fhb) + return -ENOMEM; + + current->mm->futex_hash_mask =3D hash_slots - 1; + + for (i =3D 0; i < hash_slots; i++) + futex_hash_bucket_init(&fhb[i]); + + current->mm->futex_hash_bucket =3D fhb; + return 0; +} + +int futex_hash_allocate_default(void) +{ + return futex_hash_allocate(0); +} + +static int futex_hash_get_slots(void) +{ + if (current->mm->futex_hash_bucket) + return current->mm->futex_hash_mask + 1; + return 0; +} + +int futex_hash_prctl(unsigned long arg2, unsigned long arg3) +{ + int ret; + + switch (arg2) { + case PR_FUTEX_HASH_SET_SLOTS: + ret =3D futex_hash_allocate(arg3); + break; + + case PR_FUTEX_HASH_GET_SLOTS: + ret =3D futex_hash_get_slots(); + break; + + default: + ret =3D -EINVAL; + break; + } + return ret; +} + static int __init futex_init(void) { unsigned int futex_shift; diff --git a/kernel/sys.c b/kernel/sys.c index cb366ff8703af..e509ad9795103 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -52,6 +52,7 @@ #include #include #include +#include =20 #include #include @@ -2811,6 +2812,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, ar= g2, unsigned long, arg3, return -EINVAL; error =3D arch_lock_shadow_stack_status(me, arg2); break; + case PR_FUTEX_HASH: + error =3D futex_hash_prctl(arg2, arg3); + break; default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); error =3D -EINVAL; --=20 2.47.2 From nobody Wed Dec 17 18:00:05 2025 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8000919258C for ; Tue, 25 Feb 2025 17:09:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none 
smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503371; cv=none; b=ImSYkUiVsxBTnh2HVd2C0vecLuZQLWTffISqr6WHI4iT4ukttI9aB0E5DdPpjvAuDOHBjUbjxZzqlcU0cGqLDNhk564Nx+FLt/P71W0XGsS5ADWw3DtZLLEfj9PtV8x98s9PsuBjxWWj9tb4ErpDo/mMj11Sr1ZaTz/MKuGw7xo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503371; c=relaxed/simple; bh=z7SKTQ7Ma64q/SAOUM4xlnyQVGuryAeFCcdUA7KW4bI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=UuESnIp94NvTp5R5ysE/Cm3AMKYDa/52wvOIRyDc7QFgnGMaNzWYcdMEkn+dyMbgDFNjhAceEGvBlb2EhXO50cOrun+CAZxDC6/XoT4ayasxICNP2S1npg1WYFt6/MM4bPSbL7hirMXcpYlI07TFp05a3INy8i1SUAvE+Ol7XLU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=KZKvLIIR; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=AoqM9zZb; arc=none smtp.client-ip=193.142.43.55 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="KZKvLIIR"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="AoqM9zZb" From: Sebastian Andrzej Siewior DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1740503361; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=rkKAJ4zyYXh6FJYQyMom9miYRg7bgz+h0w13kY0QJBI=; b=KZKvLIIR1P2O5E1vcCB1yqF0Cr8B6O6jgTeTQT8zyfsdv4uVG327uEpG9c3dCW5sCPd9sU 
lsJo+oib+Q8pd7m/e4p80X7k1NZqlEggx3cTRsCe9ZJET3WJOUofslVvll4lvd2L1TWZjF xA+biGrTRBr9Ij4KQreDtPlBf0+WnY5Esuo0SDpNJj8aY15IRp9nlPdMYbIxOKVafLNGMa CWZodrkkd9Ke7PFvBVwmpTZ44VwULDFaymKP24HgYb7oWM1udCu9+uSctcwUZYDifkq8Oj MPzi4o4FDFRgVtdx9alN7H+nMRFnZ4dGhtJoLrQHEANZVSH1+g0b1OytAy47pQ== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1740503361; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=rkKAJ4zyYXh6FJYQyMom9miYRg7bgz+h0w13kY0QJBI=; b=AoqM9zZbFZey+sikzxRYPfDE8/qG9Mgw9tDuP6raihhKPvXLtEOYRP+6Py1Nax33XvEQPc GtiFpaa/5G6TF8Dw== To: linux-kernel@vger.kernel.org Cc: =?UTF-8?q?Andr=C3=A9=20Almeida?= , Darren Hart , Davidlohr Bueso , Ingo Molnar , Juri Lelli , Peter Zijlstra , Thomas Gleixner , Valentin Schneider , Waiman Long , Sebastian Andrzej Siewior Subject: [PATCH v9 04/11] futex: Hash only the address for private futexes. Date: Tue, 25 Feb 2025 18:09:07 +0100 Message-ID: <20250225170914.289358-5-bigeasy@linutronix.de> In-Reply-To: <20250225170914.289358-1-bigeasy@linutronix.de> References: <20250225170914.289358-1-bigeasy@linutronix.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" futex_hash() passes the whole futex_key to jhash2. The first two members are passed as the first argument and the offset as the "initial value". For private futexes, the mm-part is always the same and it is used only within the process. By excluding the mm part from the hash, we reduce the length passed to jhash2 from 4 (16 / 4) to 2 (8 / 4). This avoids the __jhash_mix() part of jhash. The resulting code is smaller and based on testing this variant performs as well as the original or slightly better. 
Signed-off-by: Sebastian Andrzej Siewior --- kernel/futex/core.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index e64a5cf818414..e4e0bc7722d78 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -117,6 +117,18 @@ static inline bool futex_key_is_private(union futex_ke= y *key) return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED)); } =20 +static struct futex_hash_bucket *futex_hash_private(union futex_key *key, + struct futex_hash_bucket *fhb, + u32 hash_mask) +{ + u32 hash; + + hash =3D jhash2((void *)&key->private.address, + sizeof(key->private.address) / 4, + key->both.offset); + return &fhb[hash & hash_mask]; +} + /** * futex_hash - Return the hash bucket in the global or local hash * @key: Pointer to the futex key for which the hash is calculated @@ -131,14 +143,9 @@ struct futex_hash_bucket *__futex_hash(union futex_key= *key) u32 hash; =20 fhb =3D current->mm->futex_hash_bucket; - if (fhb && futex_key_is_private(key)) { - u32 hash_mask =3D current->mm->futex_hash_mask; + if (fhb && futex_key_is_private(key)) + return futex_hash_private(key, fhb, current->mm->futex_hash_mask); =20 - hash =3D jhash2((u32 *)key, - offsetof(typeof(*key), both.offset) / 4, - key->both.offset); - return &fhb[hash & hash_mask]; - } hash =3D jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, key->both.offset); --=20 2.47.2 From nobody Wed Dec 17 18:00:05 2025 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4CBFE19F41C for ; Tue, 25 Feb 2025 17:09:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503373; cv=none; 
b=AM64b2qsxSZ4n6wJNgHNPBHKzL8/NCQBtfuOFy8x1BgtOF99TQfAYGopB7SL0GOnc8HcB/xhNClR2J7wXDX4uH9EEKefp1zWxiAkNvfVD+Unc80P6VlB0TrF0RLpiTpu5/D0UERM+5rcpGpbZ9+iJE2pOiBCzLpfygDtrqlcsw0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503373; c=relaxed/simple; bh=jx24DITxVlzSBY9t3sScv5dV3Uc64R3fHu3VkFAyLnY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=tBTJsWzNjKEuvRRaqIki9ogZH54q94yEEZNznp/jAEarUcuREUNHS4tHu7RCxgCl+lblXNmNKZkzqE6CPMilMG3AdjJrC3Pi2mYTKWZ/h9tvUpMnhRt5sKJhO8aNGEF/dssirg1BtcDv5EHS1pm5cwUIFnzWkrQSwxOd5anT6/M= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=ADK+hxxT; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=obECOaPQ; arc=none smtp.client-ip=193.142.43.55 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="ADK+hxxT"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="obECOaPQ" From: Sebastian Andrzej Siewior DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1740503361; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=3VkTRA93v/TYQ+iuRG7ZC9MwtD2MiDnMSmSFE4JgP+k=; b=ADK+hxxTVuO8UkhShLFsisxww5yMw/EnDUwFGesojI4d6Wp86vEEPuibGq4xpOitBxuG02 2h8u5iJDh4fRu1X1BDRYnmXAXqRSQ//nD/h53b6ieTjH4YzilpGMo1hDyaH3MJ72xJfB5r UlNgJ2er/j4aOXzw4rwZJndGVwPmXliz5dfcykS9X1WUKiZMbetQMM6crkY5bb+5p46K7O 
9GSmeoNZfvxWYdUwPYhhqde5VDdc7ImSx8IjkiJlgHCTMasLYBhd3aCG/j9MLpWoIeSedO eGhi2IUtzdvf6ykAihSre1p552AQyPXXwUpVuq63XXGUVRV/LQbZ53z9PrONjw== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1740503361; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=3VkTRA93v/TYQ+iuRG7ZC9MwtD2MiDnMSmSFE4JgP+k=; b=obECOaPQo8qbfNZyHx8SPSlJfWeoJ5ukF4TxzDjc0uq6AwSY7Lgu+ezqd7iq69T0NguAOU 3SAzBXrTSl72xcBA== To: linux-kernel@vger.kernel.org Cc: =?UTF-8?q?Andr=C3=A9=20Almeida?= , Darren Hart , Davidlohr Bueso , Ingo Molnar , Juri Lelli , Peter Zijlstra , Thomas Gleixner , Valentin Schneider , Waiman Long , Sebastian Andrzej Siewior Subject: [PATCH v9 05/11] futex: Allow automatic allocation of process wide futex hash. Date: Tue, 25 Feb 2025 18:09:08 +0100 Message-ID: <20250225170914.289358-6-bigeasy@linutronix.de> In-Reply-To: <20250225170914.289358-1-bigeasy@linutronix.de> References: <20250225170914.289358-1-bigeasy@linutronix.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Allocate a default futex hash if a task forks its first thread. 
Signed-off-by: Sebastian Andrzej Siewior --- include/linux/futex.h | 12 ++++++++++++ kernel/fork.c | 24 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/include/linux/futex.h b/include/linux/futex.h index 943828db52234..bad377c30de5e 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -86,6 +86,13 @@ static inline void futex_mm_init(struct mm_struct *mm) mm->futex_hash_bucket =3D NULL; } =20 +static inline bool futex_hash_requires_allocation(void) +{ + if (current->mm->futex_hash_bucket) + return false; + return true; +} + #else static inline void futex_init_task(struct task_struct *tsk) { } static inline void futex_exit_recursive(struct task_struct *tsk) { } @@ -108,6 +115,11 @@ static inline int futex_hash_allocate_default(void) static inline void futex_hash_free(struct mm_struct *mm) { } static inline void futex_mm_init(struct mm_struct *mm) { } =20 +static inline bool futex_hash_requires_allocation(void) +{ + return false; +} + #endif =20 #endif diff --git a/kernel/fork.c b/kernel/fork.c index 80ac156adebbf..824cc55d32ece 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2138,6 +2138,15 @@ static void rv_task_fork(struct task_struct *p) #define rv_task_fork(p) do {} while (0) #endif =20 +static bool need_futex_hash_allocate_default(u64 clone_flags) +{ + if ((clone_flags & (CLONE_THREAD | CLONE_VM)) !=3D (CLONE_THREAD | CLONE_= VM)) + return false; + if (!thread_group_empty(current)) + return false; + return futex_hash_requires_allocation(); +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -2515,6 +2524,21 @@ __latent_entropy struct task_struct *copy_process( if (retval) goto bad_fork_cancel_cgroup; =20 + /* + * Allocate a default futex hash for the user process once the first + * thread spawns. 
+ */ + if (need_futex_hash_allocate_default(clone_flags)) { + retval =3D futex_hash_allocate_default(); + if (retval) + goto bad_fork_core_free; + /* + * If we fail beyond this point we don't free the allocated + * futex hash map. We assume that another thread will be created + * and makes use of it. The hash map will be freed once the main + * thread terminates. + */ + } /* * From this point on we must avoid any synchronous user-space * communication until we take the tasklist-lock. In particular, we do --=20 2.47.2 From nobody Wed Dec 17 18:00:05 2025 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4CC4B1A073F for ; Tue, 25 Feb 2025 17:09:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503372; cv=none; b=uP0V2FgHWaL7bIc0VmuKzoyqpNwd2090521hBqDo15G29VFjO+cnLijBZ1h60TSmIlhdAW8m0921yECecV/9FvynMSPy1KnlNOwgw8kHzxK23abE4ybzDa9i65qNWk/JJBvzhbvgWcXM2tQR4bNjqw0ims853ekl6lxY1Xb1UPI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503372; c=relaxed/simple; bh=sOe+wXZR4BqZvyAanWB7XKCQ40k5e6zY2rFmlzubbGg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=F/KfA+wzguulEWOzphjxmbAXtXV6hUJcXKIobCc/W8ni33+uWbM6En+DHXqcWaQDcfDaQRP5ySZfO/yJYHicfBH92XsDRnwNkAaRIIlTZtBJEv5VyoaE3z7WEoS4Nv3RcC7HmPsc9AxJFm/LFovLLKtOa5PJhucwnCXAq4SOF7c= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=UveOwL8a; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=vZXPygKI; arc=none smtp.client-ip=193.142.43.55 
Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="UveOwL8a"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="vZXPygKI" From: Sebastian Andrzej Siewior DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1740503362; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=aAMRbscoUAdf49hvJ3tjIaA97RO0pYHBV4A0ahIJ7q4=; b=UveOwL8aN2oBbR5anr81lBR2dmCGhlzas1qbNz4by/visEz2upi688DiOod/rUw5bHrtQC 2zLFOZy6rRtvnxdPEpBgcEaWBgjMdp0WQCgGMVCQjDlSPr1hpAtu8NSNSadU90u7mrvC8V nIEZGq3ci8OlFd0mZU1O9tB7863oqPzA9hc7kqTpkdduVVpc8ryR4lpY9FvgGRIP2rrY6a sGTrbuwrHydttQCNUPz0owPJByb89gAo2PXl/Pl8EXF0RE5mbZS23Ye4iUZux8j0NE9DnZ TFzPOcBStNCI5AkCNBVotT3KIDCLy4z10FdfGHZXByxTe5IQKl+ZZAqEkLoOBQ== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1740503362; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=aAMRbscoUAdf49hvJ3tjIaA97RO0pYHBV4A0ahIJ7q4=; b=vZXPygKIp5df2UDgsR6gHbxJzGKZZty2r4MBO/WRxAGe38lrtwDCqK3v0EE+c1dV2WNz+r eqm0d6EKODB+7IAQ== To: linux-kernel@vger.kernel.org Cc: =?UTF-8?q?Andr=C3=A9=20Almeida?= , Darren Hart , Davidlohr Bueso , Ingo Molnar , Juri Lelli , Peter Zijlstra , Thomas Gleixner , Valentin Schneider , Waiman Long , Sebastian Andrzej Siewior Subject: [PATCH v9 06/11] futex: Decrease the waiter count before the unlock operation. 
Date: Tue, 25 Feb 2025 18:09:09 +0100 Message-ID: <20250225170914.289358-7-bigeasy@linutronix.de> In-Reply-To: <20250225170914.289358-1-bigeasy@linutronix.de> References: <20250225170914.289358-1-bigeasy@linutronix.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" To support runtime resizing of the process private hash, it's required to not use the obtained hash bucket once the reference count has been dropped. The reference will be dropped after the unlock of the hash bucket. The number of waiters is currently decremented after the unlock operation. There is no requirement that this needs to happen after the unlock. The increment happens before acquiring the lock to signal early that there will be a waiter. The waker can avoid blocking on the lock if it is known that there will be no waiter. There is no difference in terms of ordering if the decrement happens before or after the unlock. Decrease the waiter count before the unlock operation.
Signed-off-by: Sebastian Andrzej Siewior --- kernel/futex/core.c | 2 +- kernel/futex/requeue.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index e4e0bc7722d78..a66623524a952 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -554,8 +554,8 @@ void futex_q_lock(struct futex_q *q, struct futex_hash_= bucket *hb) void futex_q_unlock(struct futex_hash_bucket *hb) __releases(&hb->lock) { - spin_unlock(&hb->lock); futex_hb_waiters_dec(hb); + spin_unlock(&hb->lock); } =20 void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c index 992e3ce005c6f..023c028d2fce3 100644 --- a/kernel/futex/requeue.c +++ b/kernel/futex/requeue.c @@ -456,8 +456,8 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flag= s1, ret =3D futex_get_value_locked(&curval, uaddr1); =20 if (unlikely(ret)) { - double_unlock_hb(hb1, hb2); futex_hb_waiters_dec(hb2); + double_unlock_hb(hb1, hb2); =20 ret =3D get_user(curval, uaddr1); if (ret) @@ -542,8 +542,8 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flag= s1, * waiter::requeue_state is correct. */ case -EFAULT: - double_unlock_hb(hb1, hb2); futex_hb_waiters_dec(hb2); + double_unlock_hb(hb1, hb2); ret =3D fault_in_user_writeable(uaddr2); if (!ret) goto retry; @@ -556,8 +556,8 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flag= s1, * exit to complete. * - EAGAIN: The user space value changed. */ - double_unlock_hb(hb1, hb2); futex_hb_waiters_dec(hb2); + double_unlock_hb(hb1, hb2); /* * Handle the case where the owner is in the middle of * exiting. Wait for the exit to complete otherwise @@ -674,8 +674,8 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flag= s1, put_pi_state(pi_state); =20 out_unlock: - double_unlock_hb(hb1, hb2); futex_hb_waiters_dec(hb2); + double_unlock_hb(hb1, hb2); } wake_up_q(&wake_q); return ret ? 
ret : task_count; --=20 2.47.2 From nobody Wed Dec 17 18:00:05 2025 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BC5071A238F for ; Tue, 25 Feb 2025 17:09:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503374; cv=none; b=rvEA1/OQSmXXzoBuxdvkH7p6w3IO7t9VWRZLpHHEB2fd0U4yzi6e5FDNy/zM/olebI7/jgUbmVaB1fbp2j6ZJFCDHw54t+B/PlGbKyV4SnWxbbqMhnmXVeMonraCIsG79mHLlg+EyItmSh03YS6LyV4UmGbi/cfGVLIrTC++lg4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503374; c=relaxed/simple; bh=1oppVKse9shIFKRUDnz0EsIpv6wlaSZS/QPBRI7tbTw=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=szBT0C8P7wZbO1VkVzKACAjFK65spN6BshjQo+bm5Mj9uA4utHjXOgLuwqeC0cYXyqdDRaMvJG9jASc1jgPJykYezVLklHsED/UpaNpqI5HSzl+Vb9oNRuynG8HQQsBD0Ap2jfzH6vYHNnBJAUmhjEC+gQMRST+jNxXJh/8p6rE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=eHBYoECW; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=4V9KDlyf; arc=none smtp.client-ip=193.142.43.55 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="eHBYoECW"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="4V9KDlyf" From: Sebastian Andrzej Siewior DKIM-Signature: v=1; a=rsa-sha256; 
c=relaxed/relaxed; d=linutronix.de; s=2020; t=1740503362; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=11iWIDE03Q+uFYEukz8KEdprtJS5/Hh6uPvUsm+O540=; b=eHBYoECWnB1VS7QwrOyXtBJP3YxpDquPr4Ktfpqd578CYO//pdWsasw3Zf5W3dIXVp0TBq SY0raBvFoslkB+cXd2cgxSzDNWNNjSkbfLPhfBfGOYFnL6l8N/l3+0aXADmSw0g+gjh2Qx x0QmqdzK8vsrZPlti0qQ2LoYyBFD609j2NMWI2NnDJ9dnU07AfRgpJmDxsVhFio3CKVVWH RFqjaDk2n4ZWzcgJt+cdm3jtxTwSHaQO5TBnQ3pWzJD6kOgkq0+OnBvOCZCy/zmGy9iNGt S1hmRGNwbtg5SsJ0XuB5KTVmMU0D3UGIbcPPOkVZoPAs+0oJRF7YuDwQ+6GMtQ== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1740503362; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=11iWIDE03Q+uFYEukz8KEdprtJS5/Hh6uPvUsm+O540=; b=4V9KDlyfgLD+7dZhqNHa7WHwPCK69RZ/xGQgxOVrCJQ3MbG4EKF31lu6YpnsUkGE6oaBJW JQtxV2OyRLzCBiDw== To: linux-kernel@vger.kernel.org Cc: =?UTF-8?q?Andr=C3=A9=20Almeida?= , Darren Hart , Davidlohr Bueso , Ingo Molnar , Juri Lelli , Peter Zijlstra , Thomas Gleixner , Valentin Schneider , Waiman Long , Sebastian Andrzej Siewior Subject: [PATCH v9 07/11] futex: Introduce futex_q_lockptr_lock(). 
Date: Tue, 25 Feb 2025 18:09:10 +0100 Message-ID: <20250225170914.289358-8-bigeasy@linutronix.de> In-Reply-To: <20250225170914.289358-1-bigeasy@linutronix.de> References: <20250225170914.289358-1-bigeasy@linutronix.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" futex_lock_pi() and __fixup_pi_state_owner() acquire the futex_q::lock_ptr without holding a reference, assuming the previously obtained hash bucket and the assigned lock_ptr are still valid. This isn't the case once the private hash can be resized and becomes invalid after the reference drop. Introduce futex_q_lockptr_lock() to lock the hash bucket recorded in futex_q::lock_ptr. The lock pointer is read in an RCU section to ensure that it does not go away if the hash bucket has been replaced and the old pointer has been observed. After locking, the pointer needs to be compared to check if it changed. If so then the hash bucket has been replaced and the user has been moved to the new one and lock_ptr has been updated. The lock operation needs to be redone in this case. The locked hash bucket is not returned. A special case is an early return in futex_lock_pi() (due to signal or timeout) and a successful futex_wait_requeue_pi(). In both cases a valid futex_q::lock_ptr is expected (and its matching hash bucket) but since the waiter has been removed from the hash this can no longer be guaranteed. Therefore, before the waiter is removed, a reference is acquired to avoid a resize; it is later dropped by the waiter. Add futex_q_lockptr_lock() and use it. Acquire an additional reference in requeue_pi_wake_futex() and futex_unlock_pi() while the futex_q is removed, denote this extra reference in futex_q::drop_hb_ref and let the waiter drop the reference in this case.
Signed-off-by: Sebastian Andrzej Siewior --- kernel/futex/core.c | 44 ++++++++++++++++++++++++++++++++++++++++++ kernel/futex/futex.h | 4 +++- kernel/futex/pi.c | 15 ++++++++++++-- kernel/futex/requeue.c | 16 ++++++++++++--- 4 files changed, 73 insertions(+), 6 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index a66623524a952..239179e9ed9d5 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -152,6 +152,17 @@ struct futex_hash_bucket *__futex_hash(union futex_key= *key) return &futex_queues[hash & (futex_hashsize - 1)]; } =20 +/** + * futex_hash_get - Get an additional reference for the local hash. + * @hb: ptr to the private local hash. + * + * Obtain an additional reference for the already obtained hash bucket. The + * caller must already own an reference. + */ +void futex_hash_get(struct futex_hash_bucket *hb) +{ +} + void futex_hash_put(struct futex_hash_bucket *hb) { } =20 /** @@ -632,6 +643,39 @@ int futex_unqueue(struct futex_q *q) return ret; } =20 +void futex_q_lockptr_lock(struct futex_q *q) +{ +#if 0 + struct futex_hash_bucket *hb; +#endif + spinlock_t *lock_ptr; + + /* + * See futex_unqueue() why lock_ptr can change. + */ + guard(rcu)(); +retry: + lock_ptr =3D READ_ONCE(q->lock_ptr); + spin_lock(lock_ptr); + + if (unlikely(lock_ptr !=3D q->lock_ptr)) { + spin_unlock(lock_ptr); + goto retry; + } +#if 0 + hb =3D container_of(lock_ptr, struct futex_hash_bucket, lock); + /* + * The caller needs to either hold a reference on the hash (to ensure + * that the hash is not resized) _or_ be enqueued on the hash. This + * ensures that futex_q::lock_ptr is updated while moved to the new + * hash during resize. + * Once the hash bucket is locked the resize operation, which might be + * in progress, will block on the lock. + */ + return hb; +#endif +} + /* * PI futexes can not be requeued and must remove themselves from the hash * bucket. The hash bucket lock (i.e. lock_ptr) is held. 
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index eac6de6ed563a..e6f8f2f9281aa 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -183,6 +183,7 @@ struct futex_q { union futex_key *requeue_pi_key; u32 bitset; atomic_t requeue_state; + bool drop_hb_ref; #ifdef CONFIG_PREEMPT_RT struct rcuwait requeue_wait; #endif @@ -197,12 +198,13 @@ enum futex_access { =20 extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union fute= x_key *key, enum futex_access rw); - +extern void futex_q_lockptr_lock(struct futex_q *q); extern struct hrtimer_sleeper * futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, int flags, u64 range_ns); =20 extern struct futex_hash_bucket *__futex_hash(union futex_key *key); +extern void futex_hash_get(struct futex_hash_bucket *hb); extern void futex_hash_put(struct futex_hash_bucket *hb); =20 DEFINE_CLASS(hb, struct futex_hash_bucket *, diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index 4cee9ec5d97d6..51c69e8808152 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -806,7 +806,7 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, st= ruct futex_q *q, break; } =20 - spin_lock(q->lock_ptr); + futex_q_lockptr_lock(q); raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); =20 /* @@ -1066,7 +1066,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int fla= gs, ktime_t *time, int tryl * spinlock/rtlock (which might enqueue its own rt_waiter) and fix up * the */ - spin_lock(q.lock_ptr); + futex_q_lockptr_lock(&q); /* * Waiter is unqueued. 
*/ @@ -1086,6 +1086,11 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int fl= ags, ktime_t *time, int tryl =20 futex_unqueue_pi(&q); spin_unlock(q.lock_ptr); + if (q.drop_hb_ref) { + CLASS(hb, hb)(&q.key); + /* Additional reference from futex_unlock_pi() */ + futex_hash_put(hb); + } goto out; =20 out_unlock_put_key: @@ -1194,6 +1199,12 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int = flags) */ rt_waiter =3D rt_mutex_top_waiter(&pi_state->pi_mutex); if (!rt_waiter) { + /* + * Acquire a reference for the leaving waiter to ensure + * valid futex_q::lock_ptr. + */ + futex_hash_get(hb); + top_waiter->drop_hb_ref =3D true; __futex_unqueue(top_waiter); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); goto retry_hb; diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c index 023c028d2fce3..b0e64fd454d96 100644 --- a/kernel/futex/requeue.c +++ b/kernel/futex/requeue.c @@ -231,7 +231,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union fu= tex_key *key, =20 WARN_ON(!q->rt_waiter); q->rt_waiter =3D NULL; - + /* + * Acquire a reference for the waiter to ensure valid + * futex_q::lock_ptr. 
+ */ + futex_hash_get(hb); + q->drop_hb_ref =3D true; q->lock_ptr =3D &hb->lock; =20 /* Signal locked state to the waiter */ @@ -826,7 +831,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned i= nt flags, case Q_REQUEUE_PI_LOCKED: /* The requeue acquired the lock */ if (q.pi_state && (q.pi_state->owner !=3D current)) { - spin_lock(q.lock_ptr); + futex_q_lockptr_lock(&q); ret =3D fixup_pi_owner(uaddr2, &q, true); /* * Drop the reference to the pi state which the @@ -853,7 +858,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned i= nt flags, if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) ret =3D 0; =20 - spin_lock(q.lock_ptr); + futex_q_lockptr_lock(&q); debug_rt_mutex_free_waiter(&rt_waiter); /* * Fixup the pi_state owner and possibly acquire the lock if we @@ -885,6 +890,11 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned = int flags, default: BUG(); } + if (q.drop_hb_ref) { + CLASS(hb, hb)(&q.key); + /* Additional reference from requeue_pi_wake_futex() */ + futex_hash_put(hb); + } =20 out: if (to) { --=20 2.47.2 From nobody Wed Dec 17 18:00:05 2025 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D219D1A23AD for ; Tue, 25 Feb 2025 17:09:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503374; cv=none; b=o5ljidqg65e4iQ12Itm/VeeCirUO2LjFG5mW1q6fcCtGzWpyjbYqFcFxCpKeWRjOcO+azxNdER791Ta+I/xxiR1I0QwDXFGfLpZmX9Us8FSQbO2G2xYTMdQ4GnzDCicRuDhFD9JlSghnwKwKgHhncKECIxk+8bfIeGCW0zKGMoo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503374; c=relaxed/simple; bh=wti/elsG0RUA51ZuaoWFp0gCHmZQuf9hhQuWRGremoI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=WmKhHrwBHjeVQR8ZL90nE8/LC9Dzr6kLsLKYD9gKrgCAd3GGJQv7ZVK6yaxHdjpIiSzw55Y5cnZcJabAgwA7DvnDPtIpOWnXtF8qkRHanlWdTeywAfaTvqNlJtYppjL4m0Ahsj3FgocpZ098i6weAzpJCBp+plNi2PEUSaB3WuI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=XIcfGlfL; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=L1Mx7pSF; arc=none smtp.client-ip=193.142.43.55 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="XIcfGlfL"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="L1Mx7pSF" From: Sebastian Andrzej Siewior DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1740503363; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=NGxmewyhId8hTpegAx9z2qlG4UsFCP4k2p/HLIL5dR8=; b=XIcfGlfLP7/y6ekYjUcCidr3F6yKoYKKg6oLG0KB99XmSF44mzzlTERbKvvtO1Y269QakZ p9Oljcu8U6QvbnVPwglnOFLk3UGzPbBwZXDJmpma/m+70ddOP9ELwNOBFp/OQiVn4o04Dj zfIk6pFcIvs5zJfj205HL2nyID8QXX6jXudwdz7LLsrM+sPSSKyt9F6bDwf0ZY9Y3UXeMO 7JTC6N7jYJbCD6BpWo3V7KO/z68stCTkR26g6RrET38sFC4hWDoX2GM3/iqa3TBNIIy864 Ixup3CRBWw4AkaSo6DHwQfyzQ85BIImO2VH9xRiCzbhR3h3CpY+BA5nJdXSRTg== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1740503363; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: 
in-reply-to:in-reply-to:references:references; bh=NGxmewyhId8hTpegAx9z2qlG4UsFCP4k2p/HLIL5dR8=; b=L1Mx7pSFLkKChNb7XCKOoCf4zEov1BRmg8bYonPVAXdx+Nm6y84AgDLLnb1Ilx57wsfvR3 TSRENU0m2+LtjXCw== To: linux-kernel@vger.kernel.org Cc: =?UTF-8?q?Andr=C3=A9=20Almeida?= , Darren Hart , Davidlohr Bueso , Ingo Molnar , Juri Lelli , Peter Zijlstra , Thomas Gleixner , Valentin Schneider , Waiman Long , Sebastian Andrzej Siewior Subject: [PATCH v9 08/11] futex: Acquire a hash reference in futex_wait_multiple_setup(). Date: Tue, 25 Feb 2025 18:09:11 +0100 Message-ID: <20250225170914.289358-9-bigeasy@linutronix.de> In-Reply-To: <20250225170914.289358-1-bigeasy@linutronix.de> References: <20250225170914.289358-1-bigeasy@linutronix.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" futex_wait_multiple_setup() changes task_struct::__state to !TASK_RUNNING and then enqueues on multiple futexes. Every futex_q_lock() acquires a reference on the global hash which is dropped later. If a rehash is in progress then the loop will block on mm_struct::futex_hash_bucket for the rehash to complete and this will lose the previously set task_struct::__state. Acquire a reference on the local hash to avoid blocking on mm_struct::futex_hash_bucket.
Signed-off-by: Sebastian Andrzej Siewior --- kernel/futex/core.c | 10 ++++++++++ kernel/futex/futex.h | 2 ++ kernel/futex/waitwake.c | 21 +++++++++++++++++++-- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 239179e9ed9d5..b08bca2ed0342 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -129,6 +129,11 @@ static struct futex_hash_bucket *futex_hash_private(un= ion futex_key *key, return &fhb[hash & hash_mask]; } =20 +struct futex_private_hash *futex_get_private_hash(void) +{ + return NULL; +} + /** * futex_hash - Return the hash bucket in the global or local hash * @key: Pointer to the futex key for which the hash is calculated @@ -152,6 +157,11 @@ struct futex_hash_bucket *__futex_hash(union futex_key= *key) return &futex_queues[hash & (futex_hashsize - 1)]; } =20 +bool futex_put_private_hash(struct futex_private_hash *hb_p) +{ + return false; +} + /** * futex_hash_get - Get an additional reference for the local hash. * @hb: ptr to the private local hash. 
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index e6f8f2f9281aa..0a76ee6e7dc10 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -206,6 +206,8 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper= *timeout, extern struct futex_hash_bucket *__futex_hash(union futex_key *key); extern void futex_hash_get(struct futex_hash_bucket *hb); extern void futex_hash_put(struct futex_hash_bucket *hb); +extern struct futex_private_hash *futex_get_private_hash(void); +extern bool futex_put_private_hash(struct futex_private_hash *hb_p); =20 DEFINE_CLASS(hb, struct futex_hash_bucket *, if (_T) futex_hash_put(_T), diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index 44034dee7a48c..67eebb5b4b212 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -385,7 +385,7 @@ int futex_unqueue_multiple(struct futex_vector *v, int = count) } =20 /** - * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes + * __futex_wait_multiple_setup - Prepare to wait and enqueue multiple fute= xes * @vs: The futex list to wait on * @count: The size of the list * @woken: Index of the last woken futex, if any. Used to notify the @@ -400,7 +400,7 @@ int futex_unqueue_multiple(struct futex_vector *v, int = count) * - 0 - Success * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL */ -int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *wok= en) +static int __futex_wait_multiple_setup(struct futex_vector *vs, int count,= int *woken) { bool retry =3D false; int ret, i; @@ -491,6 +491,23 @@ int futex_wait_multiple_setup(struct futex_vector *vs,= int count, int *woken) return 0; } =20 +int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *wok= en) +{ + struct futex_private_hash *hb_p; + int ret; + + /* + * Assume to have a private futex and acquire a reference on the private + * hash to avoid blocking on mm_struct::futex_hash_bucket during rehash + * after changing the task state. 
+ */ + hb_p =3D futex_get_private_hash(); + ret =3D __futex_wait_multiple_setup(vs, count, woken); + if (hb_p) + futex_put_private_hash(hb_p); + return ret; +} + /** * futex_sleep_multiple - Check sleeping conditions and sleep * @vs: List of futexes to wait for --=20 2.47.2 From nobody Wed Dec 17 18:00:05 2025 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CA7271A2393 for ; Tue, 25 Feb 2025 17:09:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503374; cv=none; b=OWwvrZtltNcJ5v0kKZhGy8dcy9i6+7iNhsyqLnNQsQscsQTJxI/uhVRSpg2PTFMKuXtO2yESgEectQwIAGXXJbHfwMYQOaAO6xaaLYBUxefKQGTwoduIbRjMxTzrHTJc9UYFTTsLHRNl7D9tPXHYw2+k4W5MIsH3LWpLZ24+oBQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503374; c=relaxed/simple; bh=YqUvIPoskup33fFPxdQ0BXsA0l7ziIULXaMUdvIc5Vs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=R2IWxPBbuSQvQI5PPGT9EvVjAx/BCpF8Q9s71IGZdxHIVNqVxBN1hvDKFLQO4ADL/JMRRirVUt73cwEUWJrkc/LsiNoNQZJG4M3DveFk1kZVZ/OaFioJuHsl9caLPzCBYrrVgrBorU7sB55ywaMyNJ5/xQ6YABpM+PAx7E5IVCQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=EuTnzjtn; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=yootGPQG; arc=none smtp.client-ip=193.142.43.55 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass 
(2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="EuTnzjtn"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="yootGPQG" From: Sebastian Andrzej Siewior DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1740503363; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=M/jdKtUOrhKM1O89fZWDstXrS8WnNa8003TLtytQv0I=; b=EuTnzjtnsykRHADxvqkCUIc+8NZdyZ5HvegBoh4L0XzkgJfZxOo8lFEmc9eqZTrT0+XhrD t/iX/q/1by8J1DNedqlOpsV9SLdwa4SQTGnVQFGMOuLsJioU835Qfot7cTqeitrmhC8Nvd 6Jv537yaHMX394/RfW7zlQzY3s98eA5ifz1MDPmCQZQch30WkEUM+8c6JXKVlZEifxeQyn dmPXy/3ZeowIdoTSvJ8Qcto4Z5iS4U+RbiRGfd4X2mZntcsjqm4Da5nvIseOymRBgcSLzR R83nL6IieREIvQxTb7s6hlgPUEB/C7LRZl7Ah5GabPzLkcrv7SU5Typ5OoErhQ== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1740503363; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=M/jdKtUOrhKM1O89fZWDstXrS8WnNa8003TLtytQv0I=; b=yootGPQGSUvw+iZnQILtjZ2UsrWGGfNnOiDwX+tSFUljHoIun2cSTr137AHTveGdHjaFn2 hCPD9hrlNmCSwNBg== To: linux-kernel@vger.kernel.org Cc: =?UTF-8?q?Andr=C3=A9=20Almeida?= , Darren Hart , Davidlohr Bueso , Ingo Molnar , Juri Lelli , Peter Zijlstra , Thomas Gleixner , Valentin Schneider , Waiman Long , Sebastian Andrzej Siewior Subject: [PATCH v9 09/11] futex: Allow to re-allocate the private local hash. 
Date: Tue, 25 Feb 2025 18:09:12 +0100 Message-ID: <20250225170914.289358-10-bigeasy@linutronix.de> In-Reply-To: <20250225170914.289358-1-bigeasy@linutronix.de> References: <20250225170914.289358-1-bigeasy@linutronix.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The mm_struct::futex_hash_lock guards the futex_hash_bucket assignment/ replacement. The futex_hash_allocate()/ PR_FUTEX_HASH_SET_SLOTS operation can now be invoked at runtime and resize an already existing internal private futex_hash_bucket to another size. The reallocation is based on an idea by Thomas Gleixner: The initial allocation of struct futex_private_hash sets the reference count to one. Every user acquires a reference on the local hash before using it and drops it after it enqueued itself on the hash bucket. There is no reference held while the task is scheduled out while waiting for the wake up. The resize allocates a new struct futex_private_hash and drops the initial reference under the mm_struct::futex_hash_lock. If the reference drop results in destruction of the object then users currently queued on the local hash will be requeued on the new local hash. At the end mm_struct::futex_phash is updated, the old pointer is RCU freed and the mutex is dropped. If the reference drop does not result in destruction of the object then the new pointer is saved as mm_struct::futex_phash_new. In this case replacement is delayed. The user dropping the last reference is not always the best choice to perform the replacement. For instance futex_wait_queue() drops the reference after changing its task state which will also be modified while the futex_hash_lock is acquired. Therefore the replacement is delayed to the task acquiring a reference on the current local hash. 
This scheme keeps the requirement that all waiters/ wakers of the same addr= ess block always on the same futex_hash_bucket::lock. Signed-off-by: Sebastian Andrzej Siewior --- include/linux/futex.h | 5 +- include/linux/mm_types.h | 7 +- kernel/futex/core.c | 248 +++++++++++++++++++++++++++++++++++---- kernel/futex/futex.h | 1 + kernel/futex/requeue.c | 5 + 5 files changed, 237 insertions(+), 29 deletions(-) diff --git a/include/linux/futex.h b/include/linux/futex.h index bad377c30de5e..bfb38764bac7a 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -83,12 +83,13 @@ void futex_hash_free(struct mm_struct *mm); =20 static inline void futex_mm_init(struct mm_struct *mm) { - mm->futex_hash_bucket =3D NULL; + rcu_assign_pointer(mm->futex_phash, NULL); + mutex_init(&mm->futex_hash_lock); } =20 static inline bool futex_hash_requires_allocation(void) { - if (current->mm->futex_hash_bucket) + if (current->mm->futex_phash) return false; return true; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c20f2310d78ca..19abbc870e0a9 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -30,7 +30,7 @@ #define INIT_PASID 0 =20 struct address_space; -struct futex_hash_bucket; +struct futex_private_hash; struct mem_cgroup; =20 /* @@ -938,8 +938,9 @@ struct mm_struct { seqcount_t mm_lock_seq; #endif #ifdef CONFIG_FUTEX - unsigned int futex_hash_mask; - struct futex_hash_bucket *futex_hash_bucket; + struct mutex futex_hash_lock; + struct futex_private_hash __rcu *futex_phash; + struct futex_private_hash *futex_phash_new; #endif =20 unsigned long hiwater_rss; /* High-watermark of RSS usage */ diff --git a/kernel/futex/core.c b/kernel/futex/core.c index b08bca2ed0342..4d9ee3bcaa6d0 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -40,6 +40,7 @@ #include #include #include +#include =20 #include "futex.h" #include "../locking/rtmutex_common.h" @@ -56,6 +57,14 @@ static struct { #define futex_queues (__futex_data.queues) 
#define futex_hashsize (__futex_data.hashsize) =20 +struct futex_private_hash { + rcuref_t users; + unsigned int hash_mask; + struct rcu_head rcu; + bool initial_ref_dropped; + bool released; + struct futex_hash_bucket queues[]; +}; =20 /* * Fault injections for futexes. @@ -129,9 +138,122 @@ static struct futex_hash_bucket *futex_hash_private(u= nion futex_key *key, return &fhb[hash & hash_mask]; } =20 +static void futex_rehash_current_users(struct futex_private_hash *old, + struct futex_private_hash *new) +{ + struct futex_hash_bucket *hb_old, *hb_new; + unsigned int slots =3D old->hash_mask + 1; + u32 hash_mask =3D new->hash_mask; + unsigned int i; + + for (i =3D 0; i < slots; i++) { + struct futex_q *this, *tmp; + + hb_old =3D &old->queues[i]; + + spin_lock(&hb_old->lock); + plist_for_each_entry_safe(this, tmp, &hb_old->chain, list) { + + plist_del(&this->list, &hb_old->chain); + futex_hb_waiters_dec(hb_old); + + WARN_ON_ONCE(this->lock_ptr !=3D &hb_old->lock); + + hb_new =3D futex_hash_private(&this->key, new->queues, hash_mask); + futex_hb_waiters_inc(hb_new); + /* + * The new pointer isn't published yet but an already + * moved user can be unqueued due to timeout or signal. 
+ */ + spin_lock_nested(&hb_new->lock, SINGLE_DEPTH_NESTING); + plist_add(&this->list, &hb_new->chain); + this->lock_ptr =3D &hb_new->lock; + spin_unlock(&hb_new->lock); + } + spin_unlock(&hb_old->lock); + } +} + +static void futex_assign_new_hash(struct futex_private_hash *hb_p_new, + struct mm_struct *mm) +{ + bool drop_init_ref =3D hb_p_new !=3D NULL; + struct futex_private_hash *hb_p; + + if (!hb_p_new) { + hb_p_new =3D mm->futex_phash_new; + mm->futex_phash_new =3D NULL; + } + /* Someone was quicker, the current mask is valid */ + if (!hb_p_new) + return; + + hb_p =3D rcu_dereference_check(mm->futex_phash, + lockdep_is_held(&mm->futex_hash_lock)); + if (hb_p) { + if (hb_p->hash_mask >=3D hb_p_new->hash_mask) { + /* It was increased again while we were waiting */ + kvfree(hb_p_new); + return; + } + /* + * If the caller started the resize then the initial reference + * needs to be dropped. If the object can not be deconstructed + * we save hb_p_new for later and ensure the reference counter + * is not dropped again. + */ + if (drop_init_ref && + (hb_p->initial_ref_dropped || !futex_put_private_hash(hb_p))) { + mm->futex_phash_new =3D hb_p_new; + hb_p->initial_ref_dropped =3D true; + return; + } + if (!READ_ONCE(hb_p->released)) { + mm->futex_phash_new =3D hb_p_new; + return; + } + + futex_rehash_current_users(hb_p, hb_p_new); + } + rcu_assign_pointer(mm->futex_phash, hb_p_new); + kvfree_rcu(hb_p, rcu); +} + struct futex_private_hash *futex_get_private_hash(void) { - return NULL; + struct mm_struct *mm =3D current->mm; + /* + * Ideally we don't loop. If there is a replacement in progress + * then a new private hash is already prepared and a reference can't be + * obtained once the last user dropped it's. + * In that case we block on mm_struct::futex_hash_lock and either have + * to perform the replacement or wait while someone else is doing the + * job. 
Either way, on the second iteration we acquire a reference on the + * new private hash or loop again because a new replacement has been + * requested. + */ +again: + scoped_guard(rcu) { + struct futex_private_hash *hb_p; + + hb_p =3D rcu_dereference(mm->futex_phash); + if (!hb_p) + return NULL; + + if (rcuref_get(&hb_p->users)) + return hb_p; + } + scoped_guard(mutex, &current->mm->futex_hash_lock) + futex_assign_new_hash(NULL, mm); + goto again; +} + +static struct futex_private_hash *futex_get_private_hb(union futex_key *ke= y) +{ + if (!futex_key_is_private(key)) + return NULL; + + return futex_get_private_hash(); } =20 /** @@ -144,12 +266,12 @@ struct futex_private_hash *futex_get_private_hash(voi= d) */ struct futex_hash_bucket *__futex_hash(union futex_key *key) { - struct futex_hash_bucket *fhb; + struct futex_private_hash *hb_p; u32 hash; =20 - fhb =3D current->mm->futex_hash_bucket; - if (fhb && futex_key_is_private(key)) - return futex_hash_private(key, fhb, current->mm->futex_hash_mask); + hb_p =3D futex_get_private_hb(key); + if (hb_p) + return futex_hash_private(key, hb_p->queues, hb_p->hash_mask); =20 hash =3D jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, @@ -159,7 +281,13 @@ struct futex_hash_bucket *__futex_hash(union futex_key= *key) =20 bool futex_put_private_hash(struct futex_private_hash *hb_p) { - return false; + bool released; + + guard(preempt)(); + released =3D rcuref_put_rcusafe(&hb_p->users); + if (released) + WRITE_ONCE(hb_p->released, true); + return released; } =20 /** @@ -171,9 +299,22 @@ bool futex_put_private_hash(struct futex_private_hash = *hb_p) */ void futex_hash_get(struct futex_hash_bucket *hb) { + struct futex_private_hash *hb_p =3D hb->hb_p; + + if (!hb_p) + return; + + WARN_ON_ONCE(!rcuref_get(&hb_p->users)); } =20 -void futex_hash_put(struct futex_hash_bucket *hb) { } +void futex_hash_put(struct futex_hash_bucket *hb) +{ + struct futex_private_hash *hb_p =3D hb->hb_p; + + if (!hb_p) + return; +
futex_put_private_hash(hb_p); +} =20 /** * futex_setup_timer - set up the sleeping hrtimer. @@ -615,6 +756,8 @@ int futex_unqueue(struct futex_q *q) spinlock_t *lock_ptr; int ret =3D 0; =20 + /* RCU so lock_ptr is not going away during locking. */ + guard(rcu)(); /* In the common case we don't take the spinlock, which is nice. */ retry: /* @@ -1028,9 +1171,21 @@ static void compat_exit_robust_list(struct task_stru= ct *curr) static void exit_pi_state_list(struct task_struct *curr) { struct list_head *next, *head =3D &curr->pi_state_list; + struct futex_private_hash *hb_p; struct futex_pi_state *pi_state; union futex_key key =3D FUTEX_KEY_INIT; =20 + /* + * The mutex mm_struct::futex_hash_lock might be acquired. + */ + might_sleep(); + /* + * Ensure the hash remains stable (no resize) during the while loop + * below. The hb pointer is acquired under the pi_lock so we can't block + * on the mutex. + */ + WARN_ON(curr !=3D current); + hb_p =3D futex_get_private_hash(); /* * We are a ZOMBIE and nobody can enqueue itself on * pi_state_list anymore, but we have to be careful @@ -1093,6 +1248,8 @@ static void exit_pi_state_list(struct task_struct *cu= rr) raw_spin_lock_irq(&curr->pi_lock); } raw_spin_unlock_irq(&curr->pi_lock); + if (hb_p) + futex_put_private_hash(hb_p); } #else static inline void exit_pi_state_list(struct task_struct *curr) { } @@ -1206,8 +1363,10 @@ void futex_exit_release(struct task_struct *tsk) futex_cleanup_end(tsk, FUTEX_STATE_DEAD); } =20 -static void futex_hash_bucket_init(struct futex_hash_bucket *fhb) +static void futex_hash_bucket_init(struct futex_hash_bucket *fhb, + struct futex_private_hash *hb_p) { + fhb->hb_p =3D hb_p; atomic_set(&fhb->waiters, 0); plist_head_init(&fhb->chain); spin_lock_init(&fhb->lock); @@ -1215,20 +1374,34 @@ static void futex_hash_bucket_init(struct futex_has= h_bucket *fhb) =20 void futex_hash_free(struct mm_struct *mm) { - kvfree(mm->futex_hash_bucket); + struct futex_private_hash *hb_p; + + 
kvfree(mm->futex_phash_new); + /* + * The mm_struct belonging to the task is about to be removed so all + * threads, that ever accessed the private hash, are gone and the + * pointer can be accessed directly (omitting a RCU-read section or + * lock). + * Since there can not be a thread holding a reference to the private + * hash we free it immediately. + */ + hb_p =3D rcu_dereference_raw(mm->futex_phash); + if (!hb_p) + return; + + if (!hb_p->initial_ref_dropped && WARN_ON(!futex_put_private_hash(hb_p))) + return; + + kvfree(hb_p); } =20 static int futex_hash_allocate(unsigned int hash_slots) { - struct futex_hash_bucket *fhb; + struct futex_private_hash *hb_p, *hb_tofree =3D NULL; + struct mm_struct *mm =3D current->mm; + size_t alloc_size; int i; =20 - if (current->mm->futex_hash_bucket) - return -EALREADY; - - if (!thread_group_leader(current)) - return -EINVAL; - if (hash_slots =3D=3D 0) hash_slots =3D 16; if (hash_slots < 2) @@ -1238,16 +1411,39 @@ static int futex_hash_allocate(unsigned int hash_sl= ots) if (!is_power_of_2(hash_slots)) hash_slots =3D rounddown_pow_of_two(hash_slots); =20 - fhb =3D kvmalloc_array(hash_slots, sizeof(struct futex_hash_bucket), GFP_= KERNEL_ACCOUNT); - if (!fhb) + if (unlikely(check_mul_overflow(hash_slots, sizeof(struct futex_hash_buck= et), + &alloc_size))) return -ENOMEM; =20 - current->mm->futex_hash_mask =3D hash_slots - 1; + if (unlikely(check_add_overflow(alloc_size, sizeof(struct futex_private_h= ash), + &alloc_size))) + return -ENOMEM; + + hb_p =3D kvmalloc(alloc_size, GFP_KERNEL_ACCOUNT); + if (!hb_p) + return -ENOMEM; + + rcuref_init(&hb_p->users, 1); + hb_p->initial_ref_dropped =3D false; + hb_p->released =3D false; + hb_p->hash_mask =3D hash_slots - 1; =20 for (i =3D 0; i < hash_slots; i++) - futex_hash_bucket_init(&fhb[i]); + futex_hash_bucket_init(&hb_p->queues[i], hb_p); =20 - current->mm->futex_hash_bucket =3D fhb; + scoped_guard(mutex, &mm->futex_hash_lock) { + if (mm->futex_phash_new) { + if 
(mm->futex_phash_new->hash_mask <=3D hb_p->hash_mask) { + hb_tofree =3D mm->futex_phash_new; + } else { + hb_tofree =3D hb_p; + hb_p =3D mm->futex_phash_new; + } + mm->futex_phash_new =3D NULL; + } + futex_assign_new_hash(hb_p, mm); + } + kvfree(hb_tofree); return 0; } =20 @@ -1258,8 +1454,12 @@ int futex_hash_allocate_default(void) =20 static int futex_hash_get_slots(void) { - if (current->mm->futex_hash_bucket) - return current->mm->futex_hash_mask + 1; + struct futex_private_hash *hb_p; + + guard(rcu)(); + hb_p =3D rcu_dereference(current->mm->futex_phash); + if (hb_p) + return hb_p->hash_mask + 1; return 0; } =20 @@ -1301,7 +1501,7 @@ static int __init futex_init(void) futex_hashsize =3D 1UL << futex_shift; =20 for (i =3D 0; i < futex_hashsize; i++) - futex_hash_bucket_init(&futex_queues[i]); + futex_hash_bucket_init(&futex_queues[i], 0); =20 return 0; } diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 0a76ee6e7dc10..973efcca2e01b 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -118,6 +118,7 @@ struct futex_hash_bucket { atomic_t waiters; spinlock_t lock; struct plist_head chain; + struct futex_private_hash *hb_p; } ____cacheline_aligned_in_smp; =20 /* diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c index b0e64fd454d96..c716a66f86929 100644 --- a/kernel/futex/requeue.c +++ b/kernel/futex/requeue.c @@ -87,6 +87,11 @@ void requeue_futex(struct futex_q *q, struct futex_hash_= bucket *hb1, futex_hb_waiters_inc(hb2); plist_add(&q->list, &hb2->chain); q->lock_ptr =3D &hb2->lock; + /* + * hb1 and hb2 belong to the same futex_hash_bucket_private + * because if we managed get a reference on hb1 then it can't be + * replaced. Therefore we avoid put(hb1)+get(hb2) here. 
+ */ } q->key =3D *key2; } --=20 2.47.2 From nobody Wed Dec 17 18:00:05 2025 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D171B1A23A9 for ; Tue, 25 Feb 2025 17:09:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503374; cv=none; b=tiAdDzTapQ7TyUIA/kJ/jyqtCo5zJ7NP7VC29obugTqrHp0HA3JOtW92jdFTb0HXuUgXV89sUmg8KZcXEMhu1CHWhunfZc5PvyP9JFDjzddiXTVTFQ9+IDR9VPlaEMSdMF04g7wVe4MHxK0bKfBzedhswJ8E8PmmwY6b8JZHCpw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503374; c=relaxed/simple; bh=8FFO6bFPGwyY0+Xl6fvMWNJIoLX7x/wmhWfnnYAgvQw=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=lleUR8JtQPAzy8g5/yUUf8eZ2cuowNQLDBu6JfLyLeW5pEDoXzcvfeCczitqsbam+5qdoQUyj07tkb/dYuSnBV+ZwdF7A/aAeBMibZEiT8h8ugDCvYMQtJ0n75MzoDQO9LwNjwy6gwUnOcGDJbudtzF29mH0tkz7t0P3sjj1a2o= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=0/OgES9X; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=jEqTkPGN; arc=none smtp.client-ip=193.142.43.55 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="0/OgES9X"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="jEqTkPGN" From: Sebastian Andrzej Siewior DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1740503364; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=YX0O8wuELXX5Vdc+92JPzZgR8pAmCG7s5ghLTgE+/8A=; b=0/OgES9XvkRg6FQDFk0MG+GCGCqmZFXTZZdvEOKFE4u+tzOj6mfcfWObDLnD3+a6YbEHDt LU0KXDV8Ii/Wjg+77yxS7bhRYwfxE9YRAGIffFWDyUAb1PDdT9IIIXGOoWbIDGJDFlzhmk RhQUBjaPlBXIsJ5ZO/JQXPHgq7KZ3kfg74kWc3PSg+DexgxyhjN9TzF8it9mA0DYG/tBsG 9whV4M29iwqc1u9U9OMRPUPOKg15InOVjdr97iT/y/S4fJzIFLBlMz0Z5vFsmMij53UY6R hRYvjsHjBUt5DTEH8lhNNltbrNhy+GPOja+UlIoDCfJKWARLtJi1+Qd6V6gR8g== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1740503364; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=YX0O8wuELXX5Vdc+92JPzZgR8pAmCG7s5ghLTgE+/8A=; b=jEqTkPGNeZxldgoz/YkjeCmWgaTCJy4WBMSUNyYVbjh42D7whPa6Obd9Pc/Ig3LU1KaPkZ iIB5SD9vDjS0/aCQ== To: linux-kernel@vger.kernel.org Cc: =?UTF-8?q?Andr=C3=A9=20Almeida?= , Darren Hart , Davidlohr Bueso , Ingo Molnar , Juri Lelli , Peter Zijlstra , Thomas Gleixner , Valentin Schneider , Waiman Long , Sebastian Andrzej Siewior Subject: [PATCH v9 10/11] futex: Resize local futex hash table based on number of threads. Date: Tue, 25 Feb 2025 18:09:13 +0100 Message-ID: <20250225170914.289358-11-bigeasy@linutronix.de> In-Reply-To: <20250225170914.289358-1-bigeasy@linutronix.de> References: <20250225170914.289358-1-bigeasy@linutronix.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Automatically size the local hash based on the number of threads but don't exceed the number of online CPUs. 
The logic tries to allocate between 16 and futex_hashsize (the default for the system wide hash bucket) and uses 4 * number-of-threads. On CONFIG_BASE_SMALL configs, the additional members for private hash resize have been removed in order to save memory on mm_struct and avoid any additional memory consumption. If we really do this, then I would re-arrange the code structure in the previous patches to limit the ifdefs. The alternative would be to limit the buckets allocated in futex_hash_allocate_default() to 2. Avoiding futex_hash_allocate_default() but allowing PR_FUTEX_HASH_SET_SLOTS to work would require holding mm_struct::futex_hash_lock in exit_pi_state_list() and futex_wait_multiple_setup() so that a private hash does not appear during these operations (which is currently ensured by holding a reference). Signed-off-by: Sebastian Andrzej Siewior --- include/linux/futex.h | 21 +++++++-------- include/linux/mm_types.h | 2 +- kernel/fork.c | 4 +-- kernel/futex/core.c | 57 +++++++++++++++++++++++++++++++++++++--- kernel/futex/futex.h | 8 ++++++ 5 files changed, 73 insertions(+), 19 deletions(-) diff --git a/include/linux/futex.h b/include/linux/futex.h index bfb38764bac7a..77821a78059f2 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -78,6 +78,13 @@ void futex_exec_release(struct task_struct *tsk); long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); int futex_hash_prctl(unsigned long arg2, unsigned long arg3); + +#ifdef CONFIG_BASE_SMALL +static inline int futex_hash_allocate_default(void) { return 0; } +static inline void futex_hash_free(struct mm_struct *mm) { } +static inline void futex_mm_init(struct mm_struct *mm) { } +#else /* !CONFIG_BASE_SMALL */ + int futex_hash_allocate_default(void); void futex_hash_free(struct mm_struct *mm); =20 @@ -87,14 +94,9 @@ static inline void futex_mm_init(struct mm_struct *mm) mutex_init(&mm->futex_hash_lock); } =20 -static inline bool
futex_hash_requires_allocation(void) -{ - if (current->mm->futex_phash) - return false; - return true; -} +#endif /* CONFIG_BASE_SMALL */ =20 -#else +#else /* !CONFIG_FUTEX */ static inline void futex_init_task(struct task_struct *tsk) { } static inline void futex_exit_recursive(struct task_struct *tsk) { } static inline void futex_exit_release(struct task_struct *tsk) { } @@ -116,11 +118,6 @@ static inline int futex_hash_allocate_default(void) static inline void futex_hash_free(struct mm_struct *mm) { } static inline void futex_mm_init(struct mm_struct *mm) { } =20 -static inline bool futex_hash_requires_allocation(void) -{ - return false; -} - #endif =20 #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 19abbc870e0a9..72e68de850745 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -937,7 +937,7 @@ struct mm_struct { */ seqcount_t mm_lock_seq; #endif -#ifdef CONFIG_FUTEX +#if defined(CONFIG_FUTEX) && !defined(CONFIG_BASE_SMALL) struct mutex futex_hash_lock; struct futex_private_hash __rcu *futex_phash; struct futex_private_hash *futex_phash_new; diff --git a/kernel/fork.c b/kernel/fork.c index 824cc55d32ece..5e15e5b24f289 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2142,9 +2142,7 @@ static bool need_futex_hash_allocate_default(u64 clon= e_flags) { if ((clone_flags & (CLONE_THREAD | CLONE_VM)) !=3D (CLONE_THREAD | CLONE_= VM)) return false; - if (!thread_group_empty(current)) - return false; - return futex_hash_requires_allocation(); + return true; } =20 /* diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 4d9ee3bcaa6d0..6d375b9407c85 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -138,6 +138,7 @@ static struct futex_hash_bucket *futex_hash_private(uni= on futex_key *key, return &fhb[hash & hash_mask]; } =20 +#ifndef CONFIG_BASE_SMALL static void futex_rehash_current_users(struct futex_private_hash *old, struct futex_private_hash *new) { @@ -256,6 +257,14 @@ static struct 
futex_private_hash *futex_get_private_hb= (union futex_key *key) return futex_get_private_hash(); } =20 +#else + +static struct futex_private_hash *futex_get_private_hb(union futex_key *ke= y) +{ + return NULL; +} +#endif + /** * futex_hash - Return the hash bucket in the global or local hash * @key: Pointer to the futex key for which the hash is calculated @@ -279,6 +288,7 @@ struct futex_hash_bucket *__futex_hash(union futex_key = *key) return &futex_queues[hash & (futex_hashsize - 1)]; } =20 +#ifndef CONFIG_BASE_SMALL bool futex_put_private_hash(struct futex_private_hash *hb_p) { bool released; @@ -315,6 +325,7 @@ void futex_hash_put(struct futex_hash_bucket *hb) return; futex_put_private_hash(hb_p); } +#endif =20 /** * futex_setup_timer - set up the sleeping hrtimer. @@ -1366,12 +1377,15 @@ void futex_exit_release(struct task_struct *tsk) static void futex_hash_bucket_init(struct futex_hash_bucket *fhb, struct futex_private_hash *hb_p) { +#ifndef CONFIG_BASE_SMALL fhb->hb_p =3D hb_p; +#endif atomic_set(&fhb->waiters, 0); plist_head_init(&fhb->chain); spin_lock_init(&fhb->lock); } =20 +#ifndef CONFIG_BASE_SMALL void futex_hash_free(struct mm_struct *mm) { struct futex_private_hash *hb_p; @@ -1406,8 +1420,8 @@ static int futex_hash_allocate(unsigned int hash_slot= s) hash_slots =3D 16; if (hash_slots < 2) hash_slots =3D 2; - if (hash_slots > 131072) - hash_slots =3D 131072; + if (hash_slots > futex_hashsize) + hash_slots =3D futex_hashsize; if (!is_power_of_2(hash_slots)) hash_slots =3D rounddown_pow_of_two(hash_slots); =20 @@ -1449,7 +1463,31 @@ static int futex_hash_allocate(unsigned int hash_slo= ts) =20 int futex_hash_allocate_default(void) { - return futex_hash_allocate(0); + unsigned int threads, buckets, current_buckets =3D 0; + struct futex_private_hash *hb_p; + + if (!current->mm) + return 0; + + scoped_guard(rcu) { + threads =3D min_t(unsigned int, get_nr_threads(current), num_online_cpus= ()); + hb_p =3D rcu_dereference(current->mm->futex_phash); + if 
(hb_p) + current_buckets =3D hb_p->hash_mask + 1; + } + + /* + * The default allocation will remain within + * 16 <=3D threads * 4 <=3D global hash size + */ + buckets =3D roundup_pow_of_two(4 * threads); + buckets =3D max(buckets, 16); + buckets =3D min(buckets, futex_hashsize); + + if (current_buckets >=3D buckets) + return 0; + + return futex_hash_allocate(buckets); } =20 static int futex_hash_get_slots(void) @@ -1463,6 +1501,19 @@ static int futex_hash_get_slots(void) return 0; } =20 +#else + +static int futex_hash_allocate(unsigned int hash_slots) +{ + return -EINVAL; +} + +static int futex_hash_get_slots(void) +{ + return 0; +} +#endif + int futex_hash_prctl(unsigned long arg2, unsigned long arg3) { int ret; diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 973efcca2e01b..d1149739f3110 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -205,10 +205,18 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleep= er *timeout, int flags, u64 range_ns); =20 extern struct futex_hash_bucket *__futex_hash(union futex_key *key); +#ifdef CONFIG_BASE_SMALL +static inline void futex_hash_get(struct futex_hash_bucket *hb) { } +static inline void futex_hash_put(struct futex_hash_bucket *hb) { } +static inline struct futex_private_hash *futex_get_private_hash(void) { re= turn NULL; } +static inline bool futex_put_private_hash(struct futex_private_hash *hb_p)= { return false; } + +#else /* !CONFIG_BASE_SMALL */ extern void futex_hash_get(struct futex_hash_bucket *hb); extern void futex_hash_put(struct futex_hash_bucket *hb); extern struct futex_private_hash *futex_get_private_hash(void); extern bool futex_put_private_hash(struct futex_private_hash *hb_p); +#endif =20 DEFINE_CLASS(hb, struct futex_hash_bucket *, if (_T) futex_hash_put(_T), --=20 2.47.2 From nobody Wed Dec 17 18:00:05 2025 Received: from galois.linutronix.de (Galois.linutronix.de [193.142.43.55]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client 
certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E829F1A5BAF for ; Tue, 25 Feb 2025 17:09:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=193.142.43.55 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503374; cv=none; b=ZgPqGYBF/xKt9+2lyb2jf9Hq+P85Pw1VxNEaiO2+GKdJwtOed2grAwH9ApJiIdkqjSfkbjFJNsVvqmmATUygG3Q8YngmlybuFojX/0l4e/ByVmrY3yj79p1O4+FdFSBAU0Ivsy1W8VrRUcSurYZJ0qsHXe1kCwDOQ42e6/MtZew= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1740503374; c=relaxed/simple; bh=bZh6gXEa9LZriRoawUsQKgPpDMmzOzeYTGlY71VV2K0=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Zk9D78K4dDDaJr8FR38YoF5AkkaLLD5IiVuc1Xjj+vgCB4j3Ry6siUJwHlFg4n6sxWsBosXtVKKIqDkLktBe7za+wpLhW7fvBimiIBkfMIukN+Ts7xihuhEneBiuNKdyYTH+WtVFqKhMg/U1PDtLseVlzm065L0MjebDos535Pg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de; spf=pass smtp.mailfrom=linutronix.de; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=0b374hwN; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b=Bmm2fdvp; arc=none smtp.client-ip=193.142.43.55 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=linutronix.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=linutronix.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="0b374hwN"; dkim=permerror (0-bit key) header.d=linutronix.de header.i=@linutronix.de header.b="Bmm2fdvp" From: Sebastian Andrzej Siewior DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020; t=1740503364; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: 
in-reply-to:in-reply-to:references:references; bh=pUPvwagFivNcKsYclfPvVNwDsNwAco0iq8Mt4vpkuNQ=; b=0b374hwNB9OMUEQP0gIad2ewTO48TIjaTpjoJdq/x2s85HjeIbaLYTN6X7OMpULLoSCg+J Z1t+uDTE3QVPF3p0utc5vOsIUYp1SaFS6zuP6aSKOHZ9S48rENJr+Nwm30bbGroI2ZXNtq Bxw4D1ldb56VG2852iWRfq2agC4NIVVCUIwLI8BR32PODfVdT41thDuxVpXrfklOLhFLdd iAC4u/xY21CTtUtf+hI5wdexHOlTAqv1sPopLAAxBalFtr3in1QnbdSlkBxHqnxF9CNnfY 27ZYbein5IlpdJCwdRtka2XchZwLdlSDVdHuaxPkJUyan7MZtUKQtwMzCHj9pg== DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=linutronix.de; s=2020e; t=1740503364; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=pUPvwagFivNcKsYclfPvVNwDsNwAco0iq8Mt4vpkuNQ=; b=Bmm2fdvp7vZuLcqYsABzuL1UpDfNoB/JH553DImoiYEBKgxlDkqfoXcHP7ea8jODegaf8U 5hik9gJTxtpBL0Ag== To: linux-kernel@vger.kernel.org Cc: =?UTF-8?q?Andr=C3=A9=20Almeida?= , Darren Hart , Davidlohr Bueso , Ingo Molnar , Juri Lelli , Peter Zijlstra , Thomas Gleixner , Valentin Schneider , Waiman Long , Sebastian Andrzej Siewior Subject: [PATCH v9 11/11] futex: Use a hashmask instead of hashsize. Date: Tue, 25 Feb 2025 18:09:14 +0100 Message-ID: <20250225170914.289358-12-bigeasy@linutronix.de> In-Reply-To: <20250225170914.289358-1-bigeasy@linutronix.de> References: <20250225170914.289358-1-bigeasy@linutronix.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The global hash uses futex_hashsize to store the number of hash buckets that have been allocated during system boot. On each futex_hash() invocation one is subtracted from this number to get the mask. This can be optimized by saving the mask directly, avoiding the subtraction on each futex_hash() invocation.
Rename futex_hashsize to futex_hashmask and save the mask of the allocated hash map. Signed-off-by: Sebastian Andrzej Siewior --- kernel/futex/core.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 6d375b9407c85..283e6644c05f9 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -52,10 +52,10 @@ */ static struct { struct futex_hash_bucket *queues; - unsigned long hashsize; + unsigned long hashmask; } __futex_data __read_mostly __aligned(2*sizeof(long)); #define futex_queues (__futex_data.queues) -#define futex_hashsize (__futex_data.hashsize) +#define futex_hashmask (__futex_data.hashmask) =20 struct futex_private_hash { rcuref_t users; @@ -285,7 +285,7 @@ struct futex_hash_bucket *__futex_hash(union futex_key = *key) hash =3D jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, key->both.offset); - return &futex_queues[hash & (futex_hashsize - 1)]; + return &futex_queues[hash & futex_hashmask]; } =20 #ifndef CONFIG_BASE_SMALL @@ -1420,8 +1420,8 @@ static int futex_hash_allocate(unsigned int hash_slot= s) hash_slots =3D 16; if (hash_slots < 2) hash_slots =3D 2; - if (hash_slots > futex_hashsize) - hash_slots =3D futex_hashsize; + if (hash_slots > futex_hashmask + 1) + hash_slots =3D futex_hashmask + 1; if (!is_power_of_2(hash_slots)) hash_slots =3D rounddown_pow_of_two(hash_slots); =20 @@ -1482,7 +1482,7 @@ int futex_hash_allocate_default(void) */ buckets =3D roundup_pow_of_two(4 * threads); buckets =3D max(buckets, 16); - buckets =3D min(buckets, futex_hashsize); + buckets =3D min(buckets, futex_hashmask + 1); =20 if (current_buckets >=3D buckets) return 0; @@ -1536,24 +1536,25 @@ int futex_hash_prctl(unsigned long arg2, unsigned l= ong arg3) =20 static int __init futex_init(void) { + unsigned long i, hashsize; unsigned int futex_shift; - unsigned long i; =20 #ifdef CONFIG_BASE_SMALL - futex_hashsize =3D 16; + hashsize =3D 16; #else - futex_hashsize =3D 
roundup_pow_of_two(256 * num_possible_cpus()); + hashsize =3D roundup_pow_of_two(256 * num_possible_cpus()); #endif =20 futex_queues =3D alloc_large_system_hash("futex", sizeof(*futex_queues), - futex_hashsize, 0, 0, + hashsize, 0, 0, &futex_shift, NULL, - futex_hashsize, futex_hashsize); - futex_hashsize =3D 1UL << futex_shift; + hashsize, hashsize); + hashsize =3D 1UL << futex_shift; =20 - for (i =3D 0; i < futex_hashsize; i++) + for (i =3D 0; i < hashsize; i++) futex_hash_bucket_init(&futex_queues[i], 0); =20 + futex_hashmask =3D hashsize - 1; return 0; } core_initcall(futex_init); --=20 2.47.2