The futex hashmap is system wide and shared by random tasks. Each slot
is hashed based on its address and VMA. Due to randomized VMAs (and
memory allocations) the same logical lock (pointer) can end up in a
different hash bucket on each invocation of the application. This in
turn means that different applications may share a hash bucket on the
first invocation but not on the second and it is not always clear which
applications will be involved. This can result in high latencies to
acquire the futex_hash_bucket::lock especially if the lock owner is
limited to a CPU and can not be effectively PI boosted.
Introduce a task local hash map. The hashmap can be allocated via
prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, 0)
The `0' argument allocates a default number of 16 slots, a higher number
can be specified if desired. The current upper limit is 131072.
The allocated hashmap is used by all threads within a process.
A thread can check if the private map has been allocated via
prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
Which returns the current number of slots.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/futex.h | 20 ++++++++
include/linux/mm_types.h | 6 ++-
include/uapi/linux/prctl.h | 5 ++
kernel/fork.c | 2 +
kernel/futex/core.c | 101 +++++++++++++++++++++++++++++++++++--
kernel/sys.c | 4 ++
6 files changed, 133 insertions(+), 5 deletions(-)
diff --git a/include/linux/futex.h b/include/linux/futex.h
index b70df27d7e85c..943828db52234 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -77,6 +77,15 @@ void futex_exec_release(struct task_struct *tsk);
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3);
+int futex_hash_allocate_default(void);
+void futex_hash_free(struct mm_struct *mm);
+
+static inline void futex_mm_init(struct mm_struct *mm)
+{
+ mm->futex_hash_bucket = NULL;
+}
+
#else
static inline void futex_init_task(struct task_struct *tsk) { }
static inline void futex_exit_recursive(struct task_struct *tsk) { }
@@ -88,6 +97,17 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
{
return -EINVAL;
}
+static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3)
+{
+ return -EINVAL;
+}
+static inline int futex_hash_allocate_default(void)
+{
+ return 0;
+}
+static inline void futex_hash_free(struct mm_struct *mm) { }
+static inline void futex_mm_init(struct mm_struct *mm) { }
+
#endif
#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6b27db7f94963..c20f2310d78ca 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -30,6 +30,7 @@
#define INIT_PASID 0
struct address_space;
+struct futex_hash_bucket;
struct mem_cgroup;
/*
@@ -936,7 +937,10 @@ struct mm_struct {
*/
seqcount_t mm_lock_seq;
#endif
-
+#ifdef CONFIG_FUTEX
+ unsigned int futex_hash_mask;
+ struct futex_hash_bucket *futex_hash_bucket;
+#endif
unsigned long hiwater_rss; /* High-watermark of RSS usage */
unsigned long hiwater_vm; /* High-water virtual memory usage */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 5c6080680cb27..55b843644c51a 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -353,4 +353,9 @@ struct prctl_mm_map {
*/
#define PR_LOCK_SHADOW_STACK_STATUS 76
+/* FUTEX hash management */
+#define PR_FUTEX_HASH 77
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define PR_FUTEX_HASH_GET_SLOTS 2
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 735405a9c5f32..80ac156adebbf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1287,6 +1287,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
RCU_INIT_POINTER(mm->exe_file, NULL);
mmu_notifier_subscriptions_init(mm);
init_tlb_flush_pending(mm);
+ futex_mm_init(mm);
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS)
mm->pmd_huge_pte = NULL;
#endif
@@ -1364,6 +1365,7 @@ static inline void __mmput(struct mm_struct *mm)
if (mm->binfmt)
module_put(mm->binfmt->module);
lru_gen_del_mm(mm);
+ futex_hash_free(mm);
mmdrop(mm);
}
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index d1d3c7b358b23..26328d8072fee 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -39,6 +39,7 @@
#include <linux/memblock.h>
#include <linux/fault-inject.h>
#include <linux/slab.h>
+#include <linux/prctl.h>
#include "futex.h"
#include "../locking/rtmutex_common.h"
@@ -107,18 +108,40 @@ late_initcall(fail_futex_debugfs);
#endif /* CONFIG_FAIL_FUTEX */
+static inline bool futex_key_is_private(union futex_key *key)
+{
+ /*
+ * Relies on get_futex_key() to set either bit for shared
+ * futexes -- see comment with union futex_key.
+ */
+ return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED));
+}
+
/**
- * futex_hash - Return the hash bucket in the global hash
+ * futex_hash - Return the hash bucket in the global or local hash
* @key: Pointer to the futex key for which the hash is calculated
*
* We hash on the keys returned from get_futex_key (see below) and return the
- * corresponding hash bucket in the global hash.
+ * corresponding hash bucket in the global hash. If the FUTEX is private and
+ * a local hash table is provided then this one is used.
*/
struct futex_hash_bucket *futex_hash(union futex_key *key)
{
- u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
- key->both.offset);
+ struct futex_hash_bucket *fhb;
+ u32 hash;
+ fhb = current->mm->futex_hash_bucket;
+ if (fhb && futex_key_is_private(key)) {
+ u32 hash_mask = current->mm->futex_hash_mask;
+
+ hash = jhash2((u32 *)key,
+ offsetof(typeof(*key), both.offset) / 4,
+ key->both.offset);
+ return &fhb[hash & hash_mask];
+ }
+ hash = jhash2((u32 *)key,
+ offsetof(typeof(*key), both.offset) / 4,
+ key->both.offset);
return &futex_queues[hash & (futex_hashsize - 1)];
}
@@ -1131,6 +1154,76 @@ static void futex_hash_bucket_init(struct futex_hash_bucket *fhb)
spin_lock_init(&fhb->lock);
}
+void futex_hash_free(struct mm_struct *mm)
+{
+ kvfree(mm->futex_hash_bucket);
+}
+
+static int futex_hash_allocate(unsigned int hash_slots)
+{
+ struct futex_hash_bucket *fhb;
+ int i;
+
+ if (current->mm->futex_hash_bucket)
+ return -EALREADY;
+
+ if (!thread_group_leader(current))
+ return -EINVAL;
+
+ if (hash_slots == 0)
+ hash_slots = 16;
+ if (hash_slots < 2)
+ hash_slots = 2;
+ if (hash_slots > 131072)
+ hash_slots = 131072;
+ if (!is_power_of_2(hash_slots))
+ hash_slots = rounddown_pow_of_two(hash_slots);
+
+ fhb = kvmalloc_array(hash_slots, sizeof(struct futex_hash_bucket), GFP_KERNEL_ACCOUNT);
+ if (!fhb)
+ return -ENOMEM;
+
+ current->mm->futex_hash_mask = hash_slots - 1;
+
+ for (i = 0; i < hash_slots; i++)
+ futex_hash_bucket_init(&fhb[i]);
+
+ current->mm->futex_hash_bucket = fhb;
+ return 0;
+}
+
+int futex_hash_allocate_default(void)
+{
+ return futex_hash_allocate(0);
+}
+
+static int futex_hash_get_slots(void)
+{
+ if (current->mm->futex_hash_bucket)
+ return current->mm->futex_hash_mask + 1;
+ return 0;
+}
+
+int futex_hash_prctl(unsigned long arg2, unsigned long arg3)
+{
+ int ret;
+
+ switch (arg2) {
+ case PR_FUTEX_HASH_SET_SLOTS:
+ ret = futex_hash_allocate(arg3);
+ break;
+
+ case PR_FUTEX_HASH_GET_SLOTS:
+ ret = futex_hash_get_slots();
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
static int __init futex_init(void)
{
unsigned int futex_shift;
diff --git a/kernel/sys.c b/kernel/sys.c
index cb366ff8703af..e509ad9795103 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -52,6 +52,7 @@
#include <linux/user_namespace.h>
#include <linux/time_namespace.h>
#include <linux/binfmts.h>
+#include <linux/futex.h>
#include <linux/sched.h>
#include <linux/sched/autogroup.h>
@@ -2811,6 +2812,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
return -EINVAL;
error = arch_lock_shadow_stack_status(me, arg2);
break;
+ case PR_FUTEX_HASH:
+ error = futex_hash_prctl(arg2, arg3);
+ break;
default:
trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5);
error = -EINVAL;
--
2.47.2
On Mon, Feb 03, 2025 at 02:59:23PM +0100, Sebastian Andrzej Siewior wrote:
> struct futex_hash_bucket *futex_hash(union futex_key *key)
> {
> - u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
> - key->both.offset);
> + struct futex_hash_bucket *fhb;
> + u32 hash;
>
> + fhb = current->mm->futex_hash_bucket;
> + if (fhb && futex_key_is_private(key)) {
> + u32 hash_mask = current->mm->futex_hash_mask;
> +
> + hash = jhash2((u32 *)key,
> + offsetof(typeof(*key), both.offset) / 4,
> + key->both.offset);
> + return &fhb[hash & hash_mask];
> + }
> + hash = jhash2((u32 *)key,
> + offsetof(typeof(*key), both.offset) / 4,
> + key->both.offset);
> return &futex_queues[hash & (futex_hashsize - 1)];
> }
Having read this later patch, I would still prefer this to look like:
struct futex_hash_bucket *futex_hash(union futex_key *key)
{
struct futex_hash_bucket *fhb = current->mm->futex_hash_bucket;
u32 hash;
if (futex_key_is_private) {
// shorter jhash invocation;
} else {
hash = jhash2((u32 *)key,
offsetof(typeof(*key), both.offset) / 4,
key->both.offset);
}
if (fhb)
return fhb[hash & current->mm->futex_hash_mask];
return &futex_queues[hash & (futex_hashsize - 1)];
}
Because the shorter hash invocation only cares about being private, not
about having fhb.
Notably, I'm also still thinking about all that pending FUTEX2_NUMA
stuff.
On 2025-02-03 15:41:01 [+0100], Peter Zijlstra wrote: > Notably, I'm also still thikning about all that pending FUTEX2_NUMA > stuff. With local-hash there is still room for FUTEX2_NUMA? Sebastian
On Mon, Feb 03, 2025 at 04:52:51PM +0100, Sebastian Andrzej Siewior wrote: > On 2025-02-03 15:41:01 [+0100], Peter Zijlstra wrote: > > Notably, I'm also still thikning about all that pending FUTEX2_NUMA > > stuff. > > With local-hash there is still room for FUTEX2_NUMA? Yes, single process can span multiple nodes, and then you still get the cross node contention issues.
On Tue, Feb 04 2025 at 09:41, Peter Zijlstra wrote:
> On Mon, Feb 03, 2025 at 04:52:51PM +0100, Sebastian Andrzej Siewior wrote:
>> On 2025-02-03 15:41:01 [+0100], Peter Zijlstra wrote:
>> > Notably, I'm also still thikning about all that pending FUTEX2_NUMA
>> > stuff.
>>
>> With local-hash there is still room for FUTEX2_NUMA?
>
> Yes, single process can span multiple nodes, and then you still get the
> cross node contention issues.
This is strictly about process private futexes, while FUTEX2_NUMA covers
both shared and private futexes and helps to avoid the hash collision
and resulting contention problems which are caused by the current global
hash.
With the private futex separation you obviously get the cross node
memory access problems, but not the general contention issues which
FUTEX2_NUMA tries to address.
Thanks,
tglx
On Mon, Feb 03, 2025 at 03:41:02PM +0100, Peter Zijlstra wrote: > Because the shorter hash invocation only cares about being private, not > about having fhb. While driving to pick up kid from high school, I realized I was full of crap. Short hash is very much specific to local thing. Carry on.
On Mon, Feb 03, 2025 at 02:59:23PM +0100, Sebastian Andrzej Siewior wrote:
> +static int futex_hash_allocate(unsigned int hash_slots)
> +{
> + struct futex_hash_bucket *fhb;
> + int i;
> +
> + if (current->mm->futex_hash_bucket)
> + return -EALREADY;
> +
> + if (!thread_group_leader(current))
> + return -EINVAL;
> +
> + if (hash_slots == 0)
> + hash_slots = 16;
> + if (hash_slots < 2)
> + hash_slots = 2;
> + if (hash_slots > 131072)
> + hash_slots = 131072;
> + if (!is_power_of_2(hash_slots))
> + hash_slots = rounddown_pow_of_two(hash_slots);
Why not require the argument is 2^n and fail otherwise?
On Mon, Feb 03, 2025 at 02:59:23PM +0100, Sebastian Andrzej Siewior wrote: > The futex hashmap is system wide and shared by random tasks. Each slot > is hashed based on its address and VMA. Due to randomized VMAs (and > memory allocations) the same logical lock (pointer) can end up in a > different hash bucket on each invocation of the application. This in > turn means that different applications may share a hash bucket on the > first invocation but not on the second an it is not always clear which > applications will be involved. This can result in high latency's to > acquire the futex_hash_bucket::lock especially if the lock owner is > limited to a CPU and not be effectively PI boosted. > > Introduce a task local hash map. The hashmap can be allocated via > prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, 0) > > The `0' argument allocates a default number of 16 slots, a higher number > can be specified if desired. The current upper limit is 131072. Hmm, I would expect 0 to disable the local thing. Now, I realize this is somewhat tricky, since there might be futexes inside it. But mapping 0 to some default value seems.. well, odd.
On 2025-02-03 15:27:43 [+0100], Peter Zijlstra wrote: > On Mon, Feb 03, 2025 at 02:59:23PM +0100, Sebastian Andrzej Siewior wrote: > > The futex hashmap is system wide and shared by random tasks. Each slot > > is hashed based on its address and VMA. Due to randomized VMAs (and > > memory allocations) the same logical lock (pointer) can end up in a > > different hash bucket on each invocation of the application. This in > > turn means that different applications may share a hash bucket on the > > first invocation but not on the second an it is not always clear which > > applications will be involved. This can result in high latency's to > > acquire the futex_hash_bucket::lock especially if the lock owner is > > limited to a CPU and not be effectively PI boosted. > > > > Introduce a task local hash map. The hashmap can be allocated via > > prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, 0) > > > > The `0' argument allocates a default number of 16 slots, a higher number > > can be specified if desired. The current upper limit is 131072. > > Hmm, I would expect 0 to disable the local thing. > > Now, I realize this is somewhat tricky, since there might be futexes > inside it. But mapping 0 to some default value seems.. well, odd. Looking at this from the top of the series: - The default value (currently 16) supposed to be something sane so the user does not have to worry about it. We could increase it if needed. - Currently each thread will alter the limit if needed based on the formula. So even if you set it to 2 or 16, once you have 16 threads you will have 64 buckets. - The number of buckets can only be increased, not shrunk. There is no technical requirement for it other than "avoid a race where you use a lower number if a lot of threads a fired at once". - It is not expected to go back to the global hash. - The upper limit is the same limit as for the global hash. 
All the things I mentioned are open for debate: - We could force the user to specify a power-of-two number for the buckets (due to current implementation) and not round. - We could allow increasing and shrinking the number of buckets. Allowing it to shrink by accident if many threads are fired at once. - We could say the user knows best and disable the heuristic once a size has been requested. - We could use 0 to disable the local hash and use the global one instead if this is a requirement. Doing this before a thread is created is preferred. Later might get tricky. Sebastian
On Mon, Feb 03, 2025 at 04:51:14PM +0100, Sebastian Andrzej Siewior wrote: > All the things I mentioned are open for debate: So how about we add sysctl_futex_private; when set to 0 it will disable the private hash for every new task, unless task explicitly calls PR_FUTEX_HASH_SET. When non-zero, it is the base for the auto-scaling. (then BASE_SMALL would have this default to 0, otherwise 4 as per this patch set) I would let PR_FUTEX_HASH_SET override and disable auto-scaling. There is nothing more annoying than a computer that thinks it knows better. User asked for N, user gets N etc. If user sets 0, fall back to global hash. Anyway, none of this solves anything when a process has both an active RT part and an active !RT part (which isn't uncommon AFAICT). Then the RT bits will still get interference from the !RT bits. Do we want to complicate things and consider that?
On 2025-02-04 11:34:47 [+0100], Peter Zijlstra wrote: > On Mon, Feb 03, 2025 at 04:51:14PM +0100, Sebastian Andrzej Siewior wrote: > > > All the things I mentioned are open for debate: > > So how about we add sysctl_futex_private; when set to 0 it will disable > the private hash for every new task, unless task explicitly calls > PR_FUTEX_HASH_SET. When non-zero, it is the base for the auto-scaling. > > (then BASE_SMALL would have this default to 0, otherwise 4 as per this > patch set) > > I would let PR_FUTEX_HASH_SET override and disable auto-scaling. There > is nothing more annoying than a computer that thinks it knows better. > User asked for N, user gets N etc. > > If user sets 0, fall back to global hash. Looks sane. I started with PR_FUTEX_HASH_SET to avoid the rehash at runtime. Now that I have this working PR_FUTEX_HASH_SET does not look needed. But okay. Having a default enable or disable with the sysctl and a PR_FUTEX_HASH_SET which sets a requested value and disabling auto-scale looks okay. Assuming the user knows best. Even that it is hard to get right given random pointer and so on. > Anyway, none of this solves anything when a process has both an active > RT part and an active !RT part (which isn't uncommon AFAICT). > > Then the RT bits will still get interference from the !RT bits. Do we > want to complicate things and consider that? I don't think so. The active and inactive are common but it is still the same process so you can expect it. The ugly part is when it is an entirely different task and it is random which one it is. Sebastian
Hi, On 05/02/25 09:39, Sebastian Andrzej Siewior wrote: > On 2025-02-04 11:34:47 [+0100], Peter Zijlstra wrote: ... > > Anyway, none of this solves anything when a process has both an active > > RT part and an active !RT part (which isn't uncommon AFAICT). > > > > Then the RT bits will still get interference from the !RT bits. Do we > > want to complicate things and consider that? > > I don't think so. The active and inactive are common but it is still the > same process so you can expect it. The ugly part is when it is an > entirely different task and it is random which one it is. Not entirely sure we are thinking about the same situation, but it looks like we have cases of RT tasks that are affected by the underlying issue this set is about because they make use of libraries. So, in this case we have a cross-process (RT/!RT) situation that I am not sure we can address sanely. What do you think? Thanks, Juri
On 2025-02-07 10:41:02 [+0100], Juri Lelli wrote: > Hi, > > On 05/02/25 09:39, Sebastian Andrzej Siewior wrote: > > On 2025-02-04 11:34:47 [+0100], Peter Zijlstra wrote: > > ... > > > > Anyway, none of this solves anything when a process has both an active > > > RT part and an active !RT part (which isn't uncommon AFAICT). > > > > > > Then the RT bits will still get interference from the !RT bits. Do we > > > want to complicate things and consider that? > > > > I don't think so. The active and inactive are common but it is still the > > same process so you can expect it. The ugly part is when it is an > > entirely different task and it is random which one it is. > > Not entirely sure we are thinking about the same situation, but it looks > like we have cases of RT tasks that are affected by the underlying issue > this set is about because they make use of libraries. So, in this case > we have a cross-process (RT/!RT) situation that I am not sure we can > address sanely. What do you think? I wouldn't advice to use "unknown" code in a RT application and even threads. Audit libs before using them and not just collect them. A lock without PI in your RT thread is not good. A lot of locks, not knowing the "locked" time, also not good. Things that work most of the time due to the fastpath and only break from time to time. Also, a thread does fork() once during start up and things may continue to be good but may catch up eventually. > Thanks, > Juri Sebastian
On Fri, Feb 07, 2025 at 12:00:50PM +0100, Sebastian Andrzej Siewior wrote: > On 2025-02-07 10:41:02 [+0100], Juri Lelli wrote: > > Hi, > > > > On 05/02/25 09:39, Sebastian Andrzej Siewior wrote: > > > On 2025-02-04 11:34:47 [+0100], Peter Zijlstra wrote: > > > > ... > > > > > > Anyway, none of this solves anything when a process has both an active > > > > RT part and an active !RT part (which isn't uncommon AFAICT). > > > > > > > > Then the RT bits will still get interference from the !RT bits. Do we > > > > want to complicate things and consider that? > > > > > > I don't think so. The active and inactive are common but it is still the > > > same process so you can expect it. The ugly part is when it is an > > > entirely different task and it is random which one it is. > > > > Not entirely sure we are thinking about the same situation, but it looks > > like we have cases of RT tasks that are affected by the underlying issue > > this set is about because they make use of libraries. So, in this case > > we have a cross-process (RT/!RT) situation that I am not sure we can > > address sanely. What do you think? > > I wouldn't advice to use "unknown" code in a RT application and even > threads. Audit libs before using them and not just collect them. > A lock without PI in your RT thread is not good. A lot of locks, not > knowing the "locked" time, also not good. Things that work most of the > time due to the fastpath and only break from time to time. > Also, a thread does fork() once during start up and things may continue > to be good but may catch up eventually. Anyway, supposing people want to tinker, it should be possible to split the local hash based on if the address is mlocked or not. This gives some obvious issues where people do futex_wait(), mlock() and then expect futex_wake() to still work, but that should be rare. You typically mlock before you start using the memory. As always, mlockall is bad, do not use, like ever. 
But especially in mixed mode RT programs.
On 07/02/25 12:06, Peter Zijlstra wrote: > On Fri, Feb 07, 2025 at 12:00:50PM +0100, Sebastian Andrzej Siewior wrote: > > On 2025-02-07 10:41:02 [+0100], Juri Lelli wrote: > > > Hi, > > > > > > On 05/02/25 09:39, Sebastian Andrzej Siewior wrote: > > > > On 2025-02-04 11:34:47 [+0100], Peter Zijlstra wrote: > > > > > > ... > > > > > > > > Anyway, none of this solves anything when a process has both an active > > > > > RT part and an active !RT part (which isn't uncommon AFAICT). > > > > > > > > > > Then the RT bits will still get interference from the !RT bits. Do we > > > > > want to complicate things and consider that? > > > > > > > > I don't think so. The active and inactive are common but it is still the > > > > same process so you can expect it. The ugly part is when it is an > > > > entirely different task and it is random which one it is. > > > > > > Not entirely sure we are thinking about the same situation, but it looks > > > like we have cases of RT tasks that are affected by the underlying issue > > > this set is about because they make use of libraries. So, in this case > > > we have a cross-process (RT/!RT) situation that I am not sure we can > > > address sanely. What do you think? > > > > I wouldn't advice to use "unknown" code in a RT application and even > > threads. Audit libs before using them and not just collect them. Indeed, that is the message. But, you know, existing/legacy applications or applications "repurposed" for RT. :/ > > A lock without PI in your RT thread is not good. A lot of locks, not > > knowing the "locked" time, also not good. Things that work most of the > > time due to the fastpath and only break from time to time. > > Also, a thread does fork() once during start up and things may continue > > to be good but may catch up eventually. > > Anyway, supposing people want to tinker, it should be possible to split > the local hash based on if the address is mlocked or not. 
So, fully agree that we don't want to implement changes just to target current broken usages, but wondered if there are saner situations (Peter's point above?) where the current idea might be further extended to. > This gives some obvious issues where people do futex_wait(), mlock() and > then expect futex_wake() to still work, but that should be rare. You > typically mlock before you start using the memory. > > As always, mlockall is bad, do not use, like ever. But especially in > mixed mode RT programs. >
© 2016 - 2026 Red Hat, Inc.