[PATCH v3] random: remove batched entropy locking

Jason A. Donenfeld posted 1 patch 4 years, 4 months ago
drivers/char/random.c | 55 ++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 27 deletions(-)
[PATCH v3] random: remove batched entropy locking
Posted by Jason A. Donenfeld 4 years, 4 months ago
Rather than use spinlocks to protect batched entropy, we can instead
disable interrupts locally, since we're dealing with per-cpu data, and
manage resets with a basic generation counter. At the same time, we
can't quite do this on PREEMPT_RT, where we still want spinlocks-as-
mutexes semantics. So we use a local_lock_t, which provides the right
behavior for each. Because this is a per-cpu lock, that generation
counter is still doing the necessary CPU-to-CPU communication.

This should improve performance a bit. It will also fix the linked splat
that Jonathan received with a PROVE_RAW_LOCK_NESTING=y.

Note that Sebastian has pointed out a few other areas where
using spinlock_t in an IRQ context is potentially problematic for
PREEMPT_RT. This patch handles one of those cases, and we'll hopefully
have additional patches for other cases.

Suggested-by: Andy Lutomirski <luto@kernel.org>
Reported-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Tested-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Link: https://lore.kernel.org/lkml/YfMa0QgsjCVdRAvJ@latitude/
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/char/random.c | 55 ++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 455615ac169a..3e54b90a3ff8 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1732,13 +1732,16 @@ struct ctl_table random_table[] = {
 };
 #endif	/* CONFIG_SYSCTL */
 
+static atomic_t batch_generation = ATOMIC_INIT(0);
+
 struct batched_entropy {
 	union {
 		u64 entropy_u64[CHACHA_BLOCK_SIZE / sizeof(u64)];
 		u32 entropy_u32[CHACHA_BLOCK_SIZE / sizeof(u32)];
 	};
+	local_lock_t lock;
 	unsigned int position;
-	spinlock_t batch_lock;
+	int generation;
 };
 
 /*
@@ -1750,7 +1753,7 @@ struct batched_entropy {
  * point prior.
  */
 static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = {
-	.batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock),
+	.lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock)
 };
 
 u64 get_random_u64(void)
@@ -1759,41 +1762,54 @@ u64 get_random_u64(void)
 	unsigned long flags;
 	struct batched_entropy *batch;
 	static void *previous;
+	int next_gen;
 
 	warn_unseeded_randomness(&previous);
 
-	batch = raw_cpu_ptr(&batched_entropy_u64);
-	spin_lock_irqsave(&batch->batch_lock, flags);
-	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
+	batch = this_cpu_ptr(&batched_entropy_u64);
+	local_lock_irqsave(&batch->lock, flags);
+
+	next_gen = atomic_read(&batch_generation);
+	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0 ||
+	    next_gen != batch->generation) {
 		extract_crng((u8 *)batch->entropy_u64);
 		batch->position = 0;
+		batch->generation = next_gen;
 	}
+
 	ret = batch->entropy_u64[batch->position++];
-	spin_unlock_irqrestore(&batch->batch_lock, flags);
+	local_unlock_irqrestore(&batch->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_u64);
 
 static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = {
-	.batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock),
+	.lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock)
 };
+
 u32 get_random_u32(void)
 {
 	u32 ret;
 	unsigned long flags;
 	struct batched_entropy *batch;
 	static void *previous;
+	int next_gen;
 
 	warn_unseeded_randomness(&previous);
 
-	batch = raw_cpu_ptr(&batched_entropy_u32);
-	spin_lock_irqsave(&batch->batch_lock, flags);
-	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
+	batch = this_cpu_ptr(&batched_entropy_u32);
+	local_lock_irqsave(&batch->lock, flags);
+
+	next_gen = atomic_read(&batch_generation);
+	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0 ||
+	    next_gen != batch->generation) {
 		extract_crng((u8 *)batch->entropy_u32);
 		batch->position = 0;
+		batch->generation = next_gen;
 	}
+
 	ret = batch->entropy_u32[batch->position++];
-	spin_unlock_irqrestore(&batch->batch_lock, flags);
+	local_unlock_irqrestore(&batch->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(get_random_u32);
@@ -1804,22 +1820,7 @@ EXPORT_SYMBOL(get_random_u32);
  * next usage. */
 static void invalidate_batched_entropy(void)
 {
-	int cpu;
-	unsigned long flags;
-
-	for_each_possible_cpu(cpu) {
-		struct batched_entropy *batched_entropy;
-
-		batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu);
-		spin_lock_irqsave(&batched_entropy->batch_lock, flags);
-		batched_entropy->position = 0;
-		spin_unlock(&batched_entropy->batch_lock);
-
-		batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu);
-		spin_lock(&batched_entropy->batch_lock);
-		batched_entropy->position = 0;
-		spin_unlock_irqrestore(&batched_entropy->batch_lock, flags);
-	}
+	atomic_inc(&batch_generation);
 }
 
 /**
-- 
2.35.0

Re: [PATCH v3] random: remove batched entropy locking
Posted by Sebastian Andrzej Siewior 4 years, 4 months ago
On 2022-02-04 16:51:42 [+0100], Jason A. Donenfeld wrote:
> index 455615ac169a..3e54b90a3ff8 100644
> --- a/drivers/char/random.c
> +++ b/drivers/char/random.c
> @@ -1759,41 +1762,54 @@ u64 get_random_u64(void)
>  	unsigned long flags;
>  	struct batched_entropy *batch;
>  	static void *previous;
> +	int next_gen;
>  
>  	warn_unseeded_randomness(&previous);
>  
> -	batch = raw_cpu_ptr(&batched_entropy_u64);
> -	spin_lock_irqsave(&batch->batch_lock, flags);
> -	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
> +	batch = this_cpu_ptr(&batched_entropy_u64);
> +	local_lock_irqsave(&batch->lock, flags);

Does this compile and work? From the looks of it, this should be:

	local_lock_irqsave(&batched_entropy_u64.lock, flags);
	batch = this_cpu_ptr(&batched_entropy_u64);

and we could do s/this_cpu_ptr/raw_cpu_ptr/

Sebastian
Re: [PATCH v3] random: remove batched entropy locking
Posted by Jason A. Donenfeld 4 years, 4 months ago
Hi Sebastian,

On Fri, Feb 4, 2022 at 4:57 PM Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
> Does this compile and work?

It does, yes.

> From the looks of it, this should be:
>
>         local_lock_irqsave(&batched_entropy_u64.lock, flags);
>         batch = this_cpu_ptr(&batched_entropy_u64);

I wasn't aware you could reference percpu things statically like that
without going through the ptr helper. Anyway, I'll switch to yours, as
taking that lock prior seems better anyway.

>
> and we could do s/this_cpu_ptr/raw_cpu_ptr/

Ack.

Jason
Re: [PATCH v3] random: remove batched entropy locking
Posted by Jann Horn 4 years, 4 months ago
On Fri, Feb 4, 2022 at 4:57 PM Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
> On 2022-02-04 16:51:42 [+0100], Jason A. Donenfeld wrote:
> > index 455615ac169a..3e54b90a3ff8 100644
> > --- a/drivers/char/random.c
> > +++ b/drivers/char/random.c
> > @@ -1759,41 +1762,54 @@ u64 get_random_u64(void)
> >       unsigned long flags;
> >       struct batched_entropy *batch;
> >       static void *previous;
> > +     int next_gen;
> >
> >       warn_unseeded_randomness(&previous);
> >
> > -     batch = raw_cpu_ptr(&batched_entropy_u64);
> > -     spin_lock_irqsave(&batch->batch_lock, flags);
> > -     if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
> > +     batch = this_cpu_ptr(&batched_entropy_u64);
> > +     local_lock_irqsave(&batch->lock, flags);
>
> Does this compile and work? From the looks of it, this should be:
>
>         local_lock_irqsave(&batched_entropy_u64.lock, flags);
>         batch = this_cpu_ptr(&batched_entropy_u64);
>
> and we could do s/this_cpu_ptr/raw_cpu_ptr/

Why raw_cpu_ptr? include/linux/percpu-defs.h says about raw_*() operations:

 * Operations for contexts where we do not want to do any checks for
 * preemptions.  Unless strictly necessary, always use [__]this_cpu_*()
 * instead.

So when I see a raw_*() percpu thing, I read it as "it is expected
that we can migrate after this point (or we're in some really weird
context where the normal context check doesn't work)". Is that
incorrect?
Re: [PATCH v3] random: remove batched entropy locking
Posted by Jason A. Donenfeld 4 years, 4 months ago
On Wed, Feb 16, 2022 at 9:01 PM Jann Horn <jannh@google.com> wrote:
>
> On Fri, Feb 4, 2022 at 4:57 PM Sebastian Andrzej Siewior
> <bigeasy@linutronix.de> wrote:
> > On 2022-02-04 16:51:42 [+0100], Jason A. Donenfeld wrote:
> > > index 455615ac169a..3e54b90a3ff8 100644
> > > --- a/drivers/char/random.c
> > > +++ b/drivers/char/random.c
> > > @@ -1759,41 +1762,54 @@ u64 get_random_u64(void)
> > >       unsigned long flags;
> > >       struct batched_entropy *batch;
> > >       static void *previous;
> > > +     int next_gen;
> > >
> > >       warn_unseeded_randomness(&previous);
> > >
> > > -     batch = raw_cpu_ptr(&batched_entropy_u64);
> > > -     spin_lock_irqsave(&batch->batch_lock, flags);
> > > -     if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
> > > +     batch = this_cpu_ptr(&batched_entropy_u64);
> > > +     local_lock_irqsave(&batch->lock, flags);
> >
> > Does this compile and work? From the looks of it, this should be:
> >
> >         local_lock_irqsave(&batched_entropy_u64.lock, flags);
> >         batch = this_cpu_ptr(&batched_entropy_u64);
> >
> > and we could do s/this_cpu_ptr/raw_cpu_ptr/
>
> Why raw_cpu_ptr? include/linux/percpu-defs.h says about raw_*() operations:
>
>  * Operations for contexts where we do not want to do any checks for
>  * preemptions.  Unless strictly necessary, always use [__]this_cpu_*()
>  * instead.
>
> So when I see a raw_*() percpu thing, I read it as "it is expected
> that we can migrate after this point (or we're in some really weird
> context where the normal context check doesn't work)". Is that
> incorrect?

If it says "contexts where we do not want to do any checks for
preemptions", then that would apply here I would think? We're taking a
local lock, which means afterwards there are no preemptions. For
context, the code that got committed after Sebastian's final review
is:

        local_lock_irqsave(&batched_entropy_u32.lock, flags);
        batch = raw_cpu_ptr(&batched_entropy_u32);

However, I think most other code uses this_cpu_ptr() instead? So not sure.

Jason
Re: [PATCH v3] random: remove batched entropy locking
Posted by Sebastian Andrzej Siewior 4 years, 4 months ago
On 2022-02-16 21:58:14 [+0100], Jason A. Donenfeld wrote:
> > Why raw_cpu_ptr? include/linux/percpu-defs.h says about raw_*() operations:
> >
> >  * Operations for contexts where we do not want to do any checks for
> >  * preemptions.  Unless strictly necessary, always use [__]this_cpu_*()
> >  * instead.
> >
> > So when I see a raw_*() percpu thing, I read it as "it is expected
> > that we can migrate after this point (or we're in some really weird
> > context where the normal context check doesn't work)". Is that
> > incorrect?
> 
> If it says "contexts where we do not want to do any checks for
> preemptions", then that would apply here I would think? We're taking a
> local lock, which means afterwards there are no preemptions. For
> context, the code that got committed after Sebastian's final review
> is:
> 
>         local_lock_irqsave(&batched_entropy_u32.lock, flags);
>         batch = raw_cpu_ptr(&batched_entropy_u32);
> 
> However, I think most other code uses this_cpu_ptr() instead? So not sure.

It depends what you are looking for.
raw_cpu_ptr(&batched_entropy_u32) give you the pointer to
&batched_entropy_u32 of _this_ CPU - the CPU you are currently running.
this_cpu_ptr() does the same except that it has smp_processor_id() in
it. smp_processor_id() will yell at you (given you enabled
CONFIG_DEBUG_PREEMPT) if the code can migrate to another CPU.
So it will yell at you unless:
- you disable preemption / migration so code can't migrate
- you run on a per-CPU thread i.e. a thread which is bound to a single
  CPU and therefore can't migrate to another.

The local_lock_irqsave() acquires a lock / disables interrupt and
therefore can't migrate. So I suggested to use raw_cpu_ptr() as an
optimisation in the debug-case.

If you disable preemption before accessing a per-CPU variable with
this_cpu_ptr() then you _need_ to ensure that nobody is accessing the
variable from in-IRQ context. Nobody will yell at you. But if you use a
local-lock then you get lockdep annotation and lockdep will complain if
you use that variable always in process context and sometimes in-IRQ.

> Jason

Sebastian