arch/riscv/kernel/jump_label.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
From: Guo Ren <guoren@linux.alibaba.com>
When CONFIG_DEBUG_RT_MUTEXES=y, mutex_lock->rt_mutex_try_acquire
would change from rt_mutex_cmpxchg_acquire to
rt_mutex_slowtrylock():
raw_spin_lock_irqsave(&lock->wait_lock, flags);
ret = __rt_mutex_slowtrylock(lock);
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
Because queued_spin_#ops are changed to ticket_#ops one by one by
jump_label, raw_spin_lock/unlock could cause a deadlock during the
transition.
That means in arch/riscv/kernel/jump_label.c:
1.
arch_jump_label_transform_queue() ->
mutex_lock(&text_mutex); +-> raw_spin_lock -> queued_spin_lock
|-> raw_spin_unlock -> queued_spin_unlock
patch_insn_write -> change the raw_spin_lock to ticket_lock
mutex_unlock(&text_mutex);
...
2. /* Dirty the lock value */
arch_jump_label_transform_queue() ->
mutex_lock(&text_mutex); +-> raw_spin_lock -> *ticket_lock*
|-> raw_spin_unlock -> *queued_spin_unlock*
/* BUG: ticket_lock with queued_spin_unlock */
patch_insn_write -> change the raw_spin_unlock to ticket_unlock
mutex_unlock(&text_mutex);
...
3. /* Dead lock */
arch_jump_label_transform_queue() ->
mutex_lock(&text_mutex); +-> raw_spin_lock -> ticket_lock /* deadlock! */
|-> raw_spin_unlock -> ticket_unlock
patch_insn_write -> change other raw_spin_#op -> ticket_#op
mutex_unlock(&text_mutex);
So, the solution is to disable mutex usage of
arch_jump_label_transform_queue() during early_boot_irqs_disabled, just
like we have done for stop_machine.
Reported-by: Conor Dooley <conor@kernel.org>
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Fixes: ab83647fadae ("riscv: Add qspinlock support")
Link: https://lore.kernel.org/linux-riscv/CAJF2gTQwYTGinBmCSgVUoPv0_q4EPt_+WiyfUA1HViAKgUzxAg@mail.gmail.com/T/#mf488e6347817fca03bb93a7d34df33d8615b3775
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/kernel/jump_label.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c
index 6eee6f736f68..654ed159c830 100644
--- a/arch/riscv/kernel/jump_label.c
+++ b/arch/riscv/kernel/jump_label.c
@@ -36,9 +36,15 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
insn = RISCV_INSN_NOP;
}
- mutex_lock(&text_mutex);
- patch_insn_write(addr, &insn, sizeof(insn));
- mutex_unlock(&text_mutex);
+ if (early_boot_irqs_disabled) {
+ riscv_patch_in_stop_machine = 1;
+ patch_insn_write(addr, &insn, sizeof(insn));
+ riscv_patch_in_stop_machine = 0;
+ } else {
+ mutex_lock(&text_mutex);
+ patch_insn_write(addr, &insn, sizeof(insn));
+ mutex_unlock(&text_mutex);
+ }
return true;
}
--
2.40.1
On Sat, Nov 30, 2024 at 10:33:10AM -0500, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
>
> When CONFIG_DEBUG_RT_MUTEXES=y, mutex_lock->rt_mutex_try_acquire
> would change from rt_mutex_cmpxchg_acquire to
> rt_mutex_slowtrylock():
> raw_spin_lock_irqsave(&lock->wait_lock, flags);
> ret = __rt_mutex_slowtrylock(lock);
> raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
>
> Because queued_spin_#ops to ticket_#ops is changed one by one by
> jump_label, raw_spin_lock/unlock would cause a deadlock during the
> changing.
>
> That means in arch/riscv/kernel/jump_label.c:
> 1.
> arch_jump_label_transform_queue() ->
> mutex_lock(&text_mutex); +-> raw_spin_lock -> queued_spin_lock
> |-> raw_spin_unlock -> queued_spin_unlock
> patch_insn_write -> change the raw_spin_lock to ticket_lock
> mutex_unlock(&text_mutex);
> ...
>
> 2. /* Dirty the lock value */
> arch_jump_label_transform_queue() ->
> mutex_lock(&text_mutex); +-> raw_spin_lock -> *ticket_lock*
> |-> raw_spin_unlock -> *queued_spin_unlock*
> /* BUG: ticket_lock with queued_spin_unlock */
> patch_insn_write -> change the raw_spin_unlock to ticket_unlock
> mutex_unlock(&text_mutex);
> ...
>
> 3. /* Dead lock */
> arch_jump_label_transform_queue() ->
> mutex_lock(&text_mutex); +-> raw_spin_lock -> ticket_lock /* deadlock! */
> |-> raw_spin_unlock -> ticket_unlock
> patch_insn_write -> change other raw_spin_#op -> ticket_#op
> mutex_unlock(&text_mutex);
>
> So, the solution is to disable mutex usage of
> arch_jump_label_transform_queue() during early_boot_irqs_disabled, just
> like we have done for stop_machine.
>
> Reported-by: Conor Dooley <conor@kernel.org>
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> Fixes: ab83647fadae ("riscv: Add qspinlock support")
> Link: https://lore.kernel.org/linux-riscv/CAJF2gTQwYTGinBmCSgVUoPv0_q4EPt_+WiyfUA1HViAKgUzxAg@mail.gmail.com/T/#mf488e6347817fca03bb93a7d34df33d8615b3775
> Cc: Palmer Dabbelt <palmer@dabbelt.com>
> Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
Tested-by: Nam Cao <namcao@linutronix.de>
Thanks for the fix,
Nam
Hi Guo,
On 30/11/2024 16:33, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
>
> When CONFIG_DEBUG_RT_MUTEXES=y, mutex_lock->rt_mutex_try_acquire
> would change from rt_mutex_cmpxchg_acquire to
> rt_mutex_slowtrylock():
> raw_spin_lock_irqsave(&lock->wait_lock, flags);
> ret = __rt_mutex_slowtrylock(lock);
> raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
>
> Because queued_spin_#ops to ticket_#ops is changed one by one by
> jump_label, raw_spin_lock/unlock would cause a deadlock during the
> changing.
>
> That means in arch/riscv/kernel/jump_label.c:
> 1.
> arch_jump_label_transform_queue() ->
> mutex_lock(&text_mutex); +-> raw_spin_lock -> queued_spin_lock
> |-> raw_spin_unlock -> queued_spin_unlock
> patch_insn_write -> change the raw_spin_lock to ticket_lock
> mutex_unlock(&text_mutex);
> ...
>
> 2. /* Dirty the lock value */
> arch_jump_label_transform_queue() ->
> mutex_lock(&text_mutex); +-> raw_spin_lock -> *ticket_lock*
> |-> raw_spin_unlock -> *queued_spin_unlock*
> /* BUG: ticket_lock with queued_spin_unlock */
> patch_insn_write -> change the raw_spin_unlock to ticket_unlock
> mutex_unlock(&text_mutex);
> ...
>
> 3. /* Dead lock */
> arch_jump_label_transform_queue() ->
> mutex_lock(&text_mutex); +-> raw_spin_lock -> ticket_lock /* deadlock! */
> |-> raw_spin_unlock -> ticket_unlock
> patch_insn_write -> change other raw_spin_#op -> ticket_#op
> mutex_unlock(&text_mutex);
>
> So, the solution is to disable mutex usage of
> arch_jump_label_transform_queue() during early_boot_irqs_disabled, just
> like we have done for stop_machine.
>
> Reported-by: Conor Dooley <conor@kernel.org>
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> Fixes: ab83647fadae ("riscv: Add qspinlock support")
> Link: https://lore.kernel.org/linux-riscv/CAJF2gTQwYTGinBmCSgVUoPv0_q4EPt_+WiyfUA1HViAKgUzxAg@mail.gmail.com/T/#mf488e6347817fca03bb93a7d34df33d8615b3775
> Cc: Palmer Dabbelt <palmer@dabbelt.com>
> Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/kernel/jump_label.c | 12 +++++++++---
> 1 file changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c
> index 6eee6f736f68..654ed159c830 100644
> --- a/arch/riscv/kernel/jump_label.c
> +++ b/arch/riscv/kernel/jump_label.c
> @@ -36,9 +36,15 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
> insn = RISCV_INSN_NOP;
> }
>
> - mutex_lock(&text_mutex);
> - patch_insn_write(addr, &insn, sizeof(insn));
> - mutex_unlock(&text_mutex);
> + if (early_boot_irqs_disabled) {
> + riscv_patch_in_stop_machine = 1;
> + patch_insn_write(addr, &insn, sizeof(insn));
> + riscv_patch_in_stop_machine = 0;
> + } else {
> + mutex_lock(&text_mutex);
> + patch_insn_write(addr, &insn, sizeof(insn));
> + mutex_unlock(&text_mutex);
> + }
>
> return true;
> }
Sorry for the late answer, I've been sick lately!
Thank you very much for looking into this and finding this not-so-bad
solution! I remind everyone that this is a temporary solution until we
can use an alternative instead of a static key.
You can add:
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Tested-by: Alexandre Ghiti <alexghiti@rivosinc.com>
The revert is still on the table IMO; let's let Palmer decide.
Thank you again Guo, really appreciate you took the time to find this
solution!
Alex
On Wed, 04 Dec 2024 00:27:00 PST (-0800), alex@ghiti.fr wrote:
> Hi Guo,
>
> On 30/11/2024 16:33, guoren@kernel.org wrote:
>> From: Guo Ren <guoren@linux.alibaba.com>
>>
>> When CONFIG_DEBUG_RT_MUTEXES=y, mutex_lock->rt_mutex_try_acquire
>> would change from rt_mutex_cmpxchg_acquire to
>> rt_mutex_slowtrylock():
>> raw_spin_lock_irqsave(&lock->wait_lock, flags);
>> ret = __rt_mutex_slowtrylock(lock);
>> raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
>>
>> Because queued_spin_#ops to ticket_#ops is changed one by one by
>> jump_label, raw_spin_lock/unlock would cause a deadlock during the
>> changing.
>>
>> That means in arch/riscv/kernel/jump_label.c:
>> 1.
>> arch_jump_label_transform_queue() ->
>> mutex_lock(&text_mutex); +-> raw_spin_lock -> queued_spin_lock
>> |-> raw_spin_unlock -> queued_spin_unlock
>> patch_insn_write -> change the raw_spin_lock to ticket_lock
>> mutex_unlock(&text_mutex);
>> ...
>>
>> 2. /* Dirty the lock value */
>> arch_jump_label_transform_queue() ->
>> mutex_lock(&text_mutex); +-> raw_spin_lock -> *ticket_lock*
>> |-> raw_spin_unlock -> *queued_spin_unlock*
>> /* BUG: ticket_lock with queued_spin_unlock */
>> patch_insn_write -> change the raw_spin_unlock to ticket_unlock
>> mutex_unlock(&text_mutex);
>> ...
>>
>> 3. /* Dead lock */
>> arch_jump_label_transform_queue() ->
>> mutex_lock(&text_mutex); +-> raw_spin_lock -> ticket_lock /* deadlock! */
>> |-> raw_spin_unlock -> ticket_unlock
>> patch_insn_write -> change other raw_spin_#op -> ticket_#op
>> mutex_unlock(&text_mutex);
>>
>> So, the solution is to disable mutex usage of
>> arch_jump_label_transform_queue() during early_boot_irqs_disabled, just
>> like we have done for stop_machine.
>>
>> Reported-by: Conor Dooley <conor@kernel.org>
>> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
>> Signed-off-by: Guo Ren <guoren@kernel.org>
>> Fixes: ab83647fadae ("riscv: Add qspinlock support")
>> Link: https://lore.kernel.org/linux-riscv/CAJF2gTQwYTGinBmCSgVUoPv0_q4EPt_+WiyfUA1HViAKgUzxAg@mail.gmail.com/T/#mf488e6347817fca03bb93a7d34df33d8615b3775
>> Cc: Palmer Dabbelt <palmer@dabbelt.com>
>> Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
>> ---
>> arch/riscv/kernel/jump_label.c | 12 +++++++++---
>> 1 file changed, 9 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c
>> index 6eee6f736f68..654ed159c830 100644
>> --- a/arch/riscv/kernel/jump_label.c
>> +++ b/arch/riscv/kernel/jump_label.c
>> @@ -36,9 +36,15 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
>> insn = RISCV_INSN_NOP;
>> }
>>
>> - mutex_lock(&text_mutex);
>> - patch_insn_write(addr, &insn, sizeof(insn));
>> - mutex_unlock(&text_mutex);
>> + if (early_boot_irqs_disabled) {
>> + riscv_patch_in_stop_machine = 1;
>> + patch_insn_write(addr, &insn, sizeof(insn));
>> + riscv_patch_in_stop_machine = 0;
>> + } else {
>> + mutex_lock(&text_mutex);
>> + patch_insn_write(addr, &insn, sizeof(insn));
>> + mutex_unlock(&text_mutex);
>> + }
>>
>> return true;
>> }
>
>
> Sorry for the late answer, I've been sick lately!
Ya, I was also sick -- bad timing.
> Thank you very much for looking into this and finding this not-so-bad
> solution! I remind everyone that this is a temporary solution until we
> can use an alternative instead of a static key.
>
> You can add:
>
> Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> Tested-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>
> The revert is still on the table IMO, let's Palmer decide.
IMO it's fine to just take this as a fix. Reverting stuff is always a
nice big hammer to have if we don't get a fix in a reasonable timeframe,
but with a fix available in the time I was out sick anyway I think it's
fine to just take this.
So I've got it queued up on fixes.
> Thank you again Guo, really appreciate you took the time to find this
> solution!
>
> Alex
On Wed, Dec 4, 2024 at 4:27 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>
> Hi Guo,
>
> On 30/11/2024 16:33, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > When CONFIG_DEBUG_RT_MUTEXES=y, mutex_lock->rt_mutex_try_acquire
> > would change from rt_mutex_cmpxchg_acquire to
> > rt_mutex_slowtrylock():
> > raw_spin_lock_irqsave(&lock->wait_lock, flags);
> > ret = __rt_mutex_slowtrylock(lock);
> > raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
> >
> > Because queued_spin_#ops to ticket_#ops is changed one by one by
> > jump_label, raw_spin_lock/unlock would cause a deadlock during the
> > changing.
> >
> > That means in arch/riscv/kernel/jump_label.c:
> > 1.
> > arch_jump_label_transform_queue() ->
> > mutex_lock(&text_mutex); +-> raw_spin_lock -> queued_spin_lock
> > |-> raw_spin_unlock -> queued_spin_unlock
> > patch_insn_write -> change the raw_spin_lock to ticket_lock
> > mutex_unlock(&text_mutex);
> > ...
> >
> > 2. /* Dirty the lock value */
> > arch_jump_label_transform_queue() ->
> > mutex_lock(&text_mutex); +-> raw_spin_lock -> *ticket_lock*
> > |-> raw_spin_unlock -> *queued_spin_unlock*
> > /* BUG: ticket_lock with queued_spin_unlock */
> > patch_insn_write -> change the raw_spin_unlock to ticket_unlock
> > mutex_unlock(&text_mutex);
> > ...
> >
> > 3. /* Dead lock */
> > arch_jump_label_transform_queue() ->
> > mutex_lock(&text_mutex); +-> raw_spin_lock -> ticket_lock /* deadlock! */
> > |-> raw_spin_unlock -> ticket_unlock
> > patch_insn_write -> change other raw_spin_#op -> ticket_#op
> > mutex_unlock(&text_mutex);
> >
> > So, the solution is to disable mutex usage of
> > arch_jump_label_transform_queue() during early_boot_irqs_disabled, just
> > like we have done for stop_machine.
> >
> > Reported-by: Conor Dooley <conor@kernel.org>
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > Fixes: ab83647fadae ("riscv: Add qspinlock support")
> > Link: https://lore.kernel.org/linux-riscv/CAJF2gTQwYTGinBmCSgVUoPv0_q4EPt_+WiyfUA1HViAKgUzxAg@mail.gmail.com/T/#mf488e6347817fca03bb93a7d34df33d8615b3775
> > Cc: Palmer Dabbelt <palmer@dabbelt.com>
> > Cc: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> > arch/riscv/kernel/jump_label.c | 12 +++++++++---
> > 1 file changed, 9 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c
> > index 6eee6f736f68..654ed159c830 100644
> > --- a/arch/riscv/kernel/jump_label.c
> > +++ b/arch/riscv/kernel/jump_label.c
> > @@ -36,9 +36,15 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
> > insn = RISCV_INSN_NOP;
> > }
> >
> > - mutex_lock(&text_mutex);
> > - patch_insn_write(addr, &insn, sizeof(insn));
> > - mutex_unlock(&text_mutex);
> > + if (early_boot_irqs_disabled) {
> > + riscv_patch_in_stop_machine = 1;
> > + patch_insn_write(addr, &insn, sizeof(insn));
> > + riscv_patch_in_stop_machine = 0;
> > + } else {
> > + mutex_lock(&text_mutex);
> > + patch_insn_write(addr, &insn, sizeof(insn));
> > + mutex_unlock(&text_mutex);
> > + }
> >
> > return true;
> > }
>
>
> Sorry for the late answer, I've been sick lately!
I wish you a speedy recovery.
>
> Thank you very much for looking into this and finding this not-so-bad
> solution! I remind everyone that this is a temporary solution until we
> can use an alternative instead of a static key.
Here, I want to point out that the problem is not the jump_label
infrastructure but our arch_jump_label_transform_queue() ->
patch_insn_write() path. The alternative mechanism has a similar
situation, so you would still need a modification like the one below:
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 467c5c735bf5..475f7c520be4 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -1017,10 +1017,17 @@ void __init_or_module
riscv_cpufeature_patch_func(struct alt_entry *begin,
oldptr = ALT_OLD_PTR(alt);
altptr = ALT_ALT_PTR(alt);
+if (early_boot_irqs_disabled) {
+ riscv_patch_in_stop_machine = 1;
+ patch_text_nosync(oldptr, altptr, alt->alt_len);
+ riscv_alternative_fix_offsets(oldptr, alt->alt_len,
oldptr - altptr);
+ riscv_patch_in_stop_machine = 0;
+} else {
mutex_lock(&text_mutex);
patch_text_nosync(oldptr, altptr, alt->alt_len);
riscv_alternative_fix_offsets(oldptr, alt->alt_len,
oldptr - altptr);
mutex_unlock(&text_mutex);
+}
}
}
#endif
>
> You can add:
>
> Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> Tested-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>
> The revert is still on the table IMO, let's Palmer decide.
Okay
--
Best Regards
Guo Ren
On Sat, Nov 30, 2024 at 10:33:10AM -0500, guoren@kernel.org wrote: > From: Guo Ren <guoren@linux.alibaba.com> > > When CONFIG_DEBUG_RT_MUTEXES=y, mutex_lock->rt_mutex_try_acquire > would change from rt_mutex_cmpxchg_acquire to > rt_mutex_slowtrylock(): > raw_spin_lock_irqsave(&lock->wait_lock, flags); > ret = __rt_mutex_slowtrylock(lock); > raw_spin_unlock_irqrestore(&lock->wait_lock, flags); > > Because queued_spin_#ops to ticket_#ops is changed one by one by > jump_label, raw_spin_lock/unlock would cause a deadlock during the > changing. > > That means in arch/riscv/kernel/jump_label.c: > 1. > arch_jump_label_transform_queue() -> > mutex_lock(&text_mutex); +-> raw_spin_lock -> queued_spin_lock > |-> raw_spin_unlock -> queued_spin_unlock > patch_insn_write -> change the raw_spin_lock to ticket_lock > mutex_unlock(&text_mutex); > ... > > 2. /* Dirty the lock value */ > arch_jump_label_transform_queue() -> > mutex_lock(&text_mutex); +-> raw_spin_lock -> *ticket_lock* > |-> raw_spin_unlock -> *queued_spin_unlock* > /* BUG: ticket_lock with queued_spin_unlock */ > patch_insn_write -> change the raw_spin_unlock to ticket_unlock > mutex_unlock(&text_mutex); > ... > > 3. /* Dead lock */ > arch_jump_label_transform_queue() -> > mutex_lock(&text_mutex); +-> raw_spin_lock -> ticket_lock /* deadlock! */ > |-> raw_spin_unlock -> ticket_unlock > patch_insn_write -> change other raw_spin_#op -> ticket_#op > mutex_unlock(&text_mutex); > > So, the solution is to disable mutex usage of > arch_jump_label_transform_queue() during early_boot_irqs_disabled, just > like we have done for stop_machine. > > Reported-by: Conor Dooley <conor@kernel.org> Tested-by: Conor Dooley <conor.dooley@microchip.com> Thanks, Conor.
© 2016 - 2026 Red Hat, Inc.