kernel/signal.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-)
In has_pending_signals there was a switch/case used for optimizations.
However, today's compilers perform loop unrolling efficiently, thus it
is not needed anymore.
put i inside the for declaration so we do not risk its escape from the
scope. Moreover, i starts now from 0 and counts up, as it is a more
usual pattern.
Signed-off-by: Andrea Calabrese <andrea.calabrese@amarulasolutions.com>
---
The difference with V1 is in the description, now accurate thanks to the
review.
As before, the patch has been tested on gcc x86 and generates the same
code.
kernel/signal.c | 26 ++++----------------------
1 file changed, 4 insertions(+), 22 deletions(-)
diff --git a/kernel/signal.c b/kernel/signal.c
index 2d102e025883..799ee98cf03e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -130,28 +130,10 @@ static bool sig_ignored(struct task_struct *t, int sig, bool force)
*/
static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
{
- unsigned long ready;
- long i;
-
- switch (_NSIG_WORDS) {
- default:
- for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
- ready |= signal->sig[i] &~ blocked->sig[i];
- break;
-
- case 4: ready = signal->sig[3] &~ blocked->sig[3];
- ready |= signal->sig[2] &~ blocked->sig[2];
- ready |= signal->sig[1] &~ blocked->sig[1];
- ready |= signal->sig[0] &~ blocked->sig[0];
- break;
-
- case 2: ready = signal->sig[1] &~ blocked->sig[1];
- ready |= signal->sig[0] &~ blocked->sig[0];
- break;
-
- case 1: ready = signal->sig[0] &~ blocked->sig[0];
- }
- return ready != 0;
+ unsigned long ready = 0;
+ for (long i = 0; i < _NSIG_WORDS; i++)
+ ready |= signal->sig[i] & ~blocked->sig[i];
+ return ready != 0;
}
#define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
--
2.54.0
On 05/20, Andrea Calabrese wrote:
>
> static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
> {
> - unsigned long ready;
> - long i;
> -
> - switch (_NSIG_WORDS) {
> - default:
> - for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
> - ready |= signal->sig[i] &~ blocked->sig[i];
> - break;
> -
> - case 4: ready = signal->sig[3] &~ blocked->sig[3];
> - ready |= signal->sig[2] &~ blocked->sig[2];
> - ready |= signal->sig[1] &~ blocked->sig[1];
> - ready |= signal->sig[0] &~ blocked->sig[0];
> - break;
> -
> - case 2: ready = signal->sig[1] &~ blocked->sig[1];
> - ready |= signal->sig[0] &~ blocked->sig[0];
> - break;
> -
> - case 1: ready = signal->sig[0] &~ blocked->sig[0];
> - }
> - return ready != 0;
> + unsigned long ready = 0;
> + for (long i = 0; i < _NSIG_WORDS; i++)
> + ready |= signal->sig[i] & ~blocked->sig[i];
> + return ready != 0;
> }
Note that with your patch the main loop doesn't stop when ready
becomes != 0...
OK, I guess the modern compilers doesn't need this hint even if
_NSIG_WORDS > 1, so
Acked-by: Oleg Nesterov <oleg@redhat.com>
But since your patch is just a cleanup, you could do
for (int i = 0; i < _NSIG_WORDS; i++) {
if (signal->sig[i] & ~blocked->sig[i])
return true;
}
return false;
but this is subjective, I won't insist.
Oleg.
On Thu, 2026-05-21 at 14:50 +0200, Oleg Nesterov wrote:
> On 05/20, Andrea Calabrese wrote:
> >
> > static inline bool has_pending_signals(sigset_t *signal, sigset_t
> > *blocked)
> > {
> > - unsigned long ready;
> > - long i;
> > -
> > - switch (_NSIG_WORDS) {
> > - default:
> > - for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
> > - ready |= signal->sig[i] &~ blocked-
> > >sig[i];
> > - break;
> > -
> > - case 4: ready = signal->sig[3] &~ blocked->sig[3];
> > - ready |= signal->sig[2] &~ blocked->sig[2];
> > - ready |= signal->sig[1] &~ blocked->sig[1];
> > - ready |= signal->sig[0] &~ blocked->sig[0];
> > - break;
> > -
> > - case 2: ready = signal->sig[1] &~ blocked->sig[1];
> > - ready |= signal->sig[0] &~ blocked->sig[0];
> > - break;
> > -
> > - case 1: ready = signal->sig[0] &~ blocked->sig[0];
> > - }
> > - return ready != 0;
> > + unsigned long ready = 0;
> > + for (long i = 0; i < _NSIG_WORDS; i++)
> > + ready |= signal->sig[i] & ~blocked->sig[i];
> > + return ready != 0;
> > }
>
> Note that with your patch the main loop doesn't stop when ready
> becomes != 0...
>
> OK, I guess the modern compilers doesn't need this hint even if
> _NSIG_WORDS > 1, so
>
> Acked-by: Oleg Nesterov <oleg@redhat.com>
>
>
> But since your patch is just a cleanup, you could do
>
> for (int i = 0; i < _NSIG_WORDS; i++) {
> if (signal->sig[i] & ~blocked->sig[i])
> return true;
> }
>
> return false;
>
Good idea, I will send a v3 with this!
> but this is subjective, I won't insist.
>
> Oleg.
On Thu, 2026-05-21 at 15:34 +0200, Andrea Calabrese wrote:
> On Thu, 2026-05-21 at 14:50 +0200, Oleg Nesterov wrote:
> > On 05/20, Andrea Calabrese wrote:
> > >
> > > static inline bool has_pending_signals(sigset_t *signal,
> > > sigset_t
> > > *blocked)
> > > {
> > > - unsigned long ready;
> > > - long i;
> > > -
> > > - switch (_NSIG_WORDS) {
> > > - default:
> > > - for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;)
> > > - ready |= signal->sig[i] &~ blocked-
> > > > sig[i];
> > > - break;
> > > -
> > > - case 4: ready = signal->sig[3] &~ blocked->sig[3];
> > > - ready |= signal->sig[2] &~ blocked->sig[2];
> > > - ready |= signal->sig[1] &~ blocked->sig[1];
> > > - ready |= signal->sig[0] &~ blocked->sig[0];
> > > - break;
> > > -
> > > - case 2: ready = signal->sig[1] &~ blocked->sig[1];
> > > - ready |= signal->sig[0] &~ blocked->sig[0];
> > > - break;
> > > -
> > > - case 1: ready = signal->sig[0] &~ blocked->sig[0];
> > > - }
> > > - return ready != 0;
> > > + unsigned long ready = 0;
> > > + for (long i = 0; i < _NSIG_WORDS; i++)
> > > + ready |= signal->sig[i] & ~blocked->sig[i];
> > > + return ready != 0;
> > > }
> >
> > Note that with your patch the main loop doesn't stop when ready
> > becomes != 0...
> >
> > OK, I guess the modern compilers doesn't need this hint even if
> > _NSIG_WORDS > 1, so
> >
> > Acked-by: Oleg Nesterov <oleg@redhat.com>
> >
> >
> > But since your patch is just a cleanup, you could do
> >
> > for (int i = 0; i < _NSIG_WORDS; i++) {
> > if (signal->sig[i] & ~blocked->sig[i])
> > return true;
> > }
> >
> > return false;
> >
> Good idea, I will send a v3 with this!
>
Looking for a suggestion here: I have the objdumps before and after,
and I see that the assembly with this modification does change.
I am not sure whether the new one is ok in terms of performance, as it
adds one more instruction.
Based on the objdumps, I have the situation before the change:
____
0000000000001370 <recalc_sigpending>:
{
1370: f3 0f 1e fa endbr64
1374: e8 00 00 00 00 call 1379
<recalc_sigpending+0x9>
1379: 65 48 8b 15 00 00 00 mov %gs:0x0(%rip),%rdx
# 1381 <recalc_sigpending+0x11>
1380: 00
if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE))
||
1381: f6 82 a2 05 00 00 9a testb $0x9a,0x5a2(%rdx)
1388: 75 39 jne 13c3 <---- J1
<recalc_sigpending+0x53>
case 1: ready = signal->sig[0] &~ blocked->sig[0];
138a: 48 8b 82 40 08 00 00 mov 0x840(%rdx),%rax
1391: 48 f7 d0 not %rax
if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE))
||
1394: 48 85 82 68 08 00 00 test %rax,0x868(%rdx)
139b: 75 26 jne 13c3 <---- J2
<recalc_sigpending+0x53>
case 1: ready = signal->sig[0] &~ blocked->sig[0];
139d: 48 8b 8a 30 08 00 00 mov 0x830(%rdx),%rcx
PENDING(&t->pending, &t->blocked) ||
13a4: 48 85 41 50 test %rax,0x50(%rcx)
13a8: 75 19 jne 13c3 <---- J3
<recalc_sigpending+0x53>
PENDING(&t->signal->shared_pending, &t->blocked) ||
13aa: 80 ba b0 05 00 00 00 cmpb $0x0,0x5b0(%rdx)
13b1: 78 10 js 13c3 <---- J4
<recalc_sigpending+0x53>
JUMP_TABLE_ENTRY(key, label)
#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */
____
and the situation after the change:
____
0000000000001370 <recalc_sigpending>:
{
1370: f3 0f 1e fa endbr64
1374: e8 00 00 00 00 call 1379
<recalc_sigpending+0x9>
1379: 65 48 8b 15 00 00 00 mov %gs:0x0(%rip),%rdx
# 1381 <recalc_sigpending+0x11>
1380: 00
if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE))
||
1381: f6 82 a2 05 00 00 9a testb $0x9a,0x5a2(%rdx)
1388: 75 13 jne 139d <---- J1
<recalc_sigpending+0x2d>
if (signal->sig[i] & ~blocked->sig[i])
138a: 48 8b 82 40 08 00 00 mov 0x840(%rdx),%rax
1391: 48 f7 d0 not %rax
1394: 48 85 82 68 08 00 00 test %rax,0x868(%rdx)
139b: 74 09 je 13a6 <---- J2
<recalc_sigpending+0x36>
139d: f0 80 0a 02 lock orb $0x2,(%rdx) <-
return true;
13a1: e9 00 00 00 00 jmp 13a6 <---- J3
<recalc_sigpending+0x36>
if (signal->sig[i] & ~blocked->sig[i])
13a6: 48 8b 8a 30 08 00 00 mov 0x830(%rdx),%rcx
13ad: 48 85 41 50 test %rax,0x50(%rcx)
13b1: 75 ea jne 139d <---- J4
<recalc_sigpending+0x2d>
PENDING(&t->signal->shared_pending, &t->blocked) ||
13b3: 80 ba b0 05 00 00 00 cmpb $0x0,0x5b0(%rdx)
13ba: 78 e1 js 139d <---- J5
<recalc_sigpending+0x2d>
JUMP_TABLE_ENTRY(key, label)
#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */
____
The first instrution with the arrow was not there before, and the
second instruction is a forward jump, which is a return to the caller
and will most likely cause a branch prediction miss (moreover it is
also one more jump).
I think your judgement will be better then mine on this matter, as I
don't think I'm fully experienced.
Looking forward for your response.
> > but this is subjective, I won't insist.
> >
> > Oleg.
Andrea Calabrese
On 05/21, Andrea Calabrese wrote: > > Looking for a suggestion here: I have the objdumps before and after, > and I see that the assembly with this modification does change. OK, lets forget it, lets stay with V2. Oleg.
> Il giorno 22 mag 2026, alle ore 17:24, Oleg Nesterov <oleg@redhat.com> ha scritto: > > On 05/21, Andrea Calabrese wrote: >> >> Looking for a suggestion here: I have the objdumps before and after, >> and I see that the assembly with this modification does change. > > OK, lets forget it, lets stay with V2. > > Oleg. > Thank you! Andrea
© 2016 - 2026 Red Hat, Inc.