kernel/signal.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)
I dug through posix[1], the FreeBSD version of kill(2), and the Illumos
version of kill(2). Common sense, the documentation and the other
implemnetations of kill(2) agree that an error should be returned if no
signal is delivered.
What is up in the air is which error code should be returned. FreeBSD
uses ESRCH for all errors. Illumos will return EPERM for some errors,
and ESRCH for others. According to the rationale POSIX allows both.
The current Linux behavior of reporting success even when no signal
was delivered dates back to Linux 0.1 with the introduction of
returning ESRCH when there were no processes being added in Linux 1.0.
Since the current behavior is buggy and user-space cares[2][3] change
the behavior to match the behavior when Linux sends signals to process
groups.
Petr Skocik <pskocik@gmail.com> wrote:
> The code sample below demonstrates the problem, which gets fixed by the
> patch:
>
> #define _GNU_SOURCE
> #include <assert.h>
> #include <errno.h>
> #include <signal.h>
> #include <stdio.h>
> #include <sys/wait.h>
> #include <unistd.h>
> #define VICTIM_UID 4200 //check these are safe to use on your system!
> #define UNUSED_UID 4300
> int main(){
> uid_t r,e,s;
> if(geteuid()) return 1; //requires root privileges
>
> //pipe to let the parent know when the child has changed ids
> int fds[2]; if(0>pipe(fds)) return 1;
> pid_t pid;
> if(0>(pid=fork())) return 1;
> else if(0==pid){
> setreuid(VICTIM_UID,VICTIM_UID);
> getresuid(&r,&e,&s); printf("child: %u %u %u\n", r,e,s);
> close(fds[0]); close(fds[1]); //let the parent continue
> for(;;) pause();
> }
> close(fds[1]);
> read(fds[0],&(char){0},1); //wait for uid change in the child
>
> #if 1
> setreuid(VICTIM_UID,(uid_t)-1); seteuid(VICTIM_UID);
> #else
> setresuid(UNUSED_UID,VICTIM_UID,0);
> #endif
> getresuid(&r,&e,&s); printf("parent: %u %u %u\n", r,e,s); //4200 4200 0
>
> int err = kill(-1,-111); (void)err; //test -EINVAL
> assert(err < 0 && errno == EINVAL);
>
> int rc = kill(-1,SIGTERM); //test 0
> if(rc>=0) wait(0);
> int rc2 = kill(-1,SIGTERM); //test -ESCHR
> printf("1st kill ok==%d; 2nd kill ESRCH==%d\n", rc==0, rc2<0&& errno==ESRCH);
> }
[1] https://pubs.opengroup.org/onlinepubs/9699919799/functions/kill.html
[2] https://lkml.kernel.org/r/336ae9be-c66c-d87f-61fe-b916e9f04ffc@gmail.com
[3] https://lkml.kernel.org/r/20221122161240.137570-1-pskocik@gmail.com
Reported-by: Petr Skocik <pskocik@gmail.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
kernel/signal.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/kernel/signal.c b/kernel/signal.c
index b5370fe5c198..731c6e3b351d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1582,8 +1582,9 @@ EXPORT_SYMBOL_GPL(kill_pid_usb_asyncio);
/*
* kill_something_info() interprets pid in interesting ways just like kill(2).
*
- * POSIX specifies that kill(-1,sig) is unspecified, but what we have
- * is probably wrong. Should make it like BSD or SYSV.
+ * POSIX allows the error codes EPERM and ESRCH when kill(-1,sig) does
+ * not deliver a signal to any process. For consistency use the same
+ * logic in kill_something_info and __kill_pgrp_info.
*/
static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid)
@@ -1602,7 +1603,8 @@ static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid)
ret = __kill_pgrp_info(sig, info,
pid ? find_vpid(-pid) : task_pgrp(current));
} else {
- int retval = 0, count = 0;
+ bool found = false, success = false;
+ int retval = 0;
struct task_struct * p;
for_each_process(p) {
@@ -1610,12 +1612,12 @@ static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid)
!same_thread_group(p, current)) {
int err = group_send_sig_info(sig, info, p,
PIDTYPE_MAX);
- ++count;
- if (err != -EPERM)
- retval = err;
+ found = true;
+ success |= !err;
+ retval = err;
}
}
- ret = count ? retval : -ESRCH;
+ ret = success ? 0 : (found ? retval : -ESRCH);
}
read_unlock(&tasklist_lock);
--
2.35.3
Hi Eric,
This change LGTM, but ...
On 08/11, Eric W. Biederman wrote:
>
> @@ -1602,7 +1603,8 @@ static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid)
> ret = __kill_pgrp_info(sig, info,
> pid ? find_vpid(-pid) : task_pgrp(current));
> } else {
> - int retval = 0, count = 0;
> + bool found = false, success = false;
> + int retval = 0;
> struct task_struct * p;
>
> for_each_process(p) {
> @@ -1610,12 +1612,12 @@ static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid)
> !same_thread_group(p, current)) {
> int err = group_send_sig_info(sig, info, p,
> PIDTYPE_MAX);
> - ++count;
> - if (err != -EPERM)
> - retval = err;
> + found = true;
> + success |= !err;
> + retval = err;
> }
> }
> - ret = count ? retval : -ESRCH;
> + ret = success ? 0 : (found ? retval : -ESRCH);
Why do we need the "bool found" variable ? Afacis
} else {
bool success = false;
int retval = -ESRCH;
struct task_struct * p;
for_each_process(p) {
if (task_pid_vnr(p) > 1 &&
!same_thread_group(p, current)) {
int err = group_send_sig_info(sig, info, p,
PIDTYPE_MAX);
success |= !err;
retval = err;
}
}
ret = success ? 0 : retval;
}
does the same?
Oleg.
On 08/14, Oleg Nesterov wrote:
>
> Why do we need the "bool found" variable ? Afacis
>
> } else {
> bool success = false;
> int retval = -ESRCH;
> struct task_struct * p;
>
> for_each_process(p) {
> if (task_pid_vnr(p) > 1 &&
> !same_thread_group(p, current)) {
> int err = group_send_sig_info(sig, info, p,
> PIDTYPE_MAX);
> success |= !err;
> retval = err;
> }
> }
> ret = success ? 0 : retval;
> }
>
> does the same?
Even simpler
} else {
struct task_struct * p;
bool success = false;
int err = -ESRCH;
for_each_process(p) {
if (task_pid_vnr(p) > 1 &&
!same_thread_group(p, current)) {
err = group_send_sig_info(sig, info, p,
PIDTYPE_MAX);
success |= !err;
}
}
ret = success ? 0 : err;
}
unless I missed something...
Oleg.
From: Oleg Nesterov
> Sent: 14 August 2023 16:44
...
> Even simpler
>
> } else {
> struct task_struct * p;
> bool success = false;
> int err = -ESRCH;
int err;
ret = -ESRCH;
>
> for_each_process(p) {
> if (task_pid_vnr(p) > 1 &&
> !same_thread_group(p, current)) {
> err = group_send_sig_info(sig, info, p,
> PIDTYPE_MAX);
> success |= !err;
> }
> }
> ret = success ? 0 : err;
> }
or maybe even:
} else {
struct task_struct * p;
int err;
ret = -ESRCH;
for_each_process(p) {
if (task_pid_vnr(p) > 1 &&
!same_thread_group(p, current)) {
err = group_send_sig_info(sig, info, p,
PIDTYPE_MAX);
if (ret)
ret = err;
}
}
}
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On 08/15, David Laight wrote:
>
> or maybe even:
> } else {
> struct task_struct * p;
> int err;
> ret = -ESRCH;
>
> for_each_process(p) {
> if (task_pid_vnr(p) > 1 &&
> !same_thread_group(p, current)) {
> err = group_send_sig_info(sig, info, p,
> PIDTYPE_MAX);
> if (ret)
> ret = err;
Hmm, indeed ;)
and "err" can be declared inside the loop.
Oleg.
Oleg Nesterov <oleg@redhat.com> writes:
> On 08/15, David Laight wrote:
>>
>> or maybe even:
>> } else {
>> struct task_struct * p;
>> int err;
>> ret = -ESRCH;
>>
>> for_each_process(p) {
>> if (task_pid_vnr(p) > 1 &&
>> !same_thread_group(p, current)) {
>> err = group_send_sig_info(sig, info, p,
>> PIDTYPE_MAX);
>> if (ret)
>> ret = err;
>
> Hmm, indeed ;)
>
> and "err" can be declared inside the loop.
We can't remove the success case, from my posted patch.
A signal is considered as successfully delivered if at least
one process receives it.
That is something the current code for kill -1 actually gets
wrong (but hides because it ignores -EPERM).
Otherwise yes I expect we can simplify the use of variables as
suggested.
Eric
On 08/16, Eric W. Biederman wrote:
>
> Oleg Nesterov <oleg@redhat.com> writes:
>
> > On 08/15, David Laight wrote:
> >>
> >> or maybe even:
> >> } else {
> >> struct task_struct * p;
> >> int err;
> >> ret = -ESRCH;
> >>
> >> for_each_process(p) {
> >> if (task_pid_vnr(p) > 1 &&
> >> !same_thread_group(p, current)) {
> >> err = group_send_sig_info(sig, info, p,
> >> PIDTYPE_MAX);
> >> if (ret)
> >> ret = err;
> >
> > Hmm, indeed ;)
> >
> > and "err" can be declared inside the loop.
>
> We can't remove the success case, from my posted patch.
>
> A signal is considered as successfully delivered if at least
> one process receives it.
Yes.
Initially ret = -ESRCH.
Once group_send_sig_info() succeeds at least once (returns zero)
ret becomes 0.
After that
if (ret)
ret = err;
has no effect.
So if a signal is successfully delivered at least once the code
above returns zero.
Oleg.
Oleg Nesterov <oleg@redhat.com> writes:
> On 08/16, Eric W. Biederman wrote:
>>
>> Oleg Nesterov <oleg@redhat.com> writes:
>>
>> > On 08/15, David Laight wrote:
>> >>
>> >> or maybe even:
>> >> } else {
>> >> struct task_struct * p;
>> >> int err;
>> >> ret = -ESRCH;
>> >>
>> >> for_each_process(p) {
>> >> if (task_pid_vnr(p) > 1 &&
>> >> !same_thread_group(p, current)) {
>> >> err = group_send_sig_info(sig, info, p,
>> >> PIDTYPE_MAX);
>> >> if (ret)
>> >> ret = err;
>> >
>> > Hmm, indeed ;)
>> >
>> > and "err" can be declared inside the loop.
>>
>> We can't remove the success case, from my posted patch.
>>
>> A signal is considered as successfully delivered if at least
>> one process receives it.
>
> Yes.
>
> Initially ret = -ESRCH.
>
> Once group_send_sig_info() succeeds at least once (returns zero)
> ret becomes 0.
>
> After that
>
> if (ret)
> ret = err;
>
> has no effect.
>
> So if a signal is successfully delivered at least once the code
> above returns zero.
Point.
We should be consistent and ensure __kill_pgrp_info uses
the same code pattern, otherwise it will be difficult to
see they use the same logic.
Does "if (ret) ret = err;" generate better code than "success |= !err"?
I think for both patterns the reader of the code is going to have to
stop and think about what is going on to understand the logic.
We should probably do something like:
/* 0 for success or the last error */
if (ret)
ret = err;
I am somewhat partial to keeping the variable "success" simply because
while it's computation is clever it's use in generating the result is
not, so it should be more comprehensible code. Plus the variable
success seems not to need a comment just a minute to stare at
the code and confirm it works.
Eric
"Eric W. Biederman" <ebiederm@xmission.com> writes:
> Oleg Nesterov <oleg@redhat.com> writes:
>
>> On 08/16, Eric W. Biederman wrote:
>>>
>>> Oleg Nesterov <oleg@redhat.com> writes:
>>>
>>> > On 08/15, David Laight wrote:
>>> >>
>>> >> or maybe even:
>>> >> } else {
>>> >> struct task_struct * p;
>>> >> int err;
>>> >> ret = -ESRCH;
>>> >>
>>> >> for_each_process(p) {
>>> >> if (task_pid_vnr(p) > 1 &&
>>> >> !same_thread_group(p, current)) {
>>> >> err = group_send_sig_info(sig, info, p,
>>> >> PIDTYPE_MAX);
>>> >> if (ret)
>>> >> ret = err;
>>> >
>>> > Hmm, indeed ;)
>>> >
>>> > and "err" can be declared inside the loop.
>>>
>>> We can't remove the success case, from my posted patch.
>>>
>>> A signal is considered as successfully delivered if at least
>>> one process receives it.
>>
>> Yes.
>>
>> Initially ret = -ESRCH.
>>
>> Once group_send_sig_info() succeeds at least once (returns zero)
>> ret becomes 0.
>>
>> After that
>>
>> if (ret)
>> ret = err;
>>
>> has no effect.
>>
>> So if a signal is successfully delivered at least once the code
>> above returns zero.
>
> Point.
>
> We should be consistent and ensure __kill_pgrp_info uses
> the same code pattern, otherwise it will be difficult to
> see they use the same logic.
>
> Does "if (ret) ret = err;" generate better code than "success |= !err"?
>
I just looked at the assembly output and at least on x86 with cmov
"if (ret) ret = err;" generates the better assembly even in
the inner loop.
> I think for both patterns the reader of the code is going to have to
> stop and think about what is going on to understand the logic.
>
> We should probably do something like:
>
> /* 0 for success or the last error */
> if (ret)
> ret = err;
>
Even with that comment it feels awkward to me.
Does anyone have any idea how to make that idiom more obvious
what is happening?
Eric
On 08/16, Eric W. Biederman wrote:
>
> > We should be consistent and ensure __kill_pgrp_info uses
> > the same code pattern, otherwise it will be difficult to
> > see they use the same logic.
Hmm, agreed.
Then I think we should change __kill_pgrp_info() first, then "copy"
this pattern into kill_something_info() in a separate patch.
> > I think for both patterns the reader of the code is going to have to
> > stop and think about what is going on to understand the logic.
Yes, although to me the current code looks less clear but this is subjective.
But I agree this needs a comment. How about the patch below?
From 753d4edd1f2f21f9f9181b9ff7394ed098d58ff6 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 17 Aug 2023 17:38:55 +0200
Subject: [PATCH] __kill_pgrp_info: simplify the calculation of return value
No need to calculate/check the "success" variable, we can kill it and update
retval in the main loop unless it is zero.
Suggested-by: David Laight <David.Laight@ACULAB.COM>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
kernel/signal.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/kernel/signal.c b/kernel/signal.c
index 128e9bb3d1a2..c0acdfd4c81b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1460,16 +1460,21 @@ int group_send_sig_info(int sig, struct kernel_siginfo *info,
int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp)
{
struct task_struct *p = NULL;
- int retval, success;
+ int ret = -ESRCH;
- success = 0;
- retval = -ESRCH;
do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID);
- success |= !err;
- retval = err;
+ /*
+ * If group_send_sig_info() succeeds at least once ret
+ * becomes 0 and after that the code below has no effect.
+ * Otherwise we return the last err or -ESRCH if this
+ * process group is empty.
+ */
+ if (ret)
+ ret = err;
} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
- return success ? 0 : retval;
+
+ return ret;
}
int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
--
2.25.1.362.g51ebf55
© 2016 - 2025 Red Hat, Inc.