 fs/pipe.c               | 6 ++++--
 include/linux/fs.h      | 1 +
 include/uapi/linux/fs.h | 5 ++++-
 net/socket.c            | 3 +++
 4 files changed, 12 insertions(+), 3 deletions(-)
It is cumbersome for a user-mode library to avoid generating SIGPIPE
signals (e.g. because this behaviour is not portable across operating
systems). It is generally bad form to change the process-wide signal
mask in a library, so a local solution is needed instead.
For I/O performed directly using system calls (synchronous or readiness
based asynchronous) this currently involves applying a thread-specific
signal mask before the operation and reverting it afterwards. This can be
avoided when it is known that the file descriptor refers to neither a
pipe nor a socket, but a conservative implementation must always apply
the mask. This incurs the cost of two additional system calls. In the
case of sockets, the existing MSG_NOSIGNAL flag can be used with send.
For asynchronous I/O performed using io_uring, currently the only option
(apart from MSG_NOSIGNAL for sockets) is to mask SIGPIPE entirely in the
call to io_uring_enter. Thankfully io_uring_enter takes a signal mask, so
only a single syscall is needed. However, copying the signal mask on
every call incurs a non-zero performance penalty. Furthermore, this mask
applies to all completions, meaning that if the non-signaling behaviour
is desired only for some subset of operations, the desired signals must
be raised manually from user mode depending on the completed operation.
Add an RWF_NOSIGNAL flag for pwritev2. This flag prevents the SIGPIPE signal
from being raised when writing on disconnected pipes or sockets. The flag
is handled directly by the pipe filesystem and converted to the existing
MSG_NOSIGNAL flag for sockets.
Signed-off-by: Lauri Vasama <git@vasama.org>
---
fs/pipe.c | 6 ++++--
include/linux/fs.h | 1 +
include/uapi/linux/fs.h | 5 ++++-
net/socket.c | 3 +++
4 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/fs/pipe.c b/fs/pipe.c
index 731622d0738d..42fead1efe52 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -458,7 +458,8 @@ anon_pipe_write(struct kiocb *iocb, struct iov_iter *from)
mutex_lock(&pipe->mutex);
if (!pipe->readers) {
- send_sig(SIGPIPE, current, 0);
+ if ((iocb->ki_flags & IOCB_NOSIGNAL) == 0)
+ send_sig(SIGPIPE, current, 0);
ret = -EPIPE;
goto out;
}
@@ -498,7 +499,8 @@ anon_pipe_write(struct kiocb *iocb, struct iov_iter *from)
for (;;) {
if (!pipe->readers) {
- send_sig(SIGPIPE, current, 0);
+ if ((iocb->ki_flags & IOCB_NOSIGNAL) == 0)
+ send_sig(SIGPIPE, current, 0);
if (!ret)
ret = -EPIPE;
break;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d7ab4f96d705..e440c5ae5d99 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -356,6 +356,7 @@ struct readahead_control;
#define IOCB_APPEND (__force int) RWF_APPEND
#define IOCB_ATOMIC (__force int) RWF_ATOMIC
#define IOCB_DONTCACHE (__force int) RWF_DONTCACHE
+#define IOCB_NOSIGNAL (__force int) RWF_NOSIGNAL
/* non-RWF related bits - start at 16 */
#define IOCB_EVENTFD (1 << 16)
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 0bd678a4a10e..beb4c2d1e41c 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -430,10 +430,13 @@ typedef int __bitwise __kernel_rwf_t;
/* buffered IO that drops the cache after reading or writing data */
#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080)
+/* prevent pipe and socket writes from raising SIGPIPE */
+#define RWF_NOSIGNAL ((__force __kernel_rwf_t)0x00000100)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
- RWF_DONTCACHE)
+ RWF_DONTCACHE | RWF_NOSIGNAL)
#define PROCFS_IOCTL_MAGIC 'f'
diff --git a/net/socket.c b/net/socket.c
index 682969deaed3..bac335ecee4c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1176,6 +1176,9 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (sock->type == SOCK_SEQPACKET)
msg.msg_flags |= MSG_EOR;
+ if (iocb->ki_flags & IOCB_NOSIGNAL)
+ msg.msg_flags |= MSG_NOSIGNAL;
+
res = __sock_sendmsg(sock, &msg);
*from = msg.msg_iter;
return res;
base-commit: fab1beda7597fac1cecc01707d55eadb6bbe773c
--
2.43.0
On Wed, 27 Aug 2025 16:39:00 +0300, Lauri Vasama wrote: > For a user mode library to avoid generating SIGPIPE signals (e.g. > because this behaviour is not portable across operating systems) is > cumbersome. It is generally bad form to change the process-wide signal > mask in a library, so a local solution is needed instead. > > For I/O performed directly using system calls (synchronous or readiness > based asynchronous) this currently involves applying a thread-specific > signal mask before the operation and reverting it afterwards. This can be > avoided when it is known that the file descriptor refers to neither a > pipe nor a socket, but a conservative implementation must always apply > the mask. This incurs the cost of two additional system calls. In the > case of sockets, the existing MSG_NOSIGNAL flag can be used with send. > > [...] Applied to the vfs-6.18.misc branch of the vfs/vfs.git tree. Patches in the vfs-6.18.misc branch should appear in linux-next soon. Please report any outstanding bugs that were missed during review in a new review to the original patch series allowing us to drop it. It's encouraged to provide Acked-bys and Reviewed-bys even though the patch has now been applied. If possible patch trailers will be updated. Note that commit hashes shown below are subject to change due to rebase, trailer updates or similar. If in doubt, please check the listed branch. tree: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git branch: vfs-6.18.misc [1/1] Add RWF_NOSIGNAL flag for pwritev2 https://git.kernel.org/vfs/vfs/c/db2ab24a341c
On 8/27/25 7:39 AM, Lauri Vasama wrote: > For a user mode library to avoid generating SIGPIPE signals (e.g. > because this behaviour is not portable across operating systems) is > cumbersome. It is generally bad form to change the process-wide signal > mask in a library, so a local solution is needed instead. > > For I/O performed directly using system calls (synchronous or readiness > based asynchronous) this currently involves applying a thread-specific > signal mask before the operation and reverting it afterwards. This can be > avoided when it is known that the file descriptor refers to neither a > pipe nor a socket, but a conservative implementation must always apply > the mask. This incurs the cost of two additional system calls. In the > case of sockets, the existing MSG_NOSIGNAL flag can be used with send. > > For asynchronous I/O performed using io_uring, currently the only option > (apart from MSG_NOSIGNAL for sockets), is to mask SIGPIPE entirely in the > call to io_uring_enter. Thankfully io_uring_enter takes a signal mask, so > only a single syscall is needed. However, copying the signal mask on > every call incurs a non-zero performance penalty. Furthermore, this mask > applies to all completions, meaning that if the non-signaling behaviour > is desired only for some subset of operations, the desired signals must > be raised manually from user-mode depending on the completed operation. > > Add RWF_NOSIGNAL flag for pwritev2. This flag prevents the SIGPIPE signal > from being raised when writing on disconnected pipes or sockets. The flag > is handled directly by the pipe filesystem and converted to the existing > MSG_NOSIGNAL flag for sockets. LGTM, only curiosity is why this hasn't been added before. Reviewed-by: Jens Axboe <axboe@kernel.dk> -- Jens Axboe
On Wed, Aug 27, 2025 at 04:39:00PM +0300, Lauri Vasama wrote: > For a user mode library to avoid generating SIGPIPE signals (e.g. > because this behaviour is not portable across operating systems) is > cumbersome. It is generally bad form to change the process-wide signal > mask in a library, so a local solution is needed instead. > > For I/O performed directly using system calls (synchronous or readiness > based asynchronous) this currently involves applying a thread-specific > signal mask before the operation and reverting it afterwards. This can be > avoided when it is known that the file descriptor refers to neither a > pipe nor a socket, but a conservative implementation must always apply > the mask. This incurs the cost of two additional system calls. In the > case of sockets, the existing MSG_NOSIGNAL flag can be used with send. > > For asynchronous I/O performed using io_uring, currently the only option > (apart from MSG_NOSIGNAL for sockets), is to mask SIGPIPE entirely in the > call to io_uring_enter. Thankfully io_uring_enter takes a signal mask, so > only a single syscall is needed. However, copying the signal mask on > every call incurs a non-zero performance penalty. Furthermore, this mask > applies to all completions, meaning that if the non-signaling behaviour > is desired only for some subset of operations, the desired signals must > be raised manually from user-mode depending on the completed operation. > > Add RWF_NOSIGNAL flag for pwritev2. This flag prevents the SIGPIPE signal > from being raised when writing on disconnected pipes or sockets. The flag > is handled directly by the pipe filesystem and converted to the existing > MSG_NOSIGNAL flag for sockets. > > Signed-off-by: Lauri Vasama <git@vasama.org> > --- So this makes sense to me. I'll wait for @Jens to chime in, too, before I apply it. 
> fs/pipe.c | 6 ++++-- > include/linux/fs.h | 1 + > include/uapi/linux/fs.h | 5 ++++- > net/socket.c | 3 +++ > 4 files changed, 12 insertions(+), 3 deletions(-) > > diff --git a/fs/pipe.c b/fs/pipe.c > index 731622d0738d..42fead1efe52 100644 > --- a/fs/pipe.c > +++ b/fs/pipe.c > @@ -458,7 +458,8 @@ anon_pipe_write(struct kiocb *iocb, struct iov_iter *from) > mutex_lock(&pipe->mutex); > > if (!pipe->readers) { > - send_sig(SIGPIPE, current, 0); > + if ((iocb->ki_flags & IOCB_NOSIGNAL) == 0) > + send_sig(SIGPIPE, current, 0); > ret = -EPIPE; > goto out; > } > @@ -498,7 +499,8 @@ anon_pipe_write(struct kiocb *iocb, struct iov_iter *from) > > for (;;) { > if (!pipe->readers) { > - send_sig(SIGPIPE, current, 0); > + if ((iocb->ki_flags & IOCB_NOSIGNAL) == 0) > + send_sig(SIGPIPE, current, 0); > if (!ret) > ret = -EPIPE; > break; > diff --git a/include/linux/fs.h b/include/linux/fs.h > index d7ab4f96d705..e440c5ae5d99 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -356,6 +356,7 @@ struct readahead_control; > #define IOCB_APPEND (__force int) RWF_APPEND > #define IOCB_ATOMIC (__force int) RWF_ATOMIC > #define IOCB_DONTCACHE (__force int) RWF_DONTCACHE > +#define IOCB_NOSIGNAL (__force int) RWF_NOSIGNAL > > /* non-RWF related bits - start at 16 */ > #define IOCB_EVENTFD (1 << 16) > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h > index 0bd678a4a10e..beb4c2d1e41c 100644 > --- a/include/uapi/linux/fs.h > +++ b/include/uapi/linux/fs.h > @@ -430,10 +430,13 @@ typedef int __bitwise __kernel_rwf_t; > /* buffered IO that drops the cache after reading or writing data */ > #define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) > > +/* prevent pipe and socket writes from raising SIGPIPE */ > +#define RWF_NOSIGNAL ((__force __kernel_rwf_t)0x00000100) > + > /* mask of flags supported by the kernel */ > #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ > RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ > - RWF_DONTCACHE) > + 
RWF_DONTCACHE | RWF_NOSIGNAL) > > #define PROCFS_IOCTL_MAGIC 'f' > > diff --git a/net/socket.c b/net/socket.c > index 682969deaed3..bac335ecee4c 100644 > --- a/net/socket.c > +++ b/net/socket.c > @@ -1176,6 +1176,9 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) > if (sock->type == SOCK_SEQPACKET) > msg.msg_flags |= MSG_EOR; > > + if (iocb->ki_flags & IOCB_NOSIGNAL) > + msg.msg_flags |= MSG_NOSIGNAL; > + > res = __sock_sendmsg(sock, &msg); > *from = msg.msg_iter; > return res; > > base-commit: fab1beda7597fac1cecc01707d55eadb6bbe773c > -- > 2.43.0 >
© 2016 - 2025 Red Hat, Inc.