net/tls/tls_main.c | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-)
proto->close is normally called from a userspace task which can be
interrupted by signals. When asynchronous encryption is used, KTLS
sends out the final data at close time. If a signal arrives during
close, it can happen that tcp_sendmsg_locked() is interrupted by that
signal while waiting for memory in sk_stream_wait_memory(), which then
returns -ERESTARTSYS. It is not possible to recover from this situation
and the final transmit data is lost.
With this patch we defer the close operation to a kernel task which
doesn't get signals.
The described situation happens when KTLS is used in conjunction with
io_uring, as io_uring uses task_work_add() to add work to the current
userspace task.
The problem is discussed in [1] and [2], and the solution implemented in
this patch was suggested by Pavel Begunkov in [3].
[1] https://lore.kernel.org/all/20231010141932.GD3114228@pengutronix.de/
[2] https://lore.kernel.org/all/20240315100159.3898944-1-s.hauer@pengutronix.de/
[3] https://lore.kernel.org/all/bfc6afa9-501f-40b6-929a-3aa8c0298265@gmail.com
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
Note: I only need this asynchronous close for the
ctx->tx_conf == TLS_SW case. I can refactor the patch to go
asynchronous only when necessary, if that's desired.
---
net/tls/tls_main.c | 37 +++++++++++++++++++++++++++++++------
1 file changed, 31 insertions(+), 6 deletions(-)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b4674f03d71a9..b0b7e0d2f1145 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -365,16 +365,21 @@ static void tls_sk_proto_cleanup(struct sock *sk,
}
}
-static void tls_sk_proto_close(struct sock *sk, long timeout)
+struct tls_close_work {
+ struct work_struct work;
+ struct tls_context *ctx;
+ long timeout;
+};
+
+static void deferred_close(struct work_struct *work)
{
+ struct tls_close_work *cw = container_of(work, struct tls_close_work, work);
+ struct tls_context *ctx = cw->ctx;
+ struct sock *sk = ctx->sk;
struct inet_connection_sock *icsk = inet_csk(sk);
- struct tls_context *ctx = tls_get_ctx(sk);
long timeo = sock_sndtimeo(sk, 0);
bool free_ctx;
- if (ctx->tx_conf == TLS_SW)
- tls_sw_cancel_work_tx(ctx);
-
lock_sock(sk);
free_ctx = ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW;
@@ -395,10 +400,30 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
tls_sw_strparser_done(ctx);
if (ctx->rx_conf == TLS_SW)
tls_sw_free_ctx_rx(ctx);
- ctx->sk_proto->close(sk, timeout);
+ ctx->sk_proto->close(sk, cw->timeout);
if (free_ctx)
tls_ctx_free(sk, ctx);
+
+ kfree(cw);
+}
+
+static void tls_sk_proto_close(struct sock *sk, long timeout)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ struct tls_close_work *cw;
+
+ if (ctx->tx_conf == TLS_SW)
+ tls_sw_cancel_work_tx(ctx);
+
+ cw = kmalloc(sizeof(*cw), GFP_KERNEL);
+ if (!cw)
+ return;
+
+ INIT_WORK(&cw->work, deferred_close);
+ cw->timeout = timeout;
+ cw->ctx = ctx;
+ queue_work(system_unbound_wq, &cw->work);
}
static __poll_t tls_sk_poll(struct file *file, struct socket *sock,
---
base-commit: fec50db7033ea478773b159e0e2efb135270e3b7
change-id: 20240410-ktls-defer-close-002934564b09
Best regards,
--
Sascha Hauer <s.hauer@pengutronix.de>
On Wed, 10 Apr 2024 08:33:07 +0200 Sascha Hauer wrote:
> proto->close is normally called from a userspace task which can be
> interrupted by signals. When asynchronous encryption is used then KTLS
> sends out the final data at close time. When a signal comes in during
> close then it can happen tcp_sendmsg_locked() is interrupted by that
> signal while waiting for memory in sk_stream_wait_memory() which then
> returns with -ERSTARTSYS. It is not possible to recover from this situation
> and the final transmit data is lost.
>
> With this patch we defer the close operation to a kernel task which
> doesn't get signals.
>
> The described situation happens when KTLS is used in conjunction with
> io_uring, as io_uring uses task_work_add() to add work to the current
> userspace task.
>
> The problem is discussed in [1] and [2] and the solution implemented in
> this patch is suggested by Pavel Begunkov here [3]

Appears to crash reliably.
Please run the tls selftests with KASAN enabled.
--
pw-bot: cr
On Wed, Apr 10, 2024 at 06:11:28AM -0700, Jakub Kicinski wrote:
> On Wed, 10 Apr 2024 08:33:07 +0200 Sascha Hauer wrote:
> > proto->close is normally called from a userspace task which can be
> > interrupted by signals. When asynchronous encryption is used then KTLS
> > sends out the final data at close time. When a signal comes in during
> > close then it can happen tcp_sendmsg_locked() is interrupted by that
> > signal while waiting for memory in sk_stream_wait_memory() which then
> > returns with -ERSTARTSYS. It is not possible to recover from this situation
> > and the final transmit data is lost.
> >
> > With this patch we defer the close operation to a kernel task which
> > doesn't get signals.
> >
> > The described situation happens when KTLS is used in conjunction with
> > io_uring, as io_uring uses task_work_add() to add work to the current
> > userspace task.
> >
> > The problem is discussed in [1] and [2] and the solution implemented in
> > this patch is suggested by Pavel Begunkov here [3]
>
> Appears to crash reliably.
> Please run the tls selftests with KASAN enabled.

Oops, will run the tests and fix the fallout before resending.

Sascha

--
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |
© 2016 - 2026 Red Hat, Inc.