From: Gang Yan <yangang@kylinos.cn>
We introduce the same handling for potential data races with the
'fully_established' flag in subflow as previously done for
msk->fully_established.
Additionally, we make a crucial change: convert the subflow's
'fully_established' from 'bit_field' to 'bool' type. This is
necessary because methods for avoiding data races don't work well
with 'bit_field'. Specifically, the 'READ_ONCE' needs to know
the size of the variable being accessed, which is not supported in
'bit_field'. Also, 'test_bit' expects an address, which cannot be
taken of a 'bit_field'.
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/516
Signed-off-by: Gang Yan <yangang@kylinos.cn>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
---
net/mptcp/diag.c | 2 +-
net/mptcp/options.c | 4 ++--
net/mptcp/protocol.c | 2 +-
net/mptcp/protocol.h | 6 +++---
net/mptcp/subflow.c | 4 ++--
5 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index 2d3efb405437d85c0bca70d7a92ca3a7363365e1..02205f7994d752cc505991efdf7aa0bbbfd830db 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -47,7 +47,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_REM;
if (sf->request_bkup)
flags |= MPTCP_SUBFLOW_FLAG_BKUP_LOC;
- if (sf->fully_established)
+ if (READ_ONCE(sf->fully_established))
flags |= MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED;
if (sf->conn_finished)
flags |= MPTCP_SUBFLOW_FLAG_CONNECTED;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 370c3836b7712f4ca97c99d35a20e88e85a33d70..1603b3702e2207f191fdeef2b29ea2f05fd2b910 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -461,7 +461,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
return false;
/* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
- if (subflow->fully_established || snd_data_fin_enable ||
+ if (READ_ONCE(subflow->fully_established) || snd_data_fin_enable ||
subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
sk->sk_state != TCP_ESTABLISHED)
return false;
@@ -930,7 +930,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
/* here we can process OoO, in-window pkts, only in-sequence 4th ack
* will make the subflow fully established
*/
- if (likely(subflow->fully_established)) {
+ if (likely(READ_ONCE(subflow->fully_established))) {
/* on passive sockets, check for 3rd ack retransmission
* note that msk is always set by subflow_syn_recv_sock()
* for mp_join subflows
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1f5c63eb21f0db92341ec941cfe2aec919cdd3de..a6c9661a4c45a00e982d0f68f21621c3cf33469b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3511,7 +3511,7 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
struct tcp_sock *tp = tcp_sk(ssk);
unsigned long timeout;
- if (mptcp_subflow_ctx(ssk)->fully_established)
+ if (READ_ONCE(mptcp_subflow_ctx(ssk)->fully_established))
return;
/* reschedule with a timeout above RTT, as we must look only for drop */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 568a72702b080d7610425ce5c3a409c7b88da13a..a93e661ef5c435155066ce9cc109092661f0711c 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -513,7 +513,6 @@ struct mptcp_subflow_context {
request_bkup : 1,
mp_capable : 1, /* remote is MPTCP capable */
mp_join : 1, /* remote is JOINing */
- fully_established : 1, /* path validated */
pm_notified : 1, /* PM hook called for established status */
conn_finished : 1,
map_valid : 1,
@@ -532,10 +531,11 @@ struct mptcp_subflow_context {
is_mptfo : 1, /* subflow is doing TFO */
close_event_done : 1, /* has done the post-closed part */
mpc_drop : 1, /* the MPC option has been dropped in a rtx */
- __unused : 8;
+ __unused : 9;
bool data_avail;
bool scheduled;
bool pm_listener; /* a listener managed by the kernel PM? */
+ bool fully_established; /* path validated */
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -780,7 +780,7 @@ static inline bool __tcp_can_send(const struct sock *ssk)
static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
- if (subflow->request_join && !subflow->fully_established)
+ if (subflow->request_join && !READ_ONCE(subflow->fully_established))
return false;
return __tcp_can_send(mptcp_subflow_tcp_sock(subflow));
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 6170f2fff71e4f9d64837f2ebf4d81bba224fafb..860903e0642255cf9efb39da9e24c39f6547481f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -800,7 +800,7 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
const struct mptcp_options_received *mp_opt)
{
subflow_set_remote_key(msk, subflow, mp_opt);
- subflow->fully_established = 1;
+ WRITE_ONCE(subflow->fully_established, true);
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
@@ -2062,7 +2062,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
- new_ctx->fully_established = 1;
+ WRITE_ONCE(new_ctx->fully_established, true);
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
new_ctx->request_bkup = subflow_req->request_bkup;
--
2.45.2
On Mon, Oct 21, 2024 at 05:14:04PM +0200, Matthieu Baerts (NGI0) wrote:
> From: Gang Yan <yangang@kylinos.cn>
>
> We introduce the same handling for potential data races with the
> 'fully_established' flag in subflow as previously done for
> msk->fully_established.
>
> Additionally, we make a crucial change: convert the subflow's
> 'fully_established' from 'bit_field' to 'bool' type. This is
> necessary because methods for avoiding data races don't work well
> with 'bit_field'. Specifically, the 'READ_ONCE' needs to know
> the size of the variable being accessed, which is not supported in
> 'bit_field'. Also, 'test_bit' expect the address of 'bit_field'.
>
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/516
> Signed-off-by: Gang Yan <yangang@kylinos.cn>
> Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
...
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 568a72702b080d7610425ce5c3a409c7b88da13a..a93e661ef5c435155066ce9cc109092661f0711c 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -513,7 +513,6 @@ struct mptcp_subflow_context {
> request_bkup : 1,
> mp_capable : 1, /* remote is MPTCP capable */
> mp_join : 1, /* remote is JOINing */
> - fully_established : 1, /* path validated */
> pm_notified : 1, /* PM hook called for established status */
> conn_finished : 1,
> map_valid : 1,
> @@ -532,10 +531,11 @@ struct mptcp_subflow_context {
> is_mptfo : 1, /* subflow is doing TFO */
> close_event_done : 1, /* has done the post-closed part */
> mpc_drop : 1, /* the MPC option has been dropped in a rtx */
> - __unused : 8;
> + __unused : 9;
> bool data_avail;
> bool scheduled;
> bool pm_listener; /* a listener managed by the kernel PM? */
> + bool fully_established; /* path validated */
> u32 remote_nonce;
> u64 thmac;
> u32 local_nonce;
...
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 6170f2fff71e4f9d64837f2ebf4d81bba224fafb..860903e0642255cf9efb39da9e24c39f6547481f 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -800,7 +800,7 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
> const struct mptcp_options_received *mp_opt)
> {
> subflow_set_remote_key(msk, subflow, mp_opt);
> - subflow->fully_established = 1;
> + WRITE_ONCE(subflow->fully_established, true);
> WRITE_ONCE(msk->fully_established, true);
>
> if (subflow->is_mptfo)
> @@ -2062,7 +2062,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
> } else if (subflow_req->mp_join) {
> new_ctx->ssn_offset = subflow_req->ssn_offset;
> new_ctx->mp_join = 1;
> - new_ctx->fully_established = 1;
> + WRITE_ONCE(new_ctx->fully_established, true);
> new_ctx->remote_key_valid = 1;
> new_ctx->backup = subflow_req->backup;
> new_ctx->request_bkup = subflow_req->request_bkup;
My understanding is that 1) fully_established is now a single byte and
2) WRITE_ONCE is not necessary for a single byte because, if I understand
Eric's comment in [1] correctly, tearing is not possible in this case.
[1] https://lore.kernel.org/netdev/CANn89i+8myPgn61bn7DBqcnK5kXX2XvPo2oc2TfzntPUkeqQ6w@mail.gmail.com/
Hi Simon,
Thank you for the review!
On 25/10/2024 11:55, Simon Horman wrote:
> On Mon, Oct 21, 2024 at 05:14:04PM +0200, Matthieu Baerts (NGI0) wrote:
>> From: Gang Yan <yangang@kylinos.cn>
>>
>> We introduce the same handling for potential data races with the
>> 'fully_established' flag in subflow as previously done for
>> msk->fully_established.
>>
>> Additionally, we make a crucial change: convert the subflow's
>> 'fully_established' from 'bit_field' to 'bool' type. This is
>> necessary because methods for avoiding data races don't work well
>> with 'bit_field'. Specifically, the 'READ_ONCE' needs to know
>> the size of the variable being accessed, which is not supported in
>> 'bit_field'. Also, 'test_bit' expect the address of 'bit_field'.
>>
>> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/516
>> Signed-off-by: Gang Yan <yangang@kylinos.cn>
>> Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
>> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
>
> ...
>
>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>> index 568a72702b080d7610425ce5c3a409c7b88da13a..a93e661ef5c435155066ce9cc109092661f0711c 100644
>> --- a/net/mptcp/protocol.h
>> +++ b/net/mptcp/protocol.h
>> @@ -513,7 +513,6 @@ struct mptcp_subflow_context {
>> request_bkup : 1,
>> mp_capable : 1, /* remote is MPTCP capable */
>> mp_join : 1, /* remote is JOINing */
>> - fully_established : 1, /* path validated */
>> pm_notified : 1, /* PM hook called for established status */
>> conn_finished : 1,
>> map_valid : 1,
>> @@ -532,10 +531,11 @@ struct mptcp_subflow_context {
>> is_mptfo : 1, /* subflow is doing TFO */
>> close_event_done : 1, /* has done the post-closed part */
>> mpc_drop : 1, /* the MPC option has been dropped in a rtx */
>> - __unused : 8;
>> + __unused : 9;
>> bool data_avail;
>> bool scheduled;
>> bool pm_listener; /* a listener managed by the kernel PM? */
>> + bool fully_established; /* path validated */
>> u32 remote_nonce;
>> u64 thmac;
>> u32 local_nonce;
>
> ...
>
>> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
>> index 6170f2fff71e4f9d64837f2ebf4d81bba224fafb..860903e0642255cf9efb39da9e24c39f6547481f 100644
>> --- a/net/mptcp/subflow.c
>> +++ b/net/mptcp/subflow.c
>> @@ -800,7 +800,7 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
>> const struct mptcp_options_received *mp_opt)
>> {
>> subflow_set_remote_key(msk, subflow, mp_opt);
>> - subflow->fully_established = 1;
>> + WRITE_ONCE(subflow->fully_established, true);
>> WRITE_ONCE(msk->fully_established, true);
>>
>> if (subflow->is_mptfo)
>> @@ -2062,7 +2062,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
>> } else if (subflow_req->mp_join) {
>> new_ctx->ssn_offset = subflow_req->ssn_offset;
>> new_ctx->mp_join = 1;
>> - new_ctx->fully_established = 1;
>> + WRITE_ONCE(new_ctx->fully_established, true);
>> new_ctx->remote_key_valid = 1;
>> new_ctx->backup = subflow_req->backup;
>> new_ctx->request_bkup = subflow_req->request_bkup;
>
> My understanding is that 1) fully_established is now a single byte and
> 2) WRITE_ONCE is not necessary for a single byte, as if I understand Eric's
> comment in [1] correctly, tearing is not possible in this case.
Good point, I appreciate this note. I didn't realise that WRITE_ONCE() is
never necessary for a single byte!
Just to be sure: is it an issue to keep them?
I mean: here, we are not in the fast path, and I think it "feels" better
to see WRITE_ONCE() being used when all the readers use READ_ONCE(). Do
you see what I mean? That way, the reader does not have to think "strange,
no WRITE_ONCE() here; oh, but that's fine because it is a single byte when
I look at its definition".
Also, many other single byte variables in MPTCP structures are being
used with WRITE_ONCE(): "msk->fully_established" (used just above), but
also the other booleans declared above the new one in the subflow
context structure, and in other structures declared in protocol.h.
(Note that, if WRITE_ONCE() is always useless for a single byte, it could
also be made a no-op in that case, so that callers can keep using it for
consistency.)
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
© 2016 - 2026 Red Hat, Inc.