[PATCH mptcp-next 2/5] mptcp: annotate lockless access for the tx path

Paolo Abeni posted 5 patches 9 months ago
There is a newer version of this series
[PATCH mptcp-next 2/5] mptcp: annotate lockless access for the tx path
Posted by Paolo Abeni 9 months ago
The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under
the msk socket lock protection, and are accessed lockless in a few spots.

Always mark the write operations with WRITE_ONCE, read operations
outside the lock with READ_ONCE and drop the annotation for read
under such lock.

To simplify the annotations move mptcp_pending_data_fin_ack() from
__mptcp_data_acked() to __mptcp_clean_una(), under the msk socket
lock, where such call would belong.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/options.c  |  2 +-
 net/mptcp/protocol.c | 14 ++++++--------
 net/mptcp/protocol.h |  2 +-
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index cebbd0bc32aa..51b00d7e7c89 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
 		msk->wnd_end = new_wnd_end;
 
 	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
-	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
+	if (after64(msk->wnd_end, snd_nxt))
 		__mptcp_check_push(sk, ssk);
 
 	if (after64(new_snd_una, old_snd_una)) {
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 9ad672a10c11..679d4576d2c1 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1033,13 +1033,14 @@ static void __mptcp_clean_una(struct sock *sk)
 		msk->recovery = false;
 
 out:
-	if (snd_una == READ_ONCE(msk->snd_nxt) &&
-	    snd_una == READ_ONCE(msk->write_seq)) {
+	if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) {
 		if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
 			mptcp_stop_rtx_timer(sk);
 	} else {
 		mptcp_reset_rtx_timer(sk);
 	}
+	if (mptcp_pending_data_fin_ack(sk))
+		mptcp_schedule_work(sk);
 }
 
 static void __mptcp_clean_una_wakeup(struct sock *sk)
@@ -1499,7 +1500,7 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
 	 */
 	if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
 		msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
-		msk->snd_nxt = snd_nxt_new;
+		WRITE_ONCE(msk->snd_nxt, snd_nxt_new);
 	}
 }
 
@@ -3210,8 +3211,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
 	if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
 		WRITE_ONCE(msk->csum_enabled, true);
 
-	msk->write_seq = subflow_req->idsn + 1;
-	msk->snd_nxt = msk->write_seq;
+	WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1);
+	WRITE_ONCE(msk->snd_nxt, msk->write_seq);
 	msk->snd_una = msk->write_seq;
 	msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
 	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
@@ -3316,9 +3317,6 @@ void __mptcp_data_acked(struct sock *sk)
 		__mptcp_clean_una(sk);
 	else
 		__set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
-
-	if (mptcp_pending_data_fin_ack(sk))
-		mptcp_schedule_work(sk);
 }
 
 void __mptcp_check_push(struct sock *sk, struct sock *ssk)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3af85643328e..d05ec76dd7c2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -402,7 +402,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
-	if (msk->snd_una == READ_ONCE(msk->snd_nxt))
+	if (msk->snd_una == msk->snd_nxt)
 		return NULL;
 
 	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
-- 
2.43.0
Re: [PATCH mptcp-next 2/5] mptcp: annotate lockless access for the tx path
Posted by Mat Martineau 9 months ago
On Tue, 16 Jan 2024, Paolo Abeni wrote:

> The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under
> the msk socket lock protection, and are accessed lockless in a few spots.
>
> Always mark the write operations with WRITE_ONCE, read operations
> outside the lock with READ_ONCE and drop the annotation for read
> under such lock.
>
> To simplify the annotations move mptcp_pending_data_fin_ack() from
> __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket
> lock, where such call would belong.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> net/mptcp/options.c  |  2 +-
> net/mptcp/protocol.c | 14 ++++++--------
> net/mptcp/protocol.h |  2 +-
> 3 files changed, 8 insertions(+), 10 deletions(-)
>
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index cebbd0bc32aa..51b00d7e7c89 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c
> @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
> 		msk->wnd_end = new_wnd_end;
>
> 	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
> -	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
> +	if (after64(msk->wnd_end, snd_nxt))

Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before 
the data_lock is acquired.

- Mat

> 		__mptcp_check_push(sk, ssk);
>
> 	if (after64(new_snd_una, old_snd_una)) {
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 9ad672a10c11..679d4576d2c1 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1033,13 +1033,14 @@ static void __mptcp_clean_una(struct sock *sk)
> 		msk->recovery = false;
>
> out:
> -	if (snd_una == READ_ONCE(msk->snd_nxt) &&
> -	    snd_una == READ_ONCE(msk->write_seq)) {
> +	if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) {
> 		if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
> 			mptcp_stop_rtx_timer(sk);
> 	} else {
> 		mptcp_reset_rtx_timer(sk);
> 	}
> +	if (mptcp_pending_data_fin_ack(sk))
> +		mptcp_schedule_work(sk);
> }
>
> static void __mptcp_clean_una_wakeup(struct sock *sk)
> @@ -1499,7 +1500,7 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
> 	 */
> 	if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
> 		msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
> -		msk->snd_nxt = snd_nxt_new;
> +		WRITE_ONCE(msk->snd_nxt, snd_nxt_new);
> 	}
> }
>
> @@ -3210,8 +3211,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
> 	if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
> 		WRITE_ONCE(msk->csum_enabled, true);
>
> -	msk->write_seq = subflow_req->idsn + 1;
> -	msk->snd_nxt = msk->write_seq;
> +	WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1);
> +	WRITE_ONCE(msk->snd_nxt, msk->write_seq);
> 	msk->snd_una = msk->write_seq;
> 	msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
> 	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
> @@ -3316,9 +3317,6 @@ void __mptcp_data_acked(struct sock *sk)
> 		__mptcp_clean_una(sk);
> 	else
> 		__set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
> -
> -	if (mptcp_pending_data_fin_ack(sk))
> -		mptcp_schedule_work(sk);
> }
>
> void __mptcp_check_push(struct sock *sk, struct sock *ssk)
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 3af85643328e..d05ec76dd7c2 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -402,7 +402,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk)
> {
> 	struct mptcp_sock *msk = mptcp_sk(sk);
>
> -	if (msk->snd_una == READ_ONCE(msk->snd_nxt))
> +	if (msk->snd_una == msk->snd_nxt)
> 		return NULL;
>
> 	return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
> -- 
> 2.43.0
>
>
>
Re: [PATCH mptcp-next 2/5] mptcp: annotate lockless access for the tx path
Posted by Paolo Abeni 9 months ago
On Thu, 2024-01-18 at 17:03 -0800, Mat Martineau wrote:
> On Tue, 16 Jan 2024, Paolo Abeni wrote:
> 
> > The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under
> > the msk socket lock protection, and are accessed lockless in a few spots.
> > 
> > Always mark the write operations with WRITE_ONCE, read operations
> > outside the lock with READ_ONCE and drop the annotation for read
> > under such lock.
> > 
> > To simplify the annotations move mptcp_pending_data_fin_ack() from
> > __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket
> > lock, where such call would belong.
> > 
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > net/mptcp/options.c  |  2 +-
> > net/mptcp/protocol.c | 14 ++++++--------
> > net/mptcp/protocol.h |  2 +-
> > 3 files changed, 8 insertions(+), 10 deletions(-)
> > 
> > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > index cebbd0bc32aa..51b00d7e7c89 100644
> > --- a/net/mptcp/options.c
> > +++ b/net/mptcp/options.c
> > @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
> > 		msk->wnd_end = new_wnd_end;
> > 
> > 	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
> > -	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
> > +	if (after64(msk->wnd_end, snd_nxt))
> 
> Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before 
> the data_lock is acquired.

You are right, READ_ONCE is required here. This is outside the msk
socket lock. I'll send a v2.

Thanks!

Paolo
Re: [PATCH mptcp-next 2/5] mptcp: annotate lockless access for the tx path
Posted by Paolo Abeni 8 months, 4 weeks ago
On Sun, 2024-01-21 at 22:47 +0100, Paolo Abeni wrote:
> On Thu, 2024-01-18 at 17:03 -0800, Mat Martineau wrote:
> > On Tue, 16 Jan 2024, Paolo Abeni wrote:
> > 
> > > The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under
> > > the msk socket lock protection, and are accessed lockless in a few spots.
> > > 
> > > Always mark the write operations with WRITE_ONCE, read operations
> > > outside the lock with READ_ONCE and drop the annotation for read
> > > under such lock.
> > > 
> > > To simplify the annotations move mptcp_pending_data_fin_ack() from
> > > __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket
> > > lock, where such call would belong.
> > > 
> > > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > > ---
> > > net/mptcp/options.c  |  2 +-
> > > net/mptcp/protocol.c | 14 ++++++--------
> > > net/mptcp/protocol.h |  2 +-
> > > 3 files changed, 8 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > > index cebbd0bc32aa..51b00d7e7c89 100644
> > > --- a/net/mptcp/options.c
> > > +++ b/net/mptcp/options.c
> > > @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
> > > 		msk->wnd_end = new_wnd_end;
> > > 
> > > 	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
> > > -	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
> > > +	if (after64(msk->wnd_end, snd_nxt))
> > 
> > Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before 
> > the data_lock is acquired.
> 
> You are right, READ_ONCE is required here. This is outside the msk
> socket lock. I'll send a v2.

Oops, sorry I was too hasty. After a better re-read I think this patch
is correct.

We don't need to re-read snd_nxt inside the data_lock:
- the 'new' value will still _not_ be any more accurate, as the lock
protecting such lock is the msk _socket_ lock.
- we don't have any data dependency with others msk fields

The READ_ONCE annotation is there just to make the fuzzer happy and
avoid reporting data races, we don't need to issue the read operation
multiple time.

Please let me know if the above makes sense to you!

Cheers,

Paolo
Re: [PATCH mptcp-next 2/5] mptcp: annotate lockless access for the tx path
Posted by Mat Martineau 8 months, 4 weeks ago
On Mon, 22 Jan 2024, Paolo Abeni wrote:

> On Sun, 2024-01-21 at 22:47 +0100, Paolo Abeni wrote:
>> On Thu, 2024-01-18 at 17:03 -0800, Mat Martineau wrote:
>>> On Tue, 16 Jan 2024, Paolo Abeni wrote:
>>>
>>>> The mptcp-level TX path info (write_seq, bytes_sent, snd_nxt) are under
>>>> the msk socket lock protection, and are accessed lockless in a few spots.
>>>>
>>>> Always mark the write operations with WRITE_ONCE, read operations
>>>> outside the lock with READ_ONCE and drop the annotation for read
>>>> under such lock.
>>>>
>>>> To simplify the annotations move mptcp_pending_data_fin_ack() from
>>>> __mptcp_data_acked() to __mptcp_clean_una(), under the msk socket
>>>> lock, where such call would belong.
>>>>
>>>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>>>> ---
>>>> net/mptcp/options.c  |  2 +-
>>>> net/mptcp/protocol.c | 14 ++++++--------
>>>> net/mptcp/protocol.h |  2 +-
>>>> 3 files changed, 8 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
>>>> index cebbd0bc32aa..51b00d7e7c89 100644
>>>> --- a/net/mptcp/options.c
>>>> +++ b/net/mptcp/options.c
>>>> @@ -1061,7 +1061,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
>>>> 		msk->wnd_end = new_wnd_end;
>>>>
>>>> 	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
>>>> -	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
>>>> +	if (after64(msk->wnd_end, snd_nxt))
>>>
>>> Is it ok to remove this READ_ONCE()? The snd_nxt local is stored before
>>> the data_lock is acquired.
>>
>> You are right, READ_ONCE is required here. This is outside the msk
>> socket lock. I'll send a v2.
>
> Oops, sorry I was too hasty. After a better re-read I think this patch
> is correct.
>
> We don't need to re-read snd_nxt inside the data_lock:
> - the 'new' value will still _not_ be any more accurate, as the lock
> protecting such lock is the msk _socket_ lock.
> - we don't have any data dependency with others msk fields
>
> The READ_ONCE annotation is there just to make the fuzzer happy and
> avoid reporting data races, we don't need to issue the read operation
> multiple time.
>
> Please let me know if the above makes sense to you!
>

Yeah, that makes sense. Also, the only (incredibly rare) consequence of a 
stale snd_nxt here could be an unnecessary call to __mptcp_check_push(), 
which is not a problem.

- Mat