[PATCH mptcp-next v21 5/7] mptcp: add mptcp_update_dfrags

Geliang Tang posted 7 patches 3 years, 2 months ago
Maintainers: Mat Martineau <mathew.j.martineau@linux.intel.com>, Matthieu Baerts <matthieu.baerts@tessares.net>, "David S. Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>, Andrii Nakryiko <andrii@kernel.org>, Mykola Lysenko <mykolal@fb.com>, Alexei Starovoitov <ast@kernel.org>, Daniel Borkmann <daniel@iogearbox.net>, Martin KaFai Lau <martin.lau@linux.dev>, Song Liu <song@kernel.org>, Yonghong Song <yhs@fb.com>, John Fastabend <john.fastabend@gmail.com>, KP Singh <kpsingh@kernel.org>, Stanislav Fomichev <sdf@google.com>, Hao Luo <haoluo@google.com>, Jiri Olsa <jolsa@kernel.org>, Shuah Khan <shuah@kernel.org>
[PATCH mptcp-next v21 5/7] mptcp: add mptcp_update_dfrags
Posted by Geliang Tang 3 years, 2 months ago
Add a new helper mptcp_next_frag() to get the next dfrag of the given
dfrag.

Add new members first and last in struct mptcp_sendmsg_info, to point
to the first and last sent dfrag in the dfrags iteration loop in
__subflow_push_pending().

Invoke the new helper mptcp_update_dfrags() to iterate the dfrags from
info->first to info->last to update already_sent of every dfrag to 0
in the redundant sends retrans path.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/protocol.c | 28 ++++++++++++++++++++++++++++
 net/mptcp/protocol.h | 13 ++++++++++---
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3fcd6af96721..23116b0840ad 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -992,6 +992,12 @@ static void __mptcp_clean_una(struct sock *sk)
 		msk->snd_una = READ_ONCE(msk->snd_nxt);
 
 	snd_una = msk->snd_una;
+	/* Fix this:
+	 * ------------[ cut here ]------------
+	 * WARNING: CPU: 6 PID: 3007 at net/mptcp/protocol.c:1003 __mptcp_clean_una+0x243/0x2b0
+	 */
+	if (msk->first_pending)
+		goto out;
 	list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
 		if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
 			break;
@@ -1112,6 +1118,7 @@ struct mptcp_sendmsg_info {
 	u16 sent;
 	unsigned int flags;
 	bool data_lock_held;
+	struct mptcp_data_frag *first, *last;
 };
 
 static int mptcp_check_allowed_size(const struct mptcp_sock *msk, struct sock *ssk,
@@ -1502,6 +1509,23 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
 		msk->snd_nxt = snd_nxt_new;
 }
 
+static void mptcp_update_dfrags(struct sock *sk, struct mptcp_sendmsg_info *info)
+{
+	struct mptcp_data_frag *dfrag = info->first;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	if (!dfrag)
+		return;
+
+	do {
+		dfrag->already_sent = 0;
+		if (dfrag == info->last)
+			break;
+	} while ((dfrag = mptcp_next_frag(sk, dfrag)));
+
+	WRITE_ONCE(msk->first_pending, info->first);
+}
+
 void mptcp_check_and_set_pending(struct sock *sk)
 {
 	if (mptcp_send_head(sk))
@@ -1515,10 +1539,12 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
 	struct mptcp_data_frag *dfrag;
 	int len, copied = 0, err = 0;
 
+	info->first = mptcp_send_head(sk);
 	while ((dfrag = mptcp_send_head(sk))) {
 		info->sent = dfrag->already_sent;
 		info->limit = dfrag->data_len;
 		len = dfrag->data_len - dfrag->already_sent;
+		info->last = dfrag;
 		while (len > 0) {
 			int ret = 0;
 
@@ -1572,6 +1598,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 				if (i > 0) {
 					if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags))
 						mptcp_schedule_work(sk);
+					mptcp_update_dfrags(sk, &info);
 					goto out;
 				}
 
@@ -1657,6 +1684,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
 				if (i > 0) {
 					if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags))
 						mptcp_schedule_work(sk);
+					mptcp_update_dfrags(sk, &info);
 					goto out;
 				}
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index adfb758a842f..782dcfd55429 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -358,16 +358,23 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
 	return READ_ONCE(msk->first_pending);
 }
 
-static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
+static inline struct mptcp_data_frag *mptcp_next_frag(const struct sock *sk,
+						      struct mptcp_data_frag *cur)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
-	struct mptcp_data_frag *cur;
 
-	cur = msk->first_pending;
+	if (!cur)
+		return NULL;
+
 	return list_is_last(&cur->list, &msk->rtx_queue) ? NULL :
 						     list_next_entry(cur, list);
 }
 
+static inline struct mptcp_data_frag *mptcp_send_next(const struct sock *sk)
+{
+	return mptcp_next_frag(sk, mptcp_send_head(sk));
+}
+
 static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
-- 
2.35.3
Re: [PATCH mptcp-next v21 5/7] mptcp: add mptcp_update_dfrags
Posted by Mat Martineau 3 years, 2 months ago
On Mon, 28 Nov 2022, Geliang Tang wrote:

> Add a new helper mptcp_next_frag() to get the next dfrag of the given
> dfrag.
>
> Add new members first and last in struct mptcp_sendmsg_info, to point
> to the first and last sent dfrag in the dfrags iteration loop in
> __subflow_push_pending().
>
> Invoke the new helper mptcp_update_dfrags() to iterate the dfrags from
> info->first to info->last to update already_sent of every dfrag to 0
> in the redundant sends retrans path.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/protocol.c | 28 ++++++++++++++++++++++++++++
> net/mptcp/protocol.h | 13 ++++++++++---
> 2 files changed, 38 insertions(+), 3 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 3fcd6af96721..23116b0840ad 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -992,6 +992,12 @@ static void __mptcp_clean_una(struct sock *sk)
> 		msk->snd_una = READ_ONCE(msk->snd_nxt);
>
> 	snd_una = msk->snd_una;
> +	/* Fix this:
> +	 * ------------[ cut here ]------------
> +	 * WARNING: CPU: 6 PID: 3007 at net/mptcp/protocol.c:1003 __mptcp_clean_una+0x243/0x2b0
> +	 */
> +	if (msk->first_pending)
> +		goto out;

Is this triggered by WARN_ON_ONCE(!msk->recovery)?


I looked at my v20 comment on this patch again, and I could have been 
clearer about the issues with modifying dfrag->already_sent:

When called from __mptcp_push_pending() (with the msk lock held), dfrags 
can be modified more freely because the call to __mptcp_clean_una() is 
deferred using msk->cb_flags.

The tricky part is handling dfrag updates when using 
__mptcp_subflow_push_pending(). On this code path, the msk lock is not 
held continuously when doing a redundant send on multiple subflows. This 
means __mptcp_clean_una() can run between each subflow send. When this 
happens, if dfrag->already_sent == 0 then __mptcp_clean_una() doesn't know 
what to do.

I think it's important to find a way to do redundant sends with 
__mptcp_subflow_push_pending() without having to change 
dfrag->already_sent. After the data is sent for the first time and the msk 
lock is released, dfrag->already_sent should not be set back to 0 so 
__mptcp_clean_una() can work correctly if a DATA_ACK arrives.

One way to solve this is to have the scheduler set sequence numbers to 
limit the payload sent on the scheduled subflows. Right now, the scheduler 
tells the MPTCP core which subflows to send on, but does not have any 
limit on how much data to send. If you replaced msk->snd_burst with 
msk->sched_seq_start and msk->sched_seq_end and have the scheduler set 
those values for the current scheduled transmits, that could be used 
instead of dfrag->already_sent. If a DATA_ACK has already cleared the 
received and acknowledged dfrags, then the redundant sends on the 
__mptcp_subflow_push_pending() code path don't even need to transmit the 
data.

What do you think? I am very open to better ideas too!

Thanks,

Mat



> 	list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
> 		if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
> 			break;
> @@ -1112,6 +1118,7 @@ struct mptcp_sendmsg_info {
> 	u16 sent;
> 	unsigned int flags;
> 	bool data_lock_held;
> +	struct mptcp_data_frag *first, *last;
> };
>
> static int mptcp_check_allowed_size(const struct mptcp_sock *msk, struct sock *ssk,
> @@ -1502,6 +1509,23 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
> 		msk->snd_nxt = snd_nxt_new;
> }
>
> +static void mptcp_update_dfrags(struct sock *sk, struct mptcp_sendmsg_info *info)
> +{
> +	struct mptcp_data_frag *dfrag = info->first;
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +
> +	if (!dfrag)
> +		return;
> +
> +	do {
> +		dfrag->already_sent = 0;
> +		if (dfrag == info->last)
> +			break;
> +	} while ((dfrag = mptcp_next_frag(sk, dfrag)));
> +
> +	WRITE_ONCE(msk->first_pending, info->first);
> +}
> +
> void mptcp_check_and_set_pending(struct sock *sk)
> {
> 	if (mptcp_send_head(sk))
> @@ -1515,10 +1539,12 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
> 	struct mptcp_data_frag *dfrag;
> 	int len, copied = 0, err = 0;
>
> +	info->first = mptcp_send_head(sk);
> 	while ((dfrag = mptcp_send_head(sk))) {
> 		info->sent = dfrag->already_sent;
> 		info->limit = dfrag->data_len;
> 		len = dfrag->data_len - dfrag->already_sent;
> +		info->last = dfrag;
> 		while (len > 0) {
> 			int ret = 0;
>
> @@ -1572,6 +1598,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> 				if (i > 0) {
> 					if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags))
> 						mptcp_schedule_work(sk);
> +					mptcp_update_dfrags(sk, &info);
> 					goto out;
> 				}
>
> @@ -1657,6 +1684,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
> 				if (i > 0) {
> 					if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags))
> 						mptcp_schedule_work(sk);
> +					mptcp_update_dfrags(sk, &info);
> 					goto out;
> 				}
>
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index adfb758a842f..782dcfd55429 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -358,16 +358,23 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
> 	return READ_ONCE(msk->first_pending);
> }
>
> -static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
> +static inline struct mptcp_data_frag *mptcp_next_frag(const struct sock *sk,
> +						      struct mptcp_data_frag *cur)
> {
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> -	struct mptcp_data_frag *cur;
>
> -	cur = msk->first_pending;
> +	if (!cur)
> +		return NULL;
> +
> 	return list_is_last(&cur->list, &msk->rtx_queue) ? NULL :
> 						     list_next_entry(cur, list);
> }
>
> +static inline struct mptcp_data_frag *mptcp_send_next(const struct sock *sk)
> +{
> +	return mptcp_next_frag(sk, mptcp_send_head(sk));
> +}
> +
> static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
> {
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> -- 
> 2.35.3
>
>
>

--
Mat Martineau
Intel