[PATCH mptcp-next v20 5/7] mptcp: delay updating already_sent

Geliang Tang posted 7 patches 3 years, 1 month ago
There is a newer version of this series
[PATCH mptcp-next v20 5/7] mptcp: delay updating already_sent
Posted by Geliang Tang 3 years, 1 month ago
This patch adds a new member sent in struct mptcp_data_frag, save
info->sent in it, to support delay updating already_sent of dfrags
until all data is sent.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/protocol.c | 18 ++++++++++++++++--
 net/mptcp/protocol.h |  1 +
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4c249d1b9ec6..a1007751bd4d 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1099,6 +1099,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
 	dfrag->data_seq = msk->write_seq;
 	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
 	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
+	dfrag->sent = 0;
 	dfrag->already_sent = 0;
 	dfrag->page = pfrag->page;
 
@@ -1484,11 +1485,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
 {
 	u64 snd_nxt_new = dfrag->data_seq;
 
-	dfrag->already_sent += sent;
+	dfrag->sent += sent;
 
 	msk->snd_burst -= sent;
 
-	snd_nxt_new += dfrag->already_sent;
+	snd_nxt_new += dfrag->sent;
 
 	/* snd_nxt_new can be smaller than snd_nxt in case mptcp
 	 * is recovering after a failover. In that event, this re-sends
@@ -1513,6 +1514,18 @@ static void mptcp_update_first_pending(struct sock *sk, struct mptcp_sendmsg_inf
 
 static void mptcp_update_dfrags(struct sock *sk, struct mptcp_sendmsg_info *info)
 {
+	struct mptcp_data_frag *dfrag = mptcp_send_head(sk);
+
+	if (!dfrag)
+		return;
+
+	do {
+		if (dfrag->sent) {
+			dfrag->already_sent = max(dfrag->already_sent, dfrag->sent);
+			dfrag->sent = 0;
+		}
+	} while ((dfrag = mptcp_next_frag(sk, dfrag)));
+
 	mptcp_update_first_pending(sk, info);
 }
 
@@ -1539,6 +1552,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
 		info->sent = dfrag->already_sent;
 		info->limit = dfrag->data_len;
 		len = dfrag->data_len - dfrag->already_sent;
+		dfrag->sent = info->sent;
 		while (len > 0) {
 			int ret = 0;
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index cdce0c092c3c..cdadb39a03da 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -250,6 +250,7 @@ struct mptcp_data_frag {
 	u16 data_len;
 	u16 offset;
 	u16 overhead;
+	u16 sent;
 	u16 already_sent;
 	struct page *page;
 };
-- 
2.35.3
Re: [PATCH mptcp-next v20 5/7] mptcp: delay updating already_sent
Posted by Mat Martineau 3 years, 1 month ago
On Wed, 16 Nov 2022, Geliang Tang wrote:

> This patch adds a new member sent in struct mptcp_data_frag, save
> info->sent in it, to support delay updating already_sent of dfrags
> until all data is sent.
>

I think this patch is likely the cause of the DSS issues you mentioned in 
https://lore.kernel.org/mptcp/20221022125610.GA28495@bogon/

Looking at the code some more, __mptcp_clean_una() accesses and modifies 
dfrag->already_sent. If data is sent on one subflow, it can be acked at 
any time - even while the redundant sends are still happening on other 
subflows. So I don't think delaying dfrag->already_sent updates is a 
design that can work.

This makes me think that redundant sends need to work more like the MPTCP 
retransmit code path. When the scheduler selects multiple subflows, the 
first subflow to send is a "normal" transmit, and any other subflows would 
act like a retransmit when accessing the dfrags.


- Mat


> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/protocol.c | 18 ++++++++++++++++--
> net/mptcp/protocol.h |  1 +
> 2 files changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 4c249d1b9ec6..a1007751bd4d 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1099,6 +1099,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
> 	dfrag->data_seq = msk->write_seq;
> 	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
> 	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
> +	dfrag->sent = 0;
> 	dfrag->already_sent = 0;
> 	dfrag->page = pfrag->page;
>
> @@ -1484,11 +1485,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
> {
> 	u64 snd_nxt_new = dfrag->data_seq;
>
> -	dfrag->already_sent += sent;
> +	dfrag->sent += sent;
>
> 	msk->snd_burst -= sent;
>
> -	snd_nxt_new += dfrag->already_sent;
> +	snd_nxt_new += dfrag->sent;
>
> 	/* snd_nxt_new can be smaller than snd_nxt in case mptcp
> 	 * is recovering after a failover. In that event, this re-sends
> @@ -1513,6 +1514,18 @@ static void mptcp_update_first_pending(struct sock *sk, struct mptcp_sendmsg_inf
>
> static void mptcp_update_dfrags(struct sock *sk, struct mptcp_sendmsg_info *info)
> {
> +	struct mptcp_data_frag *dfrag = mptcp_send_head(sk);
> +
> +	if (!dfrag)
> +		return;
> +
> +	do {
> +		if (dfrag->sent) {
> +			dfrag->already_sent = max(dfrag->already_sent, dfrag->sent);
> +			dfrag->sent = 0;
> +		}
> +	} while ((dfrag = mptcp_next_frag(sk, dfrag)));
> +
> 	mptcp_update_first_pending(sk, info);
> }
>
> @@ -1539,6 +1552,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
> 		info->sent = dfrag->already_sent;
> 		info->limit = dfrag->data_len;
> 		len = dfrag->data_len - dfrag->already_sent;
> +		dfrag->sent = info->sent;
> 		while (len > 0) {
> 			int ret = 0;
>
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index cdce0c092c3c..cdadb39a03da 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -250,6 +250,7 @@ struct mptcp_data_frag {
> 	u16 data_len;
> 	u16 offset;
> 	u16 overhead;
> +	u16 sent;
> 	u16 already_sent;
> 	struct page *page;
> };
> -- 
> 2.35.3
>
>
>

--
Mat Martineau
Intel
Re: [PATCH mptcp-next v20 5/7] mptcp: delay updating already_sent
Posted by Geliang Tang 3 years ago
On Fri, Nov 18, 2022 at 02:15:30PM -0800, Mat Martineau wrote:
> On Wed, 16 Nov 2022, Geliang Tang wrote:
> 
> > This patch adds a new member sent in struct mptcp_data_frag, save
> > info->sent in it, to support delay updating already_sent of dfrags
> > until all data is sent.
> > 
> 
> I think this patch is likely the cause of the DSS issues you mentioned in
> https://lore.kernel.org/mptcp/20221022125610.GA28495@bogon/
> 
> Looking at the code some more, __mptcp_clean_una() accesses and modifies
> dfrag->already_sent. If data is sent on one subflow, it can be acked at any
> time - even while the redundant sends are still happening on other subflows.
> So I don't think delaying dfrag->already_sent updates is a design that can
> work.
> 
> This makes me think that redundant sends need to work more like the MPTCP
> retransmit code path. When the scheduler selects multiple subflows, the
> first subflow to send is a "normal" transmit, and any other subflows would
> act like a retransmit when accessing the dfrags.

Hi Mat,

I changed the redundant sends on the retransmit code path in v21, but
the DSS issues still exist.

Thanks,
-Geliang

> 
> 
> - Mat
> 
> 
> > Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> > ---
> > net/mptcp/protocol.c | 18 ++++++++++++++++--
> > net/mptcp/protocol.h |  1 +
> > 2 files changed, 17 insertions(+), 2 deletions(-)
> > 
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index 4c249d1b9ec6..a1007751bd4d 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -1099,6 +1099,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
> > 	dfrag->data_seq = msk->write_seq;
> > 	dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
> > 	dfrag->offset = offset + sizeof(struct mptcp_data_frag);
> > +	dfrag->sent = 0;
> > 	dfrag->already_sent = 0;
> > 	dfrag->page = pfrag->page;
> > 
> > @@ -1484,11 +1485,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
> > {
> > 	u64 snd_nxt_new = dfrag->data_seq;
> > 
> > -	dfrag->already_sent += sent;
> > +	dfrag->sent += sent;
> > 
> > 	msk->snd_burst -= sent;
> > 
> > -	snd_nxt_new += dfrag->already_sent;
> > +	snd_nxt_new += dfrag->sent;
> > 
> > 	/* snd_nxt_new can be smaller than snd_nxt in case mptcp
> > 	 * is recovering after a failover. In that event, this re-sends
> > @@ -1513,6 +1514,18 @@ static void mptcp_update_first_pending(struct sock *sk, struct mptcp_sendmsg_inf
> > 
> > static void mptcp_update_dfrags(struct sock *sk, struct mptcp_sendmsg_info *info)
> > {
> > +	struct mptcp_data_frag *dfrag = mptcp_send_head(sk);
> > +
> > +	if (!dfrag)
> > +		return;
> > +
> > +	do {
> > +		if (dfrag->sent) {
> > +			dfrag->already_sent = max(dfrag->already_sent, dfrag->sent);
> > +			dfrag->sent = 0;
> > +		}
> > +	} while ((dfrag = mptcp_next_frag(sk, dfrag)));
> > +
> > 	mptcp_update_first_pending(sk, info);
> > }
> > 
> > @@ -1539,6 +1552,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
> > 		info->sent = dfrag->already_sent;
> > 		info->limit = dfrag->data_len;
> > 		len = dfrag->data_len - dfrag->already_sent;
> > +		dfrag->sent = info->sent;
> > 		while (len > 0) {
> > 			int ret = 0;
> > 
> > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> > index cdce0c092c3c..cdadb39a03da 100644
> > --- a/net/mptcp/protocol.h
> > +++ b/net/mptcp/protocol.h
> > @@ -250,6 +250,7 @@ struct mptcp_data_frag {
> > 	u16 data_len;
> > 	u16 offset;
> > 	u16 overhead;
> > +	u16 sent;
> > 	u16 already_sent;
> > 	struct page *page;
> > };
> > -- 
> > 2.35.3
> > 
> > 
> > 
> 
> --
> Mat Martineau
> Intel