This patch adds a new member sent in struct mptcp_data_frag, save
info->sent in it, to support delay updating already_sent of dfrags
until all data is sent.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/protocol.c | 18 ++++++++++++++++--
net/mptcp/protocol.h | 1 +
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4c249d1b9ec6..a1007751bd4d 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1099,6 +1099,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
dfrag->data_seq = msk->write_seq;
dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
dfrag->offset = offset + sizeof(struct mptcp_data_frag);
+ dfrag->sent = 0;
dfrag->already_sent = 0;
dfrag->page = pfrag->page;
@@ -1484,11 +1485,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
{
u64 snd_nxt_new = dfrag->data_seq;
- dfrag->already_sent += sent;
+ dfrag->sent += sent;
msk->snd_burst -= sent;
- snd_nxt_new += dfrag->already_sent;
+ snd_nxt_new += dfrag->sent;
/* snd_nxt_new can be smaller than snd_nxt in case mptcp
* is recovering after a failover. In that event, this re-sends
@@ -1513,6 +1514,18 @@ static void mptcp_update_first_pending(struct sock *sk, struct mptcp_sendmsg_inf
static void mptcp_update_dfrags(struct sock *sk, struct mptcp_sendmsg_info *info)
{
+ struct mptcp_data_frag *dfrag = mptcp_send_head(sk);
+
+ if (!dfrag)
+ return;
+
+ do {
+ if (dfrag->sent) {
+ dfrag->already_sent = max(dfrag->already_sent, dfrag->sent);
+ dfrag->sent = 0;
+ }
+ } while ((dfrag = mptcp_next_frag(sk, dfrag)));
+
mptcp_update_first_pending(sk, info);
}
@@ -1539,6 +1552,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
info->sent = dfrag->already_sent;
info->limit = dfrag->data_len;
len = dfrag->data_len - dfrag->already_sent;
+ dfrag->sent = info->sent;
while (len > 0) {
int ret = 0;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index cdce0c092c3c..cdadb39a03da 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -250,6 +250,7 @@ struct mptcp_data_frag {
u16 data_len;
u16 offset;
u16 overhead;
+ u16 sent;
u16 already_sent;
struct page *page;
};
--
2.35.3
On Wed, 16 Nov 2022, Geliang Tang wrote:
> This patch adds a new member sent in struct mptcp_data_frag, save
> info->sent in it, to support delay updating already_sent of dfrags
> until all data is sent.
>
I think this patch is likely the cause of the DSS issues you mentioned in
https://lore.kernel.org/mptcp/20221022125610.GA28495@bogon/
Looking at the code some more, __mptcp_clean_una() accesses and modifies
dfrag->already_sent. If data is sent on one subflow, it can be acked at
any time - even while the redundant sends are still happening on other
subflows. So I don't think delaying dfrag->already_sent updates is a
design that can work.
This makes me think that redundant sends need to work more like the MPTCP
retransmit code path. When the scheduler selects multiple subflows, the
first subflow to send is a "normal" transmit, and any other subflows would
act like a retransmit when accessing the dfrags.
- Mat
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/protocol.c | 18 ++++++++++++++++--
> net/mptcp/protocol.h | 1 +
> 2 files changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 4c249d1b9ec6..a1007751bd4d 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1099,6 +1099,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
> dfrag->data_seq = msk->write_seq;
> dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
> dfrag->offset = offset + sizeof(struct mptcp_data_frag);
> + dfrag->sent = 0;
> dfrag->already_sent = 0;
> dfrag->page = pfrag->page;
>
> @@ -1484,11 +1485,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
> {
> u64 snd_nxt_new = dfrag->data_seq;
>
> - dfrag->already_sent += sent;
> + dfrag->sent += sent;
>
> msk->snd_burst -= sent;
>
> - snd_nxt_new += dfrag->already_sent;
> + snd_nxt_new += dfrag->sent;
>
> /* snd_nxt_new can be smaller than snd_nxt in case mptcp
> * is recovering after a failover. In that event, this re-sends
> @@ -1513,6 +1514,18 @@ static void mptcp_update_first_pending(struct sock *sk, struct mptcp_sendmsg_inf
>
> static void mptcp_update_dfrags(struct sock *sk, struct mptcp_sendmsg_info *info)
> {
> + struct mptcp_data_frag *dfrag = mptcp_send_head(sk);
> +
> + if (!dfrag)
> + return;
> +
> + do {
> + if (dfrag->sent) {
> + dfrag->already_sent = max(dfrag->already_sent, dfrag->sent);
> + dfrag->sent = 0;
> + }
> + } while ((dfrag = mptcp_next_frag(sk, dfrag)));
> +
> mptcp_update_first_pending(sk, info);
> }
>
> @@ -1539,6 +1552,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
> info->sent = dfrag->already_sent;
> info->limit = dfrag->data_len;
> len = dfrag->data_len - dfrag->already_sent;
> + dfrag->sent = info->sent;
> while (len > 0) {
> int ret = 0;
>
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index cdce0c092c3c..cdadb39a03da 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -250,6 +250,7 @@ struct mptcp_data_frag {
> u16 data_len;
> u16 offset;
> u16 overhead;
> + u16 sent;
> u16 already_sent;
> struct page *page;
> };
> --
> 2.35.3
>
>
>
--
Mat Martineau
Intel
On Fri, Nov 18, 2022 at 02:15:30PM -0800, Mat Martineau wrote:
> On Wed, 16 Nov 2022, Geliang Tang wrote:
>
> > This patch adds a new member sent in struct mptcp_data_frag, save
> > info->sent in it, to support delay updating already_sent of dfrags
> > until all data is sent.
> >
>
> I think this patch is likely the cause of the DSS issues you mentioned in
> https://lore.kernel.org/mptcp/20221022125610.GA28495@bogon/
>
> Looking at the code some more, __mptcp_clean_una() accesses and modifies
> dfrag->already_sent. If data is sent on one subflow, it can be acked at any
> time - even while the redundant sends are still happening on other subflows.
> So I don't think delaying dfrag->already_sent updates is a design that can
> work.
>
> This makes me think that redundant sends need to work more like the MPTCP
> retransmit code path. When the scheduler selects multiple subflows, the
> first subflow to send is a "normal" transmit, and any other subflows would
> act like a retransmit when accessing the dfrags.
Hi Mat,
I changed the redundant sends on the retransmit code path in v21, but
the DSS issues still exist.
Thanks,
-Geliang
>
>
> - Mat
>
>
> > Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> > ---
> > net/mptcp/protocol.c | 18 ++++++++++++++++--
> > net/mptcp/protocol.h | 1 +
> > 2 files changed, 17 insertions(+), 2 deletions(-)
> >
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index 4c249d1b9ec6..a1007751bd4d 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -1099,6 +1099,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
> > dfrag->data_seq = msk->write_seq;
> > dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag);
> > dfrag->offset = offset + sizeof(struct mptcp_data_frag);
> > + dfrag->sent = 0;
> > dfrag->already_sent = 0;
> > dfrag->page = pfrag->page;
> >
> > @@ -1484,11 +1485,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
> > {
> > u64 snd_nxt_new = dfrag->data_seq;
> >
> > - dfrag->already_sent += sent;
> > + dfrag->sent += sent;
> >
> > msk->snd_burst -= sent;
> >
> > - snd_nxt_new += dfrag->already_sent;
> > + snd_nxt_new += dfrag->sent;
> >
> > /* snd_nxt_new can be smaller than snd_nxt in case mptcp
> > * is recovering after a failover. In that event, this re-sends
> > @@ -1513,6 +1514,18 @@ static void mptcp_update_first_pending(struct sock *sk, struct mptcp_sendmsg_inf
> >
> > static void mptcp_update_dfrags(struct sock *sk, struct mptcp_sendmsg_info *info)
> > {
> > + struct mptcp_data_frag *dfrag = mptcp_send_head(sk);
> > +
> > + if (!dfrag)
> > + return;
> > +
> > + do {
> > + if (dfrag->sent) {
> > + dfrag->already_sent = max(dfrag->already_sent, dfrag->sent);
> > + dfrag->sent = 0;
> > + }
> > + } while ((dfrag = mptcp_next_frag(sk, dfrag)));
> > +
> > mptcp_update_first_pending(sk, info);
> > }
> >
> > @@ -1539,6 +1552,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
> > info->sent = dfrag->already_sent;
> > info->limit = dfrag->data_len;
> > len = dfrag->data_len - dfrag->already_sent;
> > + dfrag->sent = info->sent;
> > while (len > 0) {
> > int ret = 0;
> >
> > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> > index cdce0c092c3c..cdadb39a03da 100644
> > --- a/net/mptcp/protocol.h
> > +++ b/net/mptcp/protocol.h
> > @@ -250,6 +250,7 @@ struct mptcp_data_frag {
> > u16 data_len;
> > u16 offset;
> > u16 overhead;
> > + u16 sent;
> > u16 already_sent;
> > struct page *page;
> > };
> > --
> > 2.35.3
> >
> >
> >
>
> --
> Mat Martineau
> Intel
© 2016 - 2025 Red Hat, Inc.