The MPTCP RFC requires that the MPTCP-level receive window's
right edge never moves backward. Currently the MPTCP code
enforces this constraint while tracking the right edge, but it
does not reflect it back on the wire, as it lacks a suitable hook
to update the TCP header accordingly.
This change modifies the existing mptcp_write_options() hook,
providing the current packet's TCP header up to the MPTCP protocol
level, so that the next patch can implement the above-mentioned
constraint.
No functional changes intended.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
include/net/mptcp.h | 2 +-
net/ipv4/tcp_output.c | 13 +++++++------
net/mptcp/options.c | 2 +-
3 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 877077b53200..6b07011c060d 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -125,7 +125,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
struct mptcp_out_options *opts);
bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
-void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
struct mptcp_out_options *opts);
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c221f3bce975..27deec41a1f4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -444,12 +444,13 @@ struct tcp_out_options {
struct mptcp_out_options mptcp;
};
-static void mptcp_options_write(__be32 *ptr, const struct tcp_sock *tp,
+static void mptcp_options_write(struct tcphdr *th, __be32 *ptr,
+ struct tcp_sock *tp,
struct tcp_out_options *opts)
{
#if IS_ENABLED(CONFIG_MPTCP)
if (unlikely(OPTION_MPTCP & opts->options))
- mptcp_write_options(ptr, tp, &opts->mptcp);
+ mptcp_write_options(th, ptr, tp, &opts->mptcp);
#endif
}
@@ -605,7 +606,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
* At least SACK_PERM as the first option is known to lead to a disaster
* (but it may well be that other scenarios fail similarly).
*/
-static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
+static void tcp_options_write(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
struct tcp_out_options *opts)
{
u16 options = opts->options; /* mungable copy */
@@ -701,7 +702,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
smc_options_write(ptr, &options);
- mptcp_options_write(ptr, tp, opts);
+ mptcp_options_write(th, ptr, tp, opts);
}
static void smc_set_option(const struct tcp_sock *tp,
@@ -1354,7 +1355,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
th->window = htons(min(tp->rcv_wnd, 65535U));
}
- tcp_options_write((__be32 *)(th + 1), tp, &opts);
+ tcp_options_write(th, (__be32 *)(th + 1), tp, &opts);
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
@@ -3590,7 +3591,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
- tcp_options_write((__be32 *)(th + 1), NULL, &opts);
+ tcp_options_write(th, (__be32 *)(th + 1), NULL, &opts);
th->doff = (tcp_header_size >> 2);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index e05d9458a025..2570911735ab 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1265,7 +1265,7 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
~csum_unfold(mpext->csum));
}
-void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
const struct sock *ssk = (const struct sock *)tp;
--
2.35.1
On Fri, 15 Apr 2022, Paolo Abeni wrote:
> The MPTCP RFC requires that the MPTCP-level receive window's
> right edge never moves backward. Currently the MPTCP code
> enforces such constraint while tracking the right edge, but it
> does not reflects it back on the wire, as lacks a suitable hook
> to update accordingly the TCP header.
>
> This change modifiy the existing mptcp_write_options() hook,
> providing the current packet's TCP header up to the MPTCP protocol
> level, so that the next patch could implement the above mentioned
> constraint.
>
> No functional changes intended.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> include/net/mptcp.h | 2 +-
> net/ipv4/tcp_output.c | 13 +++++++------
> net/mptcp/options.c | 2 +-
> 3 files changed, 9 insertions(+), 8 deletions(-)
>
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index 877077b53200..6b07011c060d 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -125,7 +125,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> struct mptcp_out_options *opts);
> bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
>
> -void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
> +void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
> struct mptcp_out_options *opts);
>
> void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info);
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index c221f3bce975..27deec41a1f4 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -444,12 +444,13 @@ struct tcp_out_options {
> struct mptcp_out_options mptcp;
> };
>
> -static void mptcp_options_write(__be32 *ptr, const struct tcp_sock *tp,
> +static void mptcp_options_write(struct tcphdr *th, __be32 *ptr,
> + struct tcp_sock *tp,
> struct tcp_out_options *opts)
> {
> #if IS_ENABLED(CONFIG_MPTCP)
> if (unlikely(OPTION_MPTCP & opts->options))
> - mptcp_write_options(ptr, tp, &opts->mptcp);
> + mptcp_write_options(th, ptr, tp, &opts->mptcp);
> #endif
> }
>
> @@ -605,7 +606,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
> * At least SACK_PERM as the first option is known to lead to a disaster
> * (but it may well be that other scenarios fail similarly).
> */
> -static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
> +static void tcp_options_write(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
> struct tcp_out_options *opts)
Having both th and ptr seems redundant to my eyes. I'd rather just have a
th parameter and move the "ptr = (__be32 *)(th + 1);" code inside
tcp_options_write().
If you're thinking/hoping that Eric finds the additional parameter more
acceptable, we can go with this patch as-is.
- Mat
> {
> u16 options = opts->options; /* mungable copy */
> @@ -701,7 +702,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
>
> smc_options_write(ptr, &options);
>
> - mptcp_options_write(ptr, tp, opts);
> + mptcp_options_write(th, ptr, tp, opts);
> }
>
> static void smc_set_option(const struct tcp_sock *tp,
> @@ -1354,7 +1355,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
> th->window = htons(min(tp->rcv_wnd, 65535U));
> }
>
> - tcp_options_write((__be32 *)(th + 1), tp, &opts);
> + tcp_options_write(th, (__be32 *)(th + 1), tp, &opts);
>
> #ifdef CONFIG_TCP_MD5SIG
> /* Calculate the MD5 hash, as we have all we need now */
> @@ -3590,7 +3591,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
>
> /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
> th->window = htons(min(req->rsk_rcv_wnd, 65535U));
> - tcp_options_write((__be32 *)(th + 1), NULL, &opts);
> + tcp_options_write(th, (__be32 *)(th + 1), NULL, &opts);
> th->doff = (tcp_header_size >> 2);
> __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
>
> diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> index e05d9458a025..2570911735ab 100644
> --- a/net/mptcp/options.c
> +++ b/net/mptcp/options.c
> @@ -1265,7 +1265,7 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
> ~csum_unfold(mpext->csum));
> }
>
> -void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
> +void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
> struct mptcp_out_options *opts)
> {
> const struct sock *ssk = (const struct sock *)tp;
> --
> 2.35.1
>
>
>
--
Mat Martineau
Intel
On Mon, 2022-04-18 at 17:11 -0700, Mat Martineau wrote:
> On Fri, 15 Apr 2022, Paolo Abeni wrote:
>
> > The MPTCP RFC requires that the MPTCP-level receive window's
> > right edge never moves backward. Currently the MPTCP code
> > enforces such constraint while tracking the right edge, but it
> > does not reflects it back on the wire, as lacks a suitable hook
> > to update accordingly the TCP header.
> >
> > This change modifiy the existing mptcp_write_options() hook,
> > providing the current packet's TCP header up to the MPTCP protocol
> > level, so that the next patch could implement the above mentioned
> > constraint.
> >
> > No functional changes intended.
> >
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> > include/net/mptcp.h | 2 +-
> > net/ipv4/tcp_output.c | 13 +++++++------
> > net/mptcp/options.c | 2 +-
> > 3 files changed, 9 insertions(+), 8 deletions(-)
> >
> > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > index 877077b53200..6b07011c060d 100644
> > --- a/include/net/mptcp.h
> > +++ b/include/net/mptcp.h
> > @@ -125,7 +125,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
> > struct mptcp_out_options *opts);
> > bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);
> >
> > -void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
> > +void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
> > struct mptcp_out_options *opts);
> >
> > void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info);
> > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> > index c221f3bce975..27deec41a1f4 100644
> > --- a/net/ipv4/tcp_output.c
> > +++ b/net/ipv4/tcp_output.c
> > @@ -444,12 +444,13 @@ struct tcp_out_options {
> > struct mptcp_out_options mptcp;
> > };
> >
> > -static void mptcp_options_write(__be32 *ptr, const struct tcp_sock *tp,
> > +static void mptcp_options_write(struct tcphdr *th, __be32 *ptr,
> > + struct tcp_sock *tp,
> > struct tcp_out_options *opts)
> > {
> > #if IS_ENABLED(CONFIG_MPTCP)
> > if (unlikely(OPTION_MPTCP & opts->options))
> > - mptcp_write_options(ptr, tp, &opts->mptcp);
> > + mptcp_write_options(th, ptr, tp, &opts->mptcp);
> > #endif
> > }
> >
> > @@ -605,7 +606,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
> > * At least SACK_PERM as the first option is known to lead to a disaster
> > * (but it may well be that other scenarios fail similarly).
> > */
> > -static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
> > +static void tcp_options_write(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
> > struct tcp_out_options *opts)
>
> Having both th and ptr seems redundant to my eyes. I'd rather just have a
> th parameter and move the "ptr = (__be32 *)(th + 1);" code inside
> tcp_options_write().
>
> If you're thinking/hoping that Eric finds the additional parameter more
> acceptible, we can go with this patch as-is.
Yep, the main concern here is obtaining buy-in from Eric. I hoped
that fewer differences would help, but the code you suggested is likely
better. I'll send an RFC on netdev with this and the next patch to get
some direct feedback from him.
Thanks!
Paolo
On Tue, 2022-04-19 at 12:42 +0200, Paolo Abeni wrote:
> On Mon, 2022-04-18 at 17:11 -0700, Mat Martineau wrote:
> > On Fri, 15 Apr 2022, Paolo Abeni wrote:
> > > @@ -605,7 +606,7 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
> > > * At least SACK_PERM as the first option is known to lead to a disaster
> > > * (but it may well be that other scenarios fail similarly).
> > > */
> > > -static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
> > > +static void tcp_options_write(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
> > > struct tcp_out_options *opts)
> >
> > Having both th and ptr seems redundant to my eyes. I'd rather just have a
> > th parameter and move the "ptr = (__be32 *)(th + 1);" code inside
> > tcp_options_write().
> >
> > If you're thinking/hoping that Eric finds the additional parameter more
> > acceptible, we can go with this patch as-is.
>
> Yep, the main concerns here is obtaining the buy-in from Eric. I hoped
> that less differences would help, but likely the code you suggested is
> better. I'll send an RFC on netdev with this and the next patch to get
> some direct feedback from him.
Sharing the patches on netdev at least pointed out a build issue on some
32-bit arches lacking cmpxchg64. I'll move to atomic64_cmpxchg(), which
instead has a generic implementation.
Cheers,
Paolo
© 2016 - 2026 Red Hat, Inc.