[PATCH mptcp-next v16 5/8] mptcp: add get_subflow wrapper

Geliang Tang posted 8 patches 3 years, 4 months ago
Maintainers: Song Liu <songliubraving@fb.com>, Paolo Abeni <pabeni@redhat.com>, "David S. Miller" <davem@davemloft.net>, Alexei Starovoitov <ast@kernel.org>, John Fastabend <john.fastabend@gmail.com>, KP Singh <kpsingh@kernel.org>, Matthieu Baerts <matthieu.baerts@tessares.net>, Daniel Borkmann <daniel@iogearbox.net>, Mat Martineau <mathew.j.martineau@linux.intel.com>, Martin KaFai Lau <kafai@fb.com>, Andrii Nakryiko <andrii@kernel.org>, Jakub Kicinski <kuba@kernel.org>, Jonathan Corbet <corbet@lwn.net>, Shuah Khan <shuah@kernel.org>, Yonghong Song <yhs@fb.com>
There is a newer version of this series
[PATCH mptcp-next v16 5/8] mptcp: add get_subflow wrapper
Posted by Geliang Tang 3 years, 4 months ago
This patch defines a new wrapper mptcp_sched_get_subflow(), invoke
get_subflow() of msk->sched in it. Use the wrapper instead of using
mptcp_subflow_get_send() directly.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/protocol.c |  9 ++++-----
 net/mptcp/protocol.h | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 7590e2d29f39..c6e963848b18 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1515,7 +1515,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 	subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
 					   READ_ONCE(ssk->sk_pacing_rate) * burst,
 					   burst + wmem);
-	msk->last_snd = ssk;
 	msk->snd_burst = burst;
 	return ssk;
 }
@@ -1575,7 +1574,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 			int ret = 0;
 
 			prev_ssk = ssk;
-			ssk = mptcp_subflow_get_send(msk);
+			ssk = mptcp_sched_get_subflow(msk, false);
 
 			/* First check. If the ssk has changed since
 			 * the last round, release prev_ssk
@@ -1644,7 +1643,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 			 * check for a different subflow usage only after
 			 * spooling the first chunk of data
 			 */
-			xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+			xmit_ssk = first ? ssk : mptcp_sched_get_subflow(mptcp_sk(sk), false);
 			if (!xmit_ssk)
 				goto out;
 			if (xmit_ssk != ssk) {
@@ -2489,7 +2488,7 @@ static void __mptcp_retrans(struct sock *sk)
 	mptcp_clean_una_wakeup(sk);
 
 	/* first check ssk: need to kick "stale" logic */
-	ssk = mptcp_subflow_get_retrans(msk);
+	ssk = mptcp_sched_get_subflow(msk, true);
 	dfrag = mptcp_rtx_head(sk);
 	if (!dfrag) {
 		if (mptcp_data_fin_enabled(msk)) {
@@ -3154,7 +3153,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
 		return;
 
 	if (!sock_owned_by_user(sk)) {
-		struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
+		struct sock *xmit_ssk = mptcp_sched_get_subflow(mptcp_sk(sk), false);
 
 		if (xmit_ssk == ssk)
 			__mptcp_subflow_push_pending(sk, ssk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 22f3f41e1e32..91512fc25128 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -633,6 +633,22 @@ int mptcp_init_sched(struct mptcp_sock *msk,
 		     struct mptcp_sched_ops *sched);
 void mptcp_release_sched(struct mptcp_sock *msk);
 
+static inline struct sock *mptcp_sched_get_subflow(struct mptcp_sock *msk, bool reinject)
+{
+	struct mptcp_sched_data data = {
+		.sock		= msk->first,
+		.call_again	= 0,
+	};
+
+	msk->sched ? INDIRECT_CALL_INET_1(msk->sched->get_subflow,
+					  mptcp_get_subflow_default,
+					  msk, reinject, &data) :
+		     mptcp_get_subflow_default(msk, reinject, &data);
+
+	msk->last_snd = data.sock;
+	return data.sock;
+}
+
 static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 {
 	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-- 
2.34.1


Re: [PATCH mptcp-next v16 5/8] mptcp: add get_subflow wrapper
Posted by Paolo Abeni 3 years, 4 months ago
Hello,

First of all I'm sorry for the very late feedback: I had some
difficulties to allocate time to follow this development.

On Wed, 2022-04-27 at 09:56 +0800, Geliang Tang wrote:
> This patch defines a new wrapper mptcp_sched_get_subflow(), invoke
> get_subflow() of msk->sched in it. Use the wrapper instead of using
> mptcp_subflow_get_send() directly.
> 
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
>  net/mptcp/protocol.c |  9 ++++-----
>  net/mptcp/protocol.h | 16 ++++++++++++++++
>  2 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 7590e2d29f39..c6e963848b18 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1515,7 +1515,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
>  	subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
>  					   READ_ONCE(ssk->sk_pacing_rate) * burst,
>  					   burst + wmem);
> -	msk->last_snd = ssk;
>  	msk->snd_burst = burst;
>  	return ssk;
>  }

This breaks "mptcp: really share subflow snd_wnd": When the MPTCP-level
cwin is 0, we want to do 0-window probe (so mptcp_subflow_get_send()
return a 'valid' subflow) but we don't want to start a new burst for
such 0 win probe (so mptcp_subflow_get_send() clears last_snd).

With this change, when MPTCP-level cwin is 0, 'last_send' is not
cleared anymore. 

A simple sulution would be:

---
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 653757ea0aca..c20f5fd04cad 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1507,7 +1507,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
        burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
        wmem = READ_ONCE(ssk->sk_wmem_queued);
        if (!burst) {
-               msk->last_snd = NULL;
+               msk->snd_burst = 0;
                return ssk;
        }
---

Probably a better solution would be adding 'snd_burst' to struct
mptcp_sched_data, and let mptcp_sched_get_subflow() update msk-
>snd_burst as specified by the scheduler. With this latter change the
'msk' argument in mptcp_sched_ops->schedule() could be const.

> @@ -1575,7 +1574,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
>  			int ret = 0;
>  
>  			prev_ssk = ssk;
> -			ssk = mptcp_subflow_get_send(msk);
> +			ssk = mptcp_sched_get_subflow(msk, false);
>  
>  			/* First check. If the ssk has changed since
>  			 * the last round, release prev_ssk
> @@ -1644,7 +1643,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
>  			 * check for a different subflow usage only after
>  			 * spooling the first chunk of data
>  			 */
> -			xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
> +			xmit_ssk = first ? ssk : mptcp_sched_get_subflow(mptcp_sk(sk), false);
>  			if (!xmit_ssk)
>  				goto out;
>  			if (xmit_ssk != ssk) {
> @@ -2489,7 +2488,7 @@ static void __mptcp_retrans(struct sock *sk)
>  	mptcp_clean_una_wakeup(sk);
>  
>  	/* first check ssk: need to kick "stale" logic */
> -	ssk = mptcp_subflow_get_retrans(msk);
> +	ssk = mptcp_sched_get_subflow(msk, true);
>  	dfrag = mptcp_rtx_head(sk);
>  	if (!dfrag) {
>  		if (mptcp_data_fin_enabled(msk)) {
> @@ -3154,7 +3153,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
>  		return;
>  
>  	if (!sock_owned_by_user(sk)) {
> -		struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
> +		struct sock *xmit_ssk = mptcp_sched_get_subflow(mptcp_sk(sk), false);
>  
>  		if (xmit_ssk == ssk)
>  			__mptcp_subflow_push_pending(sk, ssk);
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 22f3f41e1e32..91512fc25128 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -633,6 +633,22 @@ int mptcp_init_sched(struct mptcp_sock *msk,
>  		     struct mptcp_sched_ops *sched);
>  void mptcp_release_sched(struct mptcp_sock *msk);
>  
> +static inline struct sock *mptcp_sched_get_subflow(struct mptcp_sock *msk, bool reinject)
> +{
> +	struct mptcp_sched_data data = {
> +		.sock		= msk->first,
> +		.call_again	= 0,
> +	};

It's not clear to me:
- why we need the 'sock' argument, since the scheduler already get
'msk' as argument?
- why we need to initialize it (looks like an 'output' argument ?!?)
- what is the goal/role of the 'call_again' argument. It looks like we
just ignore it?!?

Side note: perhaps we should move the following chunk:

---
       sock_owned_by_me(sk);

        if (__mptcp_check_fallback(msk)) {
                if (!msk->first)
                        return NULL;
                return sk_stream_memory_free(msk->first) ? msk->first : NULL;
        }
---

  out of mptcp_subflow_get_send() into mptcp_sched_get_subflow(), so
that every scheduler will not have to deal with fallback sockets.

> +
> +	msk->sched ? INDIRECT_CALL_INET_1(msk->sched->get_subflow,
> +					  mptcp_get_subflow_default,
> +					  msk, reinject, &data) :
> +		     mptcp_get_subflow_default(msk, reinject, &data);

With this we have quite a lot of conditionals for the default
scheduler. I think we can drop the INDIRECT_CALL_INET_1() wrapper, and
rework the code so that msk->sched is always NULL with the default
scheduler.

And sorry to bring the next topic so late, but why a 'reinject'
argument instead of an additional mptcp_sched_ops op? the latter option
will avoid another branch in fast-path.

Thanks!

Paolo


Re: [PATCH mptcp-next v16 5/8] mptcp: add get_subflow wrapper
Posted by Geliang Tang 3 years, 4 months ago
Hi Paolo,

Thanks for your review!

Paolo Abeni <pabeni@redhat.com> 于2022年4月27日周三 16:27写道:
>
> Hello,
>
> First of all I'm sorry for the very late feedback: I had some
> difficulties to allocate time to follow this development.
>
> On Wed, 2022-04-27 at 09:56 +0800, Geliang Tang wrote:
> > This patch defines a new wrapper mptcp_sched_get_subflow(), invoke
> > get_subflow() of msk->sched in it. Use the wrapper instead of using
> > mptcp_subflow_get_send() directly.
> >
> > Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> > ---
> >  net/mptcp/protocol.c |  9 ++++-----
> >  net/mptcp/protocol.h | 16 ++++++++++++++++
> >  2 files changed, 20 insertions(+), 5 deletions(-)
> >
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index 7590e2d29f39..c6e963848b18 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -1515,7 +1515,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
> >       subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
> >                                          READ_ONCE(ssk->sk_pacing_rate) * burst,
> >                                          burst + wmem);
> > -     msk->last_snd = ssk;
> >       msk->snd_burst = burst;
> >       return ssk;
> >  }
>
> This breaks "mptcp: really share subflow snd_wnd": When the MPTCP-level
> cwin is 0, we want to do 0-window probe (so mptcp_subflow_get_send()
> return a 'valid' subflow) but we don't want to start a new burst for
> such 0 win probe (so mptcp_subflow_get_send() clears last_snd).
>
> With this change, when MPTCP-level cwin is 0, 'last_send' is not
> cleared anymore.
>
> A simple sulution would be:
>
> ---
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 653757ea0aca..c20f5fd04cad 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1507,7 +1507,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
>         burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
>         wmem = READ_ONCE(ssk->sk_wmem_queued);
>         if (!burst) {
> -               msk->last_snd = NULL;
> +               msk->snd_burst = 0;
>                 return ssk;
>         }
> ---

I think it's better to add back "msk->last_snd = ssk;" in
mptcp_subflow_get_send(), keep the original mptcp_subflow_get_send()
unchanged. And set msk->last_snd in the get_subflow() of the
scheduler, the same as v12:

https://patchwork.kernel.org/project/mptcp/patch/396f8edcbf8e4f65a1d76c228dc3d12aa4dd2f11.1650430389.git.geliang.tang@suse.com/

>
> Probably a better solution would be adding 'snd_burst' to struct
> mptcp_sched_data, and let mptcp_sched_get_subflow() update msk-
> >snd_burst as specified by the scheduler. With this latter change the
> 'msk' argument in mptcp_sched_ops->schedule() could be const.
>
> > @@ -1575,7 +1574,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> >                       int ret = 0;
> >
> >                       prev_ssk = ssk;
> > -                     ssk = mptcp_subflow_get_send(msk);
> > +                     ssk = mptcp_sched_get_subflow(msk, false);
> >
> >                       /* First check. If the ssk has changed since
> >                        * the last round, release prev_ssk
> > @@ -1644,7 +1643,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
> >                        * check for a different subflow usage only after
> >                        * spooling the first chunk of data
> >                        */
> > -                     xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
> > +                     xmit_ssk = first ? ssk : mptcp_sched_get_subflow(mptcp_sk(sk), false);
> >                       if (!xmit_ssk)
> >                               goto out;
> >                       if (xmit_ssk != ssk) {
> > @@ -2489,7 +2488,7 @@ static void __mptcp_retrans(struct sock *sk)
> >       mptcp_clean_una_wakeup(sk);
> >
> >       /* first check ssk: need to kick "stale" logic */
> > -     ssk = mptcp_subflow_get_retrans(msk);
> > +     ssk = mptcp_sched_get_subflow(msk, true);
> >       dfrag = mptcp_rtx_head(sk);
> >       if (!dfrag) {
> >               if (mptcp_data_fin_enabled(msk)) {
> > @@ -3154,7 +3153,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
> >               return;
> >
> >       if (!sock_owned_by_user(sk)) {
> > -             struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
> > +             struct sock *xmit_ssk = mptcp_sched_get_subflow(mptcp_sk(sk), false);
> >
> >               if (xmit_ssk == ssk)
> >                       __mptcp_subflow_push_pending(sk, ssk);
> > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> > index 22f3f41e1e32..91512fc25128 100644
> > --- a/net/mptcp/protocol.h
> > +++ b/net/mptcp/protocol.h
> > @@ -633,6 +633,22 @@ int mptcp_init_sched(struct mptcp_sock *msk,
> >                    struct mptcp_sched_ops *sched);
> >  void mptcp_release_sched(struct mptcp_sock *msk);
> >
> > +static inline struct sock *mptcp_sched_get_subflow(struct mptcp_sock *msk, bool reinject)
> > +{
> > +     struct mptcp_sched_data data = {
> > +             .sock           = msk->first,
> > +             .call_again     = 0,
> > +     };
>
> It's not clear to me:
> - why we need the 'sock' argument, since the scheduler already get
> 'msk' as argument?

sock is the selected subflow to return in the scheduer.

> - why we need to initialize it (looks like an 'output' argument ?!?)

Yes, it's an output argument. No need to init it.

> - what is the goal/role of the 'call_again' argument. It looks like we
> just ignore it?!?

call_again is for supporting a "redundant" packet scheduler in the
future indicate that get_subflow() function needs to be called again.

>
> Side note: perhaps we should move the following chunk:
>
> ---
>        sock_owned_by_me(sk);
>
>         if (__mptcp_check_fallback(msk)) {
>                 if (!msk->first)
>                         return NULL;
>                 return sk_stream_memory_free(msk->first) ? msk->first : NULL;
>         }
> ---
>
>   out of mptcp_subflow_get_send() into mptcp_sched_get_subflow(), so
> that every scheduler will not have to deal with fallback sockets.
>
> > +
> > +     msk->sched ? INDIRECT_CALL_INET_1(msk->sched->get_subflow,
> > +                                       mptcp_get_subflow_default,
> > +                                       msk, reinject, &data) :
> > +                  mptcp_get_subflow_default(msk, reinject, &data);
>
> With this we have quite a lot of conditionals for the default
> scheduler. I think we can drop the INDIRECT_CALL_INET_1() wrapper, and
> rework the code so that msk->sched is always NULL with the default
> scheduler.

So I need to drop patch 2 (2/8 mptcp: register default scheduler) in
this series, right?

>
> And sorry to bring the next topic so late, but why a 'reinject'
> argument instead of an additional mptcp_sched_ops op? the latter option
> will avoid another branch in fast-path.

How about keeping th 'reinject' argument like this:

----
static inline struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
{
        struct mptcp_sched_data data;

        sock_owned_by_me((struct sock *)msk);

        /* the following check is moved out of mptcp_subflow_get_send */
        if (__mptcp_check_fallback(msk)) {
                if (!msk->first)
                        return NULL;
                return sk_stream_memory_free(msk->first) ? msk->first : NULL;
        }

        if (!msk->sched)
                return mptcp_subflow_get_send(msk);

        msk->sched->get_subflow(msk, false, &data);

        return data.sock;
}

static inline struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
{
        struct mptcp_sched_data data;

        sock_owned_by_me((const struct sock *)msk);

        if (__mptcp_check_fallback(msk))
                return NULL;

        if (!msk->sched)
                return mptcp_subflow_get_retrans(msk);

        msk->sched->get_subflow(msk, true, &data);

        return data.sock;
}
-----

Thanks,
-Geliang

>
> Thanks!
>
> Paolo
>
>

Re: [PATCH mptcp-next v16 5/8] mptcp: add get_subflow wrapper
Posted by Matthieu Baerts 3 years, 4 months ago
Hi Geliang, Paolo,

On 27/04/2022 16:34, Geliang Tang wrote:
> Hi Paolo,
> 
> Thanks for your review!
> 
> Paolo Abeni <pabeni@redhat.com> 于2022年4月27日周三 16:27写道:
>>
>> Hello,
>>
>> First of all I'm sorry for the very late feedback: I had some
>> difficulties to allocate time to follow this development.
>>
>> On Wed, 2022-04-27 at 09:56 +0800, Geliang Tang wrote:
>>> This patch defines a new wrapper mptcp_sched_get_subflow(), invoke
>>> get_subflow() of msk->sched in it. Use the wrapper instead of using
>>> mptcp_subflow_get_send() directly.
>>>
>>> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
>>> ---
>>>  net/mptcp/protocol.c |  9 ++++-----
>>>  net/mptcp/protocol.h | 16 ++++++++++++++++
>>>  2 files changed, 20 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>>> index 7590e2d29f39..c6e963848b18 100644
>>> --- a/net/mptcp/protocol.c
>>> +++ b/net/mptcp/protocol.c
>>> @@ -1515,7 +1515,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
>>>       subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
>>>                                          READ_ONCE(ssk->sk_pacing_rate) * burst,
>>>                                          burst + wmem);
>>> -     msk->last_snd = ssk;
>>>       msk->snd_burst = burst;
>>>       return ssk;
>>>  }
>>
>> This breaks "mptcp: really share subflow snd_wnd": When the MPTCP-level
>> cwin is 0, we want to do 0-window probe (so mptcp_subflow_get_send()
>> return a 'valid' subflow) but we don't want to start a new burst for
>> such 0 win probe (so mptcp_subflow_get_send() clears last_snd).
>>
>> With this change, when MPTCP-level cwin is 0, 'last_send' is not
>> cleared anymore.
>>
>> A simple sulution would be:
>>
>> ---
>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>> index 653757ea0aca..c20f5fd04cad 100644
>> --- a/net/mptcp/protocol.c
>> +++ b/net/mptcp/protocol.c
>> @@ -1507,7 +1507,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
>>         burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
>>         wmem = READ_ONCE(ssk->sk_wmem_queued);
>>         if (!burst) {
>> -               msk->last_snd = NULL;
>> +               msk->snd_burst = 0;
>>                 return ssk;
>>         }
>> ---
> 
> I think it's better to add back "msk->last_snd = ssk;" in
> mptcp_subflow_get_send(), keep the original mptcp_subflow_get_send()
> unchanged. And set msk->last_snd in the get_subflow() of the
> scheduler, the same as v12:
> 
> https://patchwork.kernel.org/project/mptcp/patch/396f8edcbf8e4f65a1d76c228dc3d12aa4dd2f11.1650430389.git.geliang.tang@suse.com/

It might be interesting to discuss about the scheduler API at the
meeting tomorrow (sadly I will not be there).

Maybe it is interesting to separate the work in two steps like it is
done in mptcp.org: select data, then select subflow.

If the scheduler needs to know if it is is selecting a subflow for a
zero window test or not: we are getting closer to the mptcp.org API :)

(or maybe just enough to reset msk->snd_burst instead of asking all
schedulers to set 'last_snd' + give access to that for BPF)

Cheers,
Matt

> 
>>
>> Probably a better solution would be adding 'snd_burst' to struct
>> mptcp_sched_data, and let mptcp_sched_get_subflow() update msk-
>>> snd_burst as specified by the scheduler. With this latter change the
>> 'msk' argument in mptcp_sched_ops->schedule() could be const.
>>
>>> @@ -1575,7 +1574,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
>>>                       int ret = 0;
>>>
>>>                       prev_ssk = ssk;
>>> -                     ssk = mptcp_subflow_get_send(msk);
>>> +                     ssk = mptcp_sched_get_subflow(msk, false);
>>>
>>>                       /* First check. If the ssk has changed since
>>>                        * the last round, release prev_ssk
>>> @@ -1644,7 +1643,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
>>>                        * check for a different subflow usage only after
>>>                        * spooling the first chunk of data
>>>                        */
>>> -                     xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
>>> +                     xmit_ssk = first ? ssk : mptcp_sched_get_subflow(mptcp_sk(sk), false);
>>>                       if (!xmit_ssk)
>>>                               goto out;
>>>                       if (xmit_ssk != ssk) {
>>> @@ -2489,7 +2488,7 @@ static void __mptcp_retrans(struct sock *sk)
>>>       mptcp_clean_una_wakeup(sk);
>>>
>>>       /* first check ssk: need to kick "stale" logic */
>>> -     ssk = mptcp_subflow_get_retrans(msk);
>>> +     ssk = mptcp_sched_get_subflow(msk, true);
>>>       dfrag = mptcp_rtx_head(sk);
>>>       if (!dfrag) {
>>>               if (mptcp_data_fin_enabled(msk)) {
>>> @@ -3154,7 +3153,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
>>>               return;
>>>
>>>       if (!sock_owned_by_user(sk)) {
>>> -             struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
>>> +             struct sock *xmit_ssk = mptcp_sched_get_subflow(mptcp_sk(sk), false);
>>>
>>>               if (xmit_ssk == ssk)
>>>                       __mptcp_subflow_push_pending(sk, ssk);
>>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>>> index 22f3f41e1e32..91512fc25128 100644
>>> --- a/net/mptcp/protocol.h
>>> +++ b/net/mptcp/protocol.h
>>> @@ -633,6 +633,22 @@ int mptcp_init_sched(struct mptcp_sock *msk,
>>>                    struct mptcp_sched_ops *sched);
>>>  void mptcp_release_sched(struct mptcp_sock *msk);
>>>
>>> +static inline struct sock *mptcp_sched_get_subflow(struct mptcp_sock *msk, bool reinject)
>>> +{
>>> +     struct mptcp_sched_data data = {
>>> +             .sock           = msk->first,
>>> +             .call_again     = 0,
>>> +     };
>>
>> It's not clear to me:
>> - why we need the 'sock' argument, since the scheduler already get
>> 'msk' as argument?
> 
> sock is the selected subflow to return in the scheduer.
> 
>> - why we need to initialize it (looks like an 'output' argument ?!?)
> 
> Yes, it's an output argument. No need to init it.
> 
>> - what is the goal/role of the 'call_again' argument. It looks like we
>> just ignore it?!?
> 
> call_again is for supporting a "redundant" packet scheduler in the
> future indicate that get_subflow() function needs to be called again.
> 
>>
>> Side note: perhaps we should move the following chunk:
>>
>> ---
>>        sock_owned_by_me(sk);
>>
>>         if (__mptcp_check_fallback(msk)) {
>>                 if (!msk->first)
>>                         return NULL;
>>                 return sk_stream_memory_free(msk->first) ? msk->first : NULL;
>>         }
>> ---
>>
>>   out of mptcp_subflow_get_send() into mptcp_sched_get_subflow(), so
>> that every scheduler will not have to deal with fallback sockets.
>>
>>> +
>>> +     msk->sched ? INDIRECT_CALL_INET_1(msk->sched->get_subflow,
>>> +                                       mptcp_get_subflow_default,
>>> +                                       msk, reinject, &data) :
>>> +                  mptcp_get_subflow_default(msk, reinject, &data);
>>
>> With this we have quite a lot of conditionals for the default
>> scheduler. I think we can drop the INDIRECT_CALL_INET_1() wrapper, and
>> rework the code so that msk->sched is always NULL with the default
>> scheduler.
> 
> So I need to drop patch 2 (2/8 mptcp: register default scheduler) in
> this series, right?
> 
>>
>> And sorry to bring the next topic so late, but why a 'reinject'
>> argument instead of an additional mptcp_sched_ops op? the latter option
>> will avoid another branch in fast-path.
> 
> How about keeping th 'reinject' argument like this:
> 
> ----
> static inline struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> {
>         struct mptcp_sched_data data;
> 
>         sock_owned_by_me((struct sock *)msk);
> 
>         /* the following check is moved out of mptcp_subflow_get_send */
>         if (__mptcp_check_fallback(msk)) {
>                 if (!msk->first)
>                         return NULL;
>                 return sk_stream_memory_free(msk->first) ? msk->first : NULL;
>         }
> 
>         if (!msk->sched)
>                 return mptcp_subflow_get_send(msk);
> 
>         msk->sched->get_subflow(msk, false, &data);
> 
>         return data.sock;
> }
> 
> static inline struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> {
>         struct mptcp_sched_data data;
> 
>         sock_owned_by_me((const struct sock *)msk);
> 
>         if (__mptcp_check_fallback(msk))
>                 return NULL;
> 
>         if (!msk->sched)
>                 return mptcp_subflow_get_retrans(msk);
> 
>         msk->sched->get_subflow(msk, true, &data);
> 
>         return data.sock;
> }
> -----
> 
> Thanks,
> -Geliang
> 
>>
>> Thanks!
>>
>> Paolo
>>
>>
> 

-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net

Re: [PATCH mptcp-next v16 5/8] mptcp: add get_subflow wrapper
Posted by Paolo Abeni 3 years, 4 months ago
On Wed, 2022-04-27 at 10:27 +0200, Paolo Abeni wrote:
> > +
> > +	msk->sched ? INDIRECT_CALL_INET_1(msk->sched->get_subflow,
> > +					  mptcp_get_subflow_default,
> > +					  msk, reinject, &data) :
> > +		     mptcp_get_subflow_default(msk, reinject, &data);
> 
> With this we have quite a lot of conditionals for the default
> scheduler. I think we can drop the INDIRECT_CALL_INET_1() wrapper, and
> rework the code so that msk->sched is always NULL with the default
> scheduler.

Even better, we could drop the mptcp_get_subflow_default() wrapper and
call directly the current scheduler when msk->sched is NULL, even
before filling the 'data' struct. That should fit nicely expecially if
we use 2 separate ops for plain xmit and retrans.

Overall that would lead to something alike:

---
static inline struct sock *mptcp_sched_get_send(struct mptcp_sock msk)
{
	struct mptcp_sched_data data;

        sock_owned_by_me(sk);

	/* the following check is moved out of mptcp_subflow_get_send */
        if (__mptcp_check_fallback(msk)) {
                if (!msk->first)
                        return NULL;
                return sk_stream_memory_free(msk->first) ? msk->first : NULL;
        }

	if (!msk->sched)
		return mptcp_subflow_get_send(msk);

	data.sock = msk->first; /* if needed */
	data.call_again = 0;	/* if needed */
	msk->sched->get_send(msk, &data);
	msk->last_snd = data.sock;
	return data.sock;
}
---
plus something similar for retrans.

Thanks!

Paolo