This patch defines a new wrapper mptcp_sched_get_subflow(), which
invokes the get_subflow() callback of msk->sched. Use the wrapper
instead of calling mptcp_subflow_get_send() directly.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
net/mptcp/protocol.c | 9 ++++-----
net/mptcp/protocol.h | 16 ++++++++++++++++
2 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 7590e2d29f39..c6e963848b18 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1515,7 +1515,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 		subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
 					READ_ONCE(ssk->sk_pacing_rate) * burst,
 					burst + wmem);
-		msk->last_snd = ssk;
 		msk->snd_burst = burst;
 		return ssk;
 	}
@@ -1575,7 +1574,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 			int ret = 0;
 
 			prev_ssk = ssk;
-			ssk = mptcp_subflow_get_send(msk);
+			ssk = mptcp_sched_get_subflow(msk, false);
 
 			/* First check. If the ssk has changed since
 			 * the last round, release prev_ssk
@@ -1644,7 +1643,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 		 * check for a different subflow usage only after
 		 * spooling the first chunk of data
 		 */
-		xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+		xmit_ssk = first ? ssk : mptcp_sched_get_subflow(mptcp_sk(sk), false);
 		if (!xmit_ssk)
 			goto out;
 		if (xmit_ssk != ssk) {
@@ -2489,7 +2488,7 @@ static void __mptcp_retrans(struct sock *sk)
 	mptcp_clean_una_wakeup(sk);
 
 	/* first check ssk: need to kick "stale" logic */
-	ssk = mptcp_subflow_get_retrans(msk);
+	ssk = mptcp_sched_get_subflow(msk, true);
 	dfrag = mptcp_rtx_head(sk);
 	if (!dfrag) {
 		if (mptcp_data_fin_enabled(msk)) {
@@ -3154,7 +3153,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
 		return;
 
 	if (!sock_owned_by_user(sk)) {
-		struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
+		struct sock *xmit_ssk = mptcp_sched_get_subflow(mptcp_sk(sk), false);
 
 		if (xmit_ssk == ssk)
 			__mptcp_subflow_push_pending(sk, ssk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 22f3f41e1e32..91512fc25128 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -633,6 +633,22 @@ int mptcp_init_sched(struct mptcp_sock *msk,
 		     struct mptcp_sched_ops *sched);
 void mptcp_release_sched(struct mptcp_sock *msk);
 
+static inline struct sock *mptcp_sched_get_subflow(struct mptcp_sock *msk, bool reinject)
+{
+	struct mptcp_sched_data data = {
+		.sock = msk->first,
+		.call_again = 0,
+	};
+
+	msk->sched ? INDIRECT_CALL_INET_1(msk->sched->get_subflow,
+					  mptcp_get_subflow_default,
+					  msk, reinject, &data) :
+		     mptcp_get_subflow_default(msk, reinject, &data);
+
+	msk->last_snd = data.sock;
+	return data.sock;
+}
+
 static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 {
 	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
--
2.34.1
Hello,
First of all, I'm sorry for the very late feedback: I had some
difficulty finding time to follow this development.
On Wed, 2022-04-27 at 09:56 +0800, Geliang Tang wrote:
> This patch defines a new wrapper mptcp_sched_get_subflow(), which
> invokes the get_subflow() callback of msk->sched. Use the wrapper
> instead of calling mptcp_subflow_get_send() directly.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/protocol.c | 9 ++++-----
> net/mptcp/protocol.h | 16 ++++++++++++++++
> 2 files changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 7590e2d29f39..c6e963848b18 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1515,7 +1515,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
> subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
> READ_ONCE(ssk->sk_pacing_rate) * burst,
> burst + wmem);
> - msk->last_snd = ssk;
> msk->snd_burst = burst;
> return ssk;
> }
This breaks "mptcp: really share subflow snd_wnd": when the MPTCP-level
cwin is 0, we want to do a 0-window probe (so mptcp_subflow_get_send()
returns a 'valid' subflow), but we don't want to start a new burst for
such a 0-window probe (so mptcp_subflow_get_send() clears last_snd).

With this change, when the MPTCP-level cwin is 0, 'last_snd' is not
cleared anymore.
A simple solution would be:
---
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 653757ea0aca..c20f5fd04cad 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1507,7 +1507,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 	burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
 	wmem = READ_ONCE(ssk->sk_wmem_queued);
 	if (!burst) {
-		msk->last_snd = NULL;
+		msk->snd_burst = 0;
 		return ssk;
 	}
---
Probably a better solution would be adding 'snd_burst' to struct
mptcp_sched_data, and letting mptcp_sched_get_subflow() update
msk->snd_burst as specified by the scheduler. With this latter change
the 'msk' argument in mptcp_sched_ops->schedule() could be const.
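To illustrate, a minimal sketch of what that could look like; the
'snd_burst' output field and this shape of the wrapper are only my
suggestion, not part of the posted series:

---
/* sketch only: 'snd_burst' becomes an output of the scheduler */
struct mptcp_sched_data {
	struct sock	*sock;		/* output: subflow to use */
	u32		snd_burst;	/* output: burst budget, 0 for a 0-window probe */
	bool		call_again;
};

static inline struct sock *mptcp_sched_get_subflow(struct mptcp_sock *msk, bool reinject)
{
	struct mptcp_sched_data data;

	/* the scheduler only fills 'data', so its 'msk' argument could be const */
	msk->sched->get_subflow(msk, reinject, &data);

	/* every msk-level update happens here, as specified by the scheduler */
	msk->last_snd = data.sock;
	msk->snd_burst = data.snd_burst;
	return data.sock;
}
---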
> @@ -1575,7 +1574,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> int ret = 0;
>
> prev_ssk = ssk;
> - ssk = mptcp_subflow_get_send(msk);
> + ssk = mptcp_sched_get_subflow(msk, false);
>
> /* First check. If the ssk has changed since
> * the last round, release prev_ssk
> @@ -1644,7 +1643,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
> * check for a different subflow usage only after
> * spooling the first chunk of data
> */
> - xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
> + xmit_ssk = first ? ssk : mptcp_sched_get_subflow(mptcp_sk(sk), false);
> if (!xmit_ssk)
> goto out;
> if (xmit_ssk != ssk) {
> @@ -2489,7 +2488,7 @@ static void __mptcp_retrans(struct sock *sk)
> mptcp_clean_una_wakeup(sk);
>
> /* first check ssk: need to kick "stale" logic */
> - ssk = mptcp_subflow_get_retrans(msk);
> + ssk = mptcp_sched_get_subflow(msk, true);
> dfrag = mptcp_rtx_head(sk);
> if (!dfrag) {
> if (mptcp_data_fin_enabled(msk)) {
> @@ -3154,7 +3153,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
> return;
>
> if (!sock_owned_by_user(sk)) {
> - struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
> + struct sock *xmit_ssk = mptcp_sched_get_subflow(mptcp_sk(sk), false);
>
> if (xmit_ssk == ssk)
> __mptcp_subflow_push_pending(sk, ssk);
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 22f3f41e1e32..91512fc25128 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -633,6 +633,22 @@ int mptcp_init_sched(struct mptcp_sock *msk,
> struct mptcp_sched_ops *sched);
> void mptcp_release_sched(struct mptcp_sock *msk);
>
> +static inline struct sock *mptcp_sched_get_subflow(struct mptcp_sock *msk, bool reinject)
> +{
> + struct mptcp_sched_data data = {
> + .sock = msk->first,
> + .call_again = 0,
> + };
It's not clear to me:
- why we need the 'sock' argument, since the scheduler already gets
'msk' as an argument?
- why we need to initialize it (looks like an 'output' argument ?!?)
- what is the goal/role of the 'call_again' argument. It looks like we
just ignore it?!?
Side note: perhaps we should move the following chunk:
---
	sock_owned_by_me(sk);

	if (__mptcp_check_fallback(msk)) {
		if (!msk->first)
			return NULL;

		return sk_stream_memory_free(msk->first) ? msk->first : NULL;
	}
---
out of mptcp_subflow_get_send() into mptcp_sched_get_subflow(), so
that every scheduler will not have to deal with fallback sockets.
> +
> + msk->sched ? INDIRECT_CALL_INET_1(msk->sched->get_subflow,
> + mptcp_get_subflow_default,
> + msk, reinject, &data) :
> + mptcp_get_subflow_default(msk, reinject, &data);
With this we have quite a lot of conditionals for the default
scheduler. I think we can drop the INDIRECT_CALL_INET_1() wrapper, and
rework the code so that msk->sched is always NULL with the default
scheduler.
And sorry to bring the next topic so late, but why a 'reinject'
argument instead of an additional mptcp_sched_ops op? The latter
option would avoid another branch in the fast path.
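Something along these lines, just to make the idea concrete (names and
return type are illustrative, not an agreed API):

---
/* sketch only: two ops instead of the 'reinject' flag */
struct mptcp_sched_ops {
	int (*get_send)(struct mptcp_sock *msk,
			struct mptcp_sched_data *data);
	int (*get_retrans)(struct mptcp_sock *msk,
			   struct mptcp_sched_data *data);

	/* init/release, name, owner, list as elsewhere in the series */
};
---

The caller then picks the op at the call site, so the core never has to
branch on 'reinject' in the fast path.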
Thanks!
Paolo
Hi Paolo,
Thanks for your review!
Paolo Abeni <pabeni@redhat.com> wrote on Wed, Apr 27, 2022 at 16:27:
>
> Hello,
>
> First of all, I'm sorry for the very late feedback: I had some
> difficulty finding time to follow this development.
>
> On Wed, 2022-04-27 at 09:56 +0800, Geliang Tang wrote:
> > This patch defines a new wrapper mptcp_sched_get_subflow(), which
> > invokes the get_subflow() callback of msk->sched. Use the wrapper
> > instead of calling mptcp_subflow_get_send() directly.
> >
> > Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> > ---
> > net/mptcp/protocol.c | 9 ++++-----
> > net/mptcp/protocol.h | 16 ++++++++++++++++
> > 2 files changed, 20 insertions(+), 5 deletions(-)
> >
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index 7590e2d29f39..c6e963848b18 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -1515,7 +1515,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
> > subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
> > READ_ONCE(ssk->sk_pacing_rate) * burst,
> > burst + wmem);
> > - msk->last_snd = ssk;
> > msk->snd_burst = burst;
> > return ssk;
> > }
>
> This breaks "mptcp: really share subflow snd_wnd": when the MPTCP-level
> cwin is 0, we want to do a 0-window probe (so mptcp_subflow_get_send()
> returns a 'valid' subflow), but we don't want to start a new burst for
> such a 0-window probe (so mptcp_subflow_get_send() clears last_snd).
>
> With this change, when the MPTCP-level cwin is 0, 'last_snd' is not
> cleared anymore.
>
> A simple solution would be:
>
> ---
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 653757ea0aca..c20f5fd04cad 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1507,7 +1507,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
> burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
> wmem = READ_ONCE(ssk->sk_wmem_queued);
> if (!burst) {
> - msk->last_snd = NULL;
> + msk->snd_burst = 0;
> return ssk;
> }
> ---
I think it's better to add "msk->last_snd = ssk;" back in
mptcp_subflow_get_send(), keeping the original mptcp_subflow_get_send()
unchanged, and to set msk->last_snd in the scheduler's get_subflow(),
the same as in v12:
https://patchwork.kernel.org/project/mptcp/patch/396f8edcbf8e4f65a1d76c228dc3d12aa4dd2f11.1650430389.git.geliang.tang@suse.com/
>
> Probably a better solution would be adding 'snd_burst' to struct
> mptcp_sched_data, and letting mptcp_sched_get_subflow() update
> msk->snd_burst as specified by the scheduler. With this latter change
> the 'msk' argument in mptcp_sched_ops->schedule() could be const.
>
> > @@ -1575,7 +1574,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> > int ret = 0;
> >
> > prev_ssk = ssk;
> > - ssk = mptcp_subflow_get_send(msk);
> > + ssk = mptcp_sched_get_subflow(msk, false);
> >
> > /* First check. If the ssk has changed since
> > * the last round, release prev_ssk
> > @@ -1644,7 +1643,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
> > * check for a different subflow usage only after
> > * spooling the first chunk of data
> > */
> > - xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
> > + xmit_ssk = first ? ssk : mptcp_sched_get_subflow(mptcp_sk(sk), false);
> > if (!xmit_ssk)
> > goto out;
> > if (xmit_ssk != ssk) {
> > @@ -2489,7 +2488,7 @@ static void __mptcp_retrans(struct sock *sk)
> > mptcp_clean_una_wakeup(sk);
> >
> > /* first check ssk: need to kick "stale" logic */
> > - ssk = mptcp_subflow_get_retrans(msk);
> > + ssk = mptcp_sched_get_subflow(msk, true);
> > dfrag = mptcp_rtx_head(sk);
> > if (!dfrag) {
> > if (mptcp_data_fin_enabled(msk)) {
> > @@ -3154,7 +3153,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
> > return;
> >
> > if (!sock_owned_by_user(sk)) {
> > - struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
> > + struct sock *xmit_ssk = mptcp_sched_get_subflow(mptcp_sk(sk), false);
> >
> > if (xmit_ssk == ssk)
> > __mptcp_subflow_push_pending(sk, ssk);
> > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> > index 22f3f41e1e32..91512fc25128 100644
> > --- a/net/mptcp/protocol.h
> > +++ b/net/mptcp/protocol.h
> > @@ -633,6 +633,22 @@ int mptcp_init_sched(struct mptcp_sock *msk,
> > struct mptcp_sched_ops *sched);
> > void mptcp_release_sched(struct mptcp_sock *msk);
> >
> > +static inline struct sock *mptcp_sched_get_subflow(struct mptcp_sock *msk, bool reinject)
> > +{
> > + struct mptcp_sched_data data = {
> > + .sock = msk->first,
> > + .call_again = 0,
> > + };
>
> It's not clear to me:
> - why we need the 'sock' argument, since the scheduler already gets
> 'msk' as an argument?
sock is the selected subflow returned by the scheduler.
> - why we need to initialize it (looks like an 'output' argument ?!?)
Yes, it's an output argument. No need to init it.
> - what is the goal/role of the 'call_again' argument. It looks like we
> just ignore it?!?
call_again is for supporting a "redundant" packet scheduler in the
future: it indicates that get_subflow() needs to be called again.
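For example, the core could consume it roughly like this; this is
purely illustrative, and mptcp_push_on() is a made-up placeholder for
"push the same data on this subflow":

---
/* illustrative only: draining a "redundant" scheduler via call_again */
do {
	data.call_again = 0;
	msk->sched->get_subflow(msk, reinject, &data);
	if (data.sock)
		mptcp_push_on(msk, data.sock);	/* hypothetical helper */
} while (data.call_again);
---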
>
> Side note: perhaps we should move the following chunk:
>
> ---
> sock_owned_by_me(sk);
>
> if (__mptcp_check_fallback(msk)) {
> if (!msk->first)
> return NULL;
> return sk_stream_memory_free(msk->first) ? msk->first : NULL;
> }
> ---
>
> out of mptcp_subflow_get_send() into mptcp_sched_get_subflow(), so
> that every scheduler will not have to deal with fallback sockets.
>
> > +
> > + msk->sched ? INDIRECT_CALL_INET_1(msk->sched->get_subflow,
> > + mptcp_get_subflow_default,
> > + msk, reinject, &data) :
> > + mptcp_get_subflow_default(msk, reinject, &data);
>
> With this we have quite a lot of conditionals for the default
> scheduler. I think we can drop the INDIRECT_CALL_INET_1() wrapper, and
> rework the code so that msk->sched is always NULL with the default
> scheduler.
So I need to drop patch 2 (2/8 mptcp: register default scheduler) in
this series, right?
>
> And sorry to bring the next topic so late, but why a 'reinject'
> argument instead of an additional mptcp_sched_ops op? The latter
> option would avoid another branch in the fast path.
How about keeping the 'reinject' argument like this:
---
static inline struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
{
	struct mptcp_sched_data data;

	sock_owned_by_me((struct sock *)msk);

	/* the following check is moved out of mptcp_subflow_get_send */
	if (__mptcp_check_fallback(msk)) {
		if (!msk->first)
			return NULL;

		return sk_stream_memory_free(msk->first) ? msk->first : NULL;
	}

	if (!msk->sched)
		return mptcp_subflow_get_send(msk);

	msk->sched->get_subflow(msk, false, &data);

	return data.sock;
}

static inline struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
{
	struct mptcp_sched_data data;

	sock_owned_by_me((const struct sock *)msk);

	if (__mptcp_check_fallback(msk))
		return NULL;

	if (!msk->sched)
		return mptcp_subflow_get_retrans(msk);

	msk->sched->get_subflow(msk, true, &data);

	return data.sock;
}
---
Thanks,
-Geliang
>
> Thanks!
>
> Paolo
>
>
Hi Geliang, Paolo,
On 27/04/2022 16:34, Geliang Tang wrote:
> Hi Paolo,
>
> Thanks for your review!
>
> Paolo Abeni <pabeni@redhat.com> wrote on Wed, Apr 27, 2022 at 16:27:
>>
>> Hello,
>>
>> First of all, I'm sorry for the very late feedback: I had some
>> difficulty finding time to follow this development.
>>
>> On Wed, 2022-04-27 at 09:56 +0800, Geliang Tang wrote:
>>> This patch defines a new wrapper mptcp_sched_get_subflow(), which
>>> invokes the get_subflow() callback of msk->sched. Use the wrapper
>>> instead of calling mptcp_subflow_get_send() directly.
>>>
>>> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
>>> ---
>>> net/mptcp/protocol.c | 9 ++++-----
>>> net/mptcp/protocol.h | 16 ++++++++++++++++
>>> 2 files changed, 20 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>>> index 7590e2d29f39..c6e963848b18 100644
>>> --- a/net/mptcp/protocol.c
>>> +++ b/net/mptcp/protocol.c
>>> @@ -1515,7 +1515,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
>>> subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
>>> READ_ONCE(ssk->sk_pacing_rate) * burst,
>>> burst + wmem);
>>> - msk->last_snd = ssk;
>>> msk->snd_burst = burst;
>>> return ssk;
>>> }
>>
>> This breaks "mptcp: really share subflow snd_wnd": when the MPTCP-level
>> cwin is 0, we want to do a 0-window probe (so mptcp_subflow_get_send()
>> returns a 'valid' subflow), but we don't want to start a new burst for
>> such a 0-window probe (so mptcp_subflow_get_send() clears last_snd).
>>
>> With this change, when the MPTCP-level cwin is 0, 'last_snd' is not
>> cleared anymore.
>>
>> A simple solution would be:
>>
>> ---
>> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
>> index 653757ea0aca..c20f5fd04cad 100644
>> --- a/net/mptcp/protocol.c
>> +++ b/net/mptcp/protocol.c
>> @@ -1507,7 +1507,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
>> burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
>> wmem = READ_ONCE(ssk->sk_wmem_queued);
>> if (!burst) {
>> - msk->last_snd = NULL;
>> + msk->snd_burst = 0;
>> return ssk;
>> }
>> ---
>
> I think it's better to add "msk->last_snd = ssk;" back in
> mptcp_subflow_get_send(), keeping the original mptcp_subflow_get_send()
> unchanged, and to set msk->last_snd in the scheduler's get_subflow(),
> the same as in v12:
>
> https://patchwork.kernel.org/project/mptcp/patch/396f8edcbf8e4f65a1d76c228dc3d12aa4dd2f11.1650430389.git.geliang.tang@suse.com/
It might be interesting to discuss the scheduler API at the
meeting tomorrow (sadly I will not be there).
Maybe it is interesting to separate the work into two steps like it is
done in mptcp.org: select data, then select subflow.
If the scheduler needs to know whether it is selecting a subflow for a
zero-window test or not, we are getting closer to the mptcp.org API :)
(or maybe it is just enough to reset msk->snd_burst instead of asking all
schedulers to set 'last_snd' + give access to that for BPF)
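For reference, the mptcp.org out-of-tree kernel splits the scheduler
into two ops, roughly as below (quoted from memory, details may
differ):

---
/* mptcp.org-style scheduler ops: note the 'zero_wnd_test' flag */
struct mptcp_sched_ops {
	struct sock *(*get_subflow)(struct sock *meta_sk,
				    struct sk_buff *skb,
				    bool zero_wnd_test);
	struct sk_buff *(*next_segment)(struct sock *meta_sk, int *reinject,
					struct sock **subsk,
					unsigned int *limit);
	char name[MPTCP_SCHED_NAME_MAX];
	struct module *owner;
};
---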
Cheers,
Matt
>
>>
>> Probably a better solution would be adding 'snd_burst' to struct
>> mptcp_sched_data, and letting mptcp_sched_get_subflow() update
>> msk->snd_burst as specified by the scheduler. With this latter change
>> the 'msk' argument in mptcp_sched_ops->schedule() could be const.
>>
>>> @@ -1575,7 +1574,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
>>> int ret = 0;
>>>
>>> prev_ssk = ssk;
>>> - ssk = mptcp_subflow_get_send(msk);
>>> + ssk = mptcp_sched_get_subflow(msk, false);
>>>
>>> /* First check. If the ssk has changed since
>>> * the last round, release prev_ssk
>>> @@ -1644,7 +1643,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
>>> * check for a different subflow usage only after
>>> * spooling the first chunk of data
>>> */
>>> - xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
>>> + xmit_ssk = first ? ssk : mptcp_sched_get_subflow(mptcp_sk(sk), false);
>>> if (!xmit_ssk)
>>> goto out;
>>> if (xmit_ssk != ssk) {
>>> @@ -2489,7 +2488,7 @@ static void __mptcp_retrans(struct sock *sk)
>>> mptcp_clean_una_wakeup(sk);
>>>
>>> /* first check ssk: need to kick "stale" logic */
>>> - ssk = mptcp_subflow_get_retrans(msk);
>>> + ssk = mptcp_sched_get_subflow(msk, true);
>>> dfrag = mptcp_rtx_head(sk);
>>> if (!dfrag) {
>>> if (mptcp_data_fin_enabled(msk)) {
>>> @@ -3154,7 +3153,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
>>> return;
>>>
>>> if (!sock_owned_by_user(sk)) {
>>> - struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
>>> + struct sock *xmit_ssk = mptcp_sched_get_subflow(mptcp_sk(sk), false);
>>>
>>> if (xmit_ssk == ssk)
>>> __mptcp_subflow_push_pending(sk, ssk);
>>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>>> index 22f3f41e1e32..91512fc25128 100644
>>> --- a/net/mptcp/protocol.h
>>> +++ b/net/mptcp/protocol.h
>>> @@ -633,6 +633,22 @@ int mptcp_init_sched(struct mptcp_sock *msk,
>>> struct mptcp_sched_ops *sched);
>>> void mptcp_release_sched(struct mptcp_sock *msk);
>>>
>>> +static inline struct sock *mptcp_sched_get_subflow(struct mptcp_sock *msk, bool reinject)
>>> +{
>>> + struct mptcp_sched_data data = {
>>> + .sock = msk->first,
>>> + .call_again = 0,
>>> + };
>>
>> It's not clear to me:
>> - why we need the 'sock' argument, since the scheduler already gets
>> 'msk' as an argument?
>
> sock is the selected subflow returned by the scheduler.
>
>> - why we need to initialize it (looks like an 'output' argument ?!?)
>
> Yes, it's an output argument. No need to init it.
>
>> - what is the goal/role of the 'call_again' argument. It looks like we
>> just ignore it?!?
>
> call_again is for supporting a "redundant" packet scheduler in the
> future: it indicates that get_subflow() needs to be called again.
>
>>
>> Side note: perhaps we should move the following chunk:
>>
>> ---
>> sock_owned_by_me(sk);
>>
>> if (__mptcp_check_fallback(msk)) {
>> if (!msk->first)
>> return NULL;
>> return sk_stream_memory_free(msk->first) ? msk->first : NULL;
>> }
>> ---
>>
>> out of mptcp_subflow_get_send() into mptcp_sched_get_subflow(), so
>> that every scheduler will not have to deal with fallback sockets.
>>
>>> +
>>> + msk->sched ? INDIRECT_CALL_INET_1(msk->sched->get_subflow,
>>> + mptcp_get_subflow_default,
>>> + msk, reinject, &data) :
>>> + mptcp_get_subflow_default(msk, reinject, &data);
>>
>> With this we have quite a lot of conditionals for the default
>> scheduler. I think we can drop the INDIRECT_CALL_INET_1() wrapper, and
>> rework the code so that msk->sched is always NULL with the default
>> scheduler.
>
> So I need to drop patch 2 (2/8 mptcp: register default scheduler) in
> this series, right?
>
>>
>> And sorry to bring the next topic so late, but why a 'reinject'
>> argument instead of an additional mptcp_sched_ops op? The latter
>> option would avoid another branch in the fast path.
>
> How about keeping the 'reinject' argument like this:
>
> ----
> static inline struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> {
> struct mptcp_sched_data data;
>
> sock_owned_by_me((struct sock *)msk);
>
> /* the following check is moved out of mptcp_subflow_get_send */
> if (__mptcp_check_fallback(msk)) {
> if (!msk->first)
> return NULL;
> return sk_stream_memory_free(msk->first) ? msk->first : NULL;
> }
>
> if (!msk->sched)
> return mptcp_subflow_get_send(msk);
>
> msk->sched->get_subflow(msk, false, &data);
>
> return data.sock;
> }
>
> static inline struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> {
> struct mptcp_sched_data data;
>
> sock_owned_by_me((const struct sock *)msk);
>
> if (__mptcp_check_fallback(msk))
> return NULL;
>
> if (!msk->sched)
> return mptcp_subflow_get_retrans(msk);
>
> msk->sched->get_subflow(msk, true, &data);
>
> return data.sock;
> }
> -----
>
> Thanks,
> -Geliang
>
>>
>> Thanks!
>>
>> Paolo
>>
>>
>
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
On Wed, 2022-04-27 at 10:27 +0200, Paolo Abeni wrote:
> > +
> > + msk->sched ? INDIRECT_CALL_INET_1(msk->sched->get_subflow,
> > + mptcp_get_subflow_default,
> > + msk, reinject, &data) :
> > + mptcp_get_subflow_default(msk, reinject, &data);
>
> With this we have quite a lot of conditionals for the default
> scheduler. I think we can drop the INDIRECT_CALL_INET_1() wrapper, and
> rework the code so that msk->sched is always NULL with the default
> scheduler.
Even better, we could drop the mptcp_get_subflow_default() wrapper and
call the current scheduler directly when msk->sched is NULL, even
before filling the 'data' struct. That should fit nicely, especially if
we use 2 separate ops for plain xmit and retrans.
Overall that would lead to something like this:
---
static inline struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
{
	struct mptcp_sched_data data;

	sock_owned_by_me((struct sock *)msk);

	/* the following check is moved out of mptcp_subflow_get_send */
	if (__mptcp_check_fallback(msk)) {
		if (!msk->first)
			return NULL;

		return sk_stream_memory_free(msk->first) ? msk->first : NULL;
	}

	if (!msk->sched)
		return mptcp_subflow_get_send(msk);

	data.sock = msk->first;		/* if needed */
	data.call_again = 0;		/* if needed */
	msk->sched->get_send(msk, &data);

	msk->last_snd = data.sock;
	return data.sock;
}
---
plus something similar for retrans.
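For completeness, the retrans side could look something like this;
again only a sketch, assuming a separate 'get_retrans' op:

---
static inline struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
{
	struct mptcp_sched_data data;

	sock_owned_by_me((const struct sock *)msk);

	/* on a fallback socket there is nothing to reinject at the MPTCP level */
	if (__mptcp_check_fallback(msk))
		return NULL;

	if (!msk->sched)
		return mptcp_subflow_get_retrans(msk);

	msk->sched->get_retrans(msk, &data);

	msk->last_snd = data.sock;
	return data.sock;
}
---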
Thanks!
Paolo